In [8]:
import pandas as pd
import csv


In [None]:
# Load the TMDB movie dataset, falling back through progressively simpler strategies.
def _read_whole_file(path):
    """Read the whole CSV at once with the Python parser, skipping malformed rows.

    `low_memory` is deliberately not passed: it is a C-engine-only option and
    `read_csv` raises ValueError when it is combined with `engine='python'`.
    `error_bad_lines` is not passed either: it conflicts with `on_bad_lines`
    and no longer exists in current pandas releases.
    """
    return pd.read_csv(
        path,
        on_bad_lines='skip',
        encoding='utf-8',
        quoting=csv.QUOTE_MINIMAL,
        engine='python',  # Use Python engine which is more forgiving
        sep=',',
    )


def _read_in_chunks(path, chunk_size=10000):
    """Read the CSV in `chunk_size`-row pieces and concatenate them into one frame."""
    pieces = []
    rows_read = 0
    # The context manager closes the underlying file handle even if parsing fails midway.
    with pd.read_csv(
        path,
        chunksize=chunk_size,
        on_bad_lines='skip',
        encoding='utf-8',
        engine='python',
    ) as reader:
        for piece in reader:
            pieces.append(piece)
            # Count the rows actually parsed; the final chunk is usually shorter than
            # chunk_size, and skipped bad lines never reach the frame.
            rows_read += len(piece)
            print(f"Processed chunk, total rows so far: {rows_read}")
    return pd.concat(pieces, ignore_index=True)


def _read_with_defaults(path):
    """Read the CSV with default parser settings, only skipping malformed rows."""
    return pd.read_csv(path, on_bad_lines='skip')


def load_movies(primary_path="TMDB_movie_dataset_v11.csv",
                fallback_path="movie_data_tmbd.csv"):
    """Load the movie dataset, trying each loading strategy in order.

    Strategies, in order:
      1. read `primary_path` in one pass with the Python parser;
      2. read `primary_path` in chunks;
      3. read `fallback_path` with default settings.

    Args:
        primary_path: path of the main CSV file.
        fallback_path: path of the alternative CSV file used as a last resort.

    Returns:
        The first DataFrame that loads successfully, or None when every
        strategy fails (each failure is reported on stdout, not raised).
    """
    attempts = (
        (
            "Attempting to read CSV with robust parameters...",
            _read_whole_file,
            primary_path,
            "Successfully loaded {rows} rows",
            "Method 1 failed: {error}",
        ),
        (
            "\nTrying to read in chunks...",
            _read_in_chunks,
            primary_path,
            "Successfully loaded {rows} rows using chunking method",
            "Method 2 also failed: {error}",
        ),
        (
            "\nTrying alternative CSV file...",
            _read_with_defaults,
            fallback_path,
            "Successfully loaded alternative CSV with {rows} rows",
            "All methods failed. Error: {error}",
        ),
    )

    for announcement, reader, path, success_msg, failure_msg in attempts:
        print(announcement)
        try:
            frame = reader(path)
        except Exception as error:
            # Deliberate best effort: a missing file, a decoding error or a parser
            # error all just mean "move on to the next strategy".
            print(failure_msg.format(error=error))
            continue
        print(success_msg.format(rows=len(frame)))
        return frame

    print("You may need to manually clean the CSV file or use a different data source.")
    return None


loaded_movies = load_movies()
if loaded_movies is not None:
    # Only bind `movies` on success, so a failed load does not leave a None placeholder.
    movies = loaded_movies
    print("Dataset shape:", movies.shape)
    print("\nFirst few rows:")
    print(movies.head())

In [9]:
# Read all_movies.csv into `movies` and display its first five rows.
movies = pd.read_csv(filepath_or_buffer='all_movies.csv')
movies.head(n=5)

Unnamed: 0,id,title,vote_average,vote_count,status,release_date,revenue,runtime,adult,backdrop_path,...,overview,popularity,poster_path,tagline,genres,production_companies,production_countries,spoken_languages,keywords,year
0,27205,Inception,8.364,34495,Released,2010-07-15,825532764,148,False,/8ZTVqvKDQ8emSGUEMjsS4yHAwrp.jpg,...,"Cobb, a skilled thief who commits corporate es...",83.952,/oYuLEt3zVCKq57qu2F8dT7NIa6f.jpg,Your mind is the scene of the crime.,"Action, Science Fiction, Adventure","Legendary Pictures, Syncopy, Warner Bros. Pict...","United Kingdom, United States of America","English, French, Japanese, Swahili","rescue, mission, dream, airplane, paris, franc...",2010.0
1,157336,Interstellar,8.417,32571,Released,2014-11-05,701729206,169,False,/pbrkL804c8yAv3zBZR4QPEafpAR.jpg,...,The adventures of a group of explorers who mak...,140.241,/gEU2QniE6E77NI6lCU6MxlNBvIx.jpg,Mankind was born on Earth. It was never meant ...,"Adventure, Drama, Science Fiction","Legendary Pictures, Syncopy, Lynda Obst Produc...","United Kingdom, United States of America",English,"rescue, future, spacecraft, race against time,...",2014.0
2,155,The Dark Knight,8.512,30619,Released,2008-07-16,1004558444,152,False,/nMKdUUepR0i5zn0y1T4CsSB5chy.jpg,...,Batman raises the stakes in his war on crime. ...,130.643,/qJ2tW6WMUDux911r6m7haRef0WH.jpg,Welcome to a world without rules.,"Drama, Action, Crime, Thriller","DC Comics, Legendary Pictures, Syncopy, Isobel...","United Kingdom, United States of America","English, Mandarin","joker, sadism, chaos, secret identity, crime f...",2008.0
3,19995,Avatar,7.573,29815,Released,2009-12-15,2923706026,162,False,/vL5LR6WdxWPjLPFRLe133jXWsh5.jpg,...,"In the 22nd century, a paraplegic Marine is di...",79.932,/kyeqWdyUXW608qlYkRqosgbbJyK.jpg,Enter the world of Pandora.,"Action, Adventure, Fantasy, Science Fiction","Dune Entertainment, Lightstorm Entertainment, ...","United States of America, United Kingdom","English, Spanish","future, society, culture clash, space travel, ...",2009.0
4,24428,The Avengers,7.71,29166,Released,2012-04-25,1518815515,143,False,/9BBTo63ANSmhC4e6r62OJFuK2GL.jpg,...,When an unexpected enemy emerges and threatens...,98.082,/RYMX2wcKCBAr24UyPD7xwmjaTn.jpg,Some assembly required.,"Science Fiction, Action, Adventure",Marvel Studios,United States of America,"English, Hindi, Russian","new york city, superhero, shield, based on com...",2012.0


In [3]:
# Filter movies released between start_year and end_year (inclusive).
# pandas is already imported as `pd` in the notebook's first cell.
start_year = 2016
end_year = 2020

# Parse release_date as datetime; values that cannot be parsed become NaT.
# Re-running this on an already-converted column is a no-op.
movies['release_date'] = pd.to_datetime(movies['release_date'], errors='coerce')

# Derive the release year; it is NaN wherever release_date is NaT.
movies['year'] = movies['release_date'].dt.year

# between() is inclusive on both ends and evaluates to False for NaN, so rows
# without a valid release_date are excluded here and need no separate dropna.
# .copy() makes recent_movies an independent frame rather than a slice of `movies`.
recent_movies = movies[movies['year'].between(start_year, end_year)].copy()

total_movies = len(movies)
# Guard against an empty dataset so the summary never divides by zero.
recent_share = len(recent_movies) / total_movies * 100 if total_movies else 0.0

print(f"Total movies in dataset: {total_movies}")
print(f"Movies from {start_year}-{end_year} (inclusive): {len(recent_movies)}")
print(f"Percentage of movies from {start_year}-{end_year}: {recent_share:.2f}%")

recent_movies.head()


Total movies in dataset: 1238452
Movies from 2016-2020 (inclusive): 190817
Percentage of movies from 2016-2020: 15.41%


Unnamed: 0,id,title,vote_average,vote_count,status,release_date,revenue,runtime,adult,backdrop_path,...,overview,popularity,poster_path,tagline,genres,production_companies,production_countries,spoken_languages,keywords,year
5,293660,Deadpool,7.606,28894,Released,2016-02-09,783100000,108,False,/en971MEXui9diirXlogOrPKmsEn.jpg,...,The origin story of former Special Forces oper...,72.735,/zq8Cl3PNIDGU3iWNRoc5nEZ6pCe.jpg,Witness the beginning of a happy ending.,"Action, Adventure, Comedy","20th Century Fox, The Donners' Company, Genre ...",United States of America,English,"superhero, anti hero, mercenary, based on comi...",2016.0
6,299536,Avengers: Infinity War,8.255,27713,Released,2018-04-25,2052415039,149,False,/mDfJG3LC3Dqb67AZ52x3Z0jU0uB.jpg,...,As the Avengers and their allies have continue...,154.34,/7WsyChQLEftFiDOVTGkv3hFpyyt.jpg,An entire universe. Once and for all.,"Adventure, Action, Science Fiction",Marvel Studios,United States of America,"English, Xhosa","sacrifice, magic, superhero, based on comic, s...",2018.0
15,299534,Avengers: Endgame,8.263,23857,Released,2019-04-24,2800000000,181,False,/7RyHsO4yDXtBv1zUU3mTpHeQ0d5.jpg,...,After the devastating events of Avengers: Infi...,91.756,/or06FN3Dka5tukK1e9sl16pB3iy.jpg,Avenge the fallen.,"Adventure, Science Fiction, Action",Marvel Studios,United States of America,"English, Japanese, Xhosa","superhero, time travel, space travel, time mac...",2019.0
18,475557,Joker,8.168,23425,Released,2019-10-01,1074458282,122,False,/hO7KbdvGOtDdeg0W4Y5nKEHeDDh.jpg,...,"During the 1980s, a failed stand-up comedian i...",54.522,/udDclJoHjfjb8Ekgsd4FDteOkCU.jpg,Put on a happy face.,"Crime, Thriller, Drama","Warner Bros. Pictures, Joint Effort, Village R...","Canada, United States of America",English,"dream, street gang, society, psychopath, clown...",2019.0
24,271110,Captain America: Civil War,7.4,21541,Released,2016-04-27,1155046416,147,False,/wdwcOBMkt3zmPQuEMxB3FUtMio2.jpg,...,"Following the events of Age of Ultron, the col...",70.741,/rAGiXaUfPzY7CDEyNKUofk3Kw2e.jpg,United we stand. Divided we fall.,"Adventure, Action, Science Fiction",Marvel Studios,United States of America,"Romanian, English, German, Russian","civil war, superhero, based on comic, sequel, ...",2016.0


In [4]:
# Write the filtered frame to recent_movies.csv without the DataFrame index column.
recent_movies.to_csv(path_or_buf='recent_movies.csv', index=False)


In [5]:
# Write the full frame to all_movies.csv without the DataFrame index column.
movies.to_csv(path_or_buf='all_movies.csv', index=False)


In [7]:
# Read all_movies.csv back into `test` and display its first five rows.
test = pd.read_csv(filepath_or_buffer='all_movies.csv')
test.head(n=5)

Unnamed: 0,id,title,vote_average,vote_count,status,release_date,revenue,runtime,adult,backdrop_path,...,overview,popularity,poster_path,tagline,genres,production_companies,production_countries,spoken_languages,keywords,year
0,27205,Inception,8.364,34495,Released,2010-07-15,825532764,148,False,/8ZTVqvKDQ8emSGUEMjsS4yHAwrp.jpg,...,"Cobb, a skilled thief who commits corporate es...",83.952,/oYuLEt3zVCKq57qu2F8dT7NIa6f.jpg,Your mind is the scene of the crime.,"Action, Science Fiction, Adventure","Legendary Pictures, Syncopy, Warner Bros. Pict...","United Kingdom, United States of America","English, French, Japanese, Swahili","rescue, mission, dream, airplane, paris, franc...",2010.0
1,157336,Interstellar,8.417,32571,Released,2014-11-05,701729206,169,False,/pbrkL804c8yAv3zBZR4QPEafpAR.jpg,...,The adventures of a group of explorers who mak...,140.241,/gEU2QniE6E77NI6lCU6MxlNBvIx.jpg,Mankind was born on Earth. It was never meant ...,"Adventure, Drama, Science Fiction","Legendary Pictures, Syncopy, Lynda Obst Produc...","United Kingdom, United States of America",English,"rescue, future, spacecraft, race against time,...",2014.0
2,155,The Dark Knight,8.512,30619,Released,2008-07-16,1004558444,152,False,/nMKdUUepR0i5zn0y1T4CsSB5chy.jpg,...,Batman raises the stakes in his war on crime. ...,130.643,/qJ2tW6WMUDux911r6m7haRef0WH.jpg,Welcome to a world without rules.,"Drama, Action, Crime, Thriller","DC Comics, Legendary Pictures, Syncopy, Isobel...","United Kingdom, United States of America","English, Mandarin","joker, sadism, chaos, secret identity, crime f...",2008.0
3,19995,Avatar,7.573,29815,Released,2009-12-15,2923706026,162,False,/vL5LR6WdxWPjLPFRLe133jXWsh5.jpg,...,"In the 22nd century, a paraplegic Marine is di...",79.932,/kyeqWdyUXW608qlYkRqosgbbJyK.jpg,Enter the world of Pandora.,"Action, Adventure, Fantasy, Science Fiction","Dune Entertainment, Lightstorm Entertainment, ...","United States of America, United Kingdom","English, Spanish","future, society, culture clash, space travel, ...",2009.0
4,24428,The Avengers,7.71,29166,Released,2012-04-25,1518815515,143,False,/9BBTo63ANSmhC4e6r62OJFuK2GL.jpg,...,When an unexpected enemy emerges and threatens...,98.082,/RYMX2wcKCBAr24UyPD7xwmjaTn.jpg,Some assembly required.,"Science Fiction, Action, Adventure",Marvel Studios,United States of America,"English, Hindi, Russian","new york city, superhero, shield, based on com...",2012.0
