In [None]:
import pandas as pd

# Load ratings export
ratings = pd.read_csv('data/ratings_export.csv')


def read_movies_csv(path='data/movie_data.csv', chunk_size=10000, sample_rows=10000):
    """Read the movie metadata CSV, falling back to more lenient strategies.

    The strategies are tried in order and the first one that parses wins:

    1. python engine, bad lines skipped, ``quoting=1`` (``csv.QUOTE_ALL``)
    2. python engine with explicit separator, encoding, quote and escape chars
    3. python engine, read in chunks of ``chunk_size`` rows and concatenated
    4. python engine, only the first ``sample_rows`` rows (truncated data!)

    Args:
        path: Location of the movie CSV file.
        chunk_size: Rows per chunk for the chunked strategy.
        sample_rows: Row limit for the last-resort sample strategy.

    Returns:
        The DataFrame produced by the first strategy that succeeds.

    Raises:
        OSError: Immediately, if the file is missing or unreadable; changing
            the parsing options cannot fix that, so no retry is attempted.
        Exception: The error from the final strategy if every one fails.
    """
    # Options shared by every strategy.
    lenient = dict(on_bad_lines='skip', engine='python')
    # (label used on success, prefix used on failure, read_csv options)
    strategies = [
        ('method 1', 'First attempt failed',
         dict(lenient, quoting=1)),  # 1 == csv.QUOTE_ALL
        ('method 2', 'Second attempt failed',
         dict(lenient, sep=',', encoding='utf-8', quotechar='"', escapechar='\\')),
        ('chunking method', 'Chunked attempt failed',
         dict(lenient, chunksize=chunk_size)),
        ('sample method', 'Sample attempt failed',
         dict(lenient, nrows=sample_rows)),
    ]

    last_error = None
    for label, failure_prefix, options in strategies:
        if 'nrows' in options:
            # Make the truncation visible: this strategy does not load the full file.
            print("Loading a smaller sample of the data...")
        try:
            if 'chunksize' in options:
                # With chunksize, read_csv returns a reader; the context manager
                # closes the underlying file handle once all chunks are consumed.
                with pd.read_csv(path, **options) as reader:
                    frame = pd.concat(list(reader), ignore_index=True)
            else:
                frame = pd.read_csv(path, **options)
        except OSError:
            # Missing/unreadable file: no parsing strategy can help, fail now.
            raise
        except Exception as error:
            print(f"{failure_prefix}: {error}")
            last_error = error
            continue
        print(f"Successfully loaded movies data with {label}")
        return frame

    # Every strategy failed; surface the most recent parsing error.
    raise last_error


movies = read_movies_csv()

print(f"Loaded {len(ratings)} user ratings")
print(f"Loaded {len(movies)} movies")

# Merge datasets on movie_id to create a comprehensive dataset.
# This is done before loading the personal ratings so that a problem with
# that file does not prevent user_ratings from being built.
user_ratings = pd.merge(ratings, movies, on='movie_id')

# Load your ratings from the cleaned file. If it is missing, the
# FileNotFoundError is raised here, after user_ratings already exists.
my_ratings = pd.read_csv('data/processed/ratings_tmdb_cleaned.csv')
print(f"Loaded {len(my_ratings)} of your ratings")

FileNotFoundError: [Errno 2] No such file or directory: 'data/processed/ratings_tmdb_cleaned.csv'

In [None]:
# Preview the first rows of the merged ratings + movie metadata table.
user_ratings.head()

Unnamed: 0,_id_x,movie_id,rating_val,user_id,_id_y,genres,image_url,imdb_id,imdb_link,movie_title,...,popularity,production_countries,release_date,runtime,spoken_languages,tmdb_id,tmdb_link,vote_average,vote_count,year_released
0,5fc57c5d6758f6963451a07f,feast-2014,7,deathproof,5fc880726758f69634df0bca,"[""Animation"",""Comedy"",""Drama"",""Family""]",film-poster/2/2/0/1/9/2/220192-feast-0-230-0-3...,tt3689498,http://www.imdb.com/title/tt3689498/maindetails,Feast,...,9.26,"[""United States of America""]",2014-10-25,6.0,"[""English""]",293299.0,https://www.themoviedb.org/movie/293299/,7.9,720.0,2014.0
1,5fc57ca06758f69634538bad,feast-2014,10,ingridgoeswest,5fc880726758f69634df0bca,"[""Animation"",""Comedy"",""Drama"",""Family""]",film-poster/2/2/0/1/9/2/220192-feast-0-230-0-3...,tt3689498,http://www.imdb.com/title/tt3689498/maindetails,Feast,...,9.26,"[""United States of America""]",2014-10-25,6.0,"[""English""]",293299.0,https://www.themoviedb.org/movie/293299/,7.9,720.0,2014.0
2,5fc57cbd6758f696345475a3,feast-2014,8,dirkh,5fc880726758f69634df0bca,"[""Animation"",""Comedy"",""Drama"",""Family""]",film-poster/2/2/0/1/9/2/220192-feast-0-230-0-3...,tt3689498,http://www.imdb.com/title/tt3689498/maindetails,Feast,...,9.26,"[""United States of America""]",2014-10-25,6.0,"[""English""]",293299.0,https://www.themoviedb.org/movie/293299/,7.9,720.0,2014.0
3,5fc57ce06758f6963455400b,feast-2014,10,childrenofmen,5fc880726758f69634df0bca,"[""Animation"",""Comedy"",""Drama"",""Family""]",film-poster/2/2/0/1/9/2/220192-feast-0-230-0-3...,tt3689498,http://www.imdb.com/title/tt3689498/maindetails,Feast,...,9.26,"[""United States of America""]",2014-10-25,6.0,"[""English""]",293299.0,https://www.themoviedb.org/movie/293299/,7.9,720.0,2014.0
4,5fc57cf36758f69634558b0e,feast-2014,8,suspirliam,5fc880726758f69634df0bca,"[""Animation"",""Comedy"",""Drama"",""Family""]",film-poster/2/2/0/1/9/2/220192-feast-0-230-0-3...,tt3689498,http://www.imdb.com/title/tt3689498/maindetails,Feast,...,9.26,"[""United States of America""]",2014-10-25,6.0,"[""English""]",293299.0,https://www.themoviedb.org/movie/293299/,7.9,720.0,2014.0


In [None]:
# Number of rows in the merged ratings + movie metadata table.
len(user_ratings)

11079666

In [None]:
# Number of rows in the personal ratings table (read from
# data/processed/ratings_tmdb_cleaned.csv in the loading cell).
# NOTE(review): the loading cell's saved output shows a FileNotFoundError for
# that file, so this count presumably comes from an earlier kernel state;
# confirm with Restart Kernel -> Run All.
len(my_ratings)

186

In [None]:
def select_title_ratings(frame, title, rating=10):
    """Return the rows of ``frame`` for one movie title with one exact rating.

    Args:
        frame: DataFrame with 'movie_title' and 'rating_val' columns.
        title: Exact value to match in the 'movie_title' column.
        rating: Exact value to match in the 'rating_val' column.

    Returns:
        The matching rows of ``frame``, keeping their original index.
    """
    is_match = (frame['movie_title'] == title) & (frame['rating_val'] == rating)
    return frame[is_match]


# Rating rows where each of the three films received 10/10
interstellar_10 = select_title_ratings(user_ratings, 'Interstellar')
la_la_land_10 = select_title_ratings(user_ratings, 'La La Land')
about_time_10 = select_title_ratings(user_ratings, 'About Time')

# user_ids behind those 10/10 ratings
interstellar_users_10 = interstellar_10['user_id']
la_la_land_users_10 = la_la_land_10['user_id']

# Users who rated all three films 10/10: start from the Interstellar raters
# and keep only those also present among the raters of each other film.
# The result keeps the index labels of the Interstellar rating rows.
common_users_10 = interstellar_users_10
for other_users in (la_la_land_users_10, about_time_10['user_id']):
    common_users_10 = common_users_10[common_users_10.isin(other_users)]

print(common_users_10)

2633188          niceguys
2633211          mskiesha
2633229             milez
2633309          ianjonas
2633416    kennedylizrose
                ...      
2637153           jnawsty
2637703           cami123
2637734           erfrost
2637807            joelca
2637888             etiam
Name: user_id, Length: 67, dtype: object
