In [1]:
# Run the project's setup_django helper before the model imports in later cells.
# NOTE(review): presumably this configures Django (settings + django.setup()) so
# the ORM is usable inside the notebook — confirm in setup_django.py.
import setup_django
setup_django.init()

/home/mohammad/Desktop/Python/recommender_system/src


In [4]:
from ratings.models import Rating
import pandas as pd
from movies.models import Movie
from django.conf import settings

In [5]:
# Path to the links CSV inside the project's DATA_DIR setting.
LINKS_SMALL_CSV = settings.DATA_DIR / 'links_small.csv'
# Shown as the cell output: confirms the file is present before it is read.
LINKS_SMALL_CSV.exists()

True

In [6]:
# Collect the object_id of every rating whose target object no longer resolves.
# One entry is recorded per rating, so ids repeat when a movie has many ratings.
qs = Rating.objects.all()
# prefetch_related batches the content_object lookups instead of issuing one
# query per rating row while looping (assumes content_object is a
# GenericForeignKey, which Django can prefetch).
missing_movie_ids = [
    rating.object_id
    for rating in qs.prefetch_related('content_object')
    if rating.content_object is None
]

In [7]:
# Number of ratings pointing at a missing movie (one entry per rating, ids repeat).
_total = len(missing_movie_ids)

In [8]:
# De-duplicate to get the distinct missing movie ids (set order is arbitrary).
total_missing = list(set(missing_movie_ids))

In [9]:
_total

96325

In [10]:
len(total_missing)

8883

In [11]:
qs.count()

99844

In [12]:
# Load the id-mapping table; the preview below shows movieId, imdbId and tmdbId columns.
links_df = pd.read_csv(LINKS_SMALL_CSV)

In [13]:
links_df.head()

Unnamed: 0,movieId,imdbId,tmdbId
0,1,114709,862.0
1,2,113497,8844.0
2,3,113228,15602.0
3,4,114885,31357.0
4,5,113041,11862.0


In [14]:
# Keep only the link rows whose movieId is among the missing ids, then copy so
# the result is an independent frame that can take new columns.
ms_df = links_df.loc[links_df['movieId'].isin(total_missing)].copy()

In [15]:
# Sanity check (shown as the cell output): the number of matched link rows equals
# the number of distinct missing ids, i.e. every missing id was found in the
# links file (assuming movieId is unique there).
len(ms_df) == len(total_missing)

True

In [16]:
def enrich_imdb_col(val):
    """Convert a numeric IMDb id into its ``tt``-prefixed string form.

    The number is zero-padded on the left to at least seven digits and
    prefixed with ``tt`` (e.g. ``114709`` -> ``'tt0114709'``). Ids of any
    length are handled: short ones are padded, ids that already have seven
    or more digits are only prefixed.

    Args:
        val: The IMDb number, as an int or a string of digits.

    Returns:
        str: ``'tt'`` followed by the zero-padded digits. If ``str(val)`` is
        not made up purely of digits (for example ``'nan'`` or an id that is
        already prefixed), that string is returned unchanged.
    """
    text = str(val)
    if not text.isdigit():
        # Nothing sensible to pad; hand back the string form untouched.
        return text
    # zfill pads ids shorter than seven digits and leaves longer ones intact.
    return 'tt' + text.zfill(7)

In [17]:
# Add the 'tt'-prefixed IMDb id column by converting each imdbId value.
ms_df['tt'] = ms_df['imdbId'].map(enrich_imdb_col)

In [18]:
ms_df.head()

Unnamed: 0,movieId,imdbId,tmdbId,tt
0,1,114709,862.0,tt0114709
1,2,113497,8844.0,tt0113497
2,3,113228,15602.0,tt0113228
3,4,114885,31357.0,tt0114885
6,7,114319,11860.0,tt0114319


In [19]:
# Path to the movie metadata CSV inside the project's DATA_DIR setting.
MOVIES_CSV = settings.DATA_DIR / 'movies_metadata.csv'
# Shown as the cell output: confirms the file is present before it is read.
MOVIES_CSV.exists()

True

In [20]:
# Restrict the read to these four columns via usecols; the rest of the file is skipped.
cols = ['title', 'overview', 'release_date', 'imdb_id']
movie_df = pd.read_csv(MOVIES_CSV, usecols=cols)

In [21]:
movie_df.head()

Unnamed: 0,imdb_id,overview,release_date,title
0,tt0114709,"Led by Woody, Andy's toys live happily in his ...",1995-10-30,Toy Story
1,tt0113497,When siblings Judy and Peter discover an encha...,1995-12-15,Jumanji
2,tt0113228,A family wedding reignites the ancient feud be...,1995-12-22,Grumpier Old Men
3,tt0114885,"Cheated on, mistreated and stepped on, the wom...",1995-12-22,Waiting to Exhale
4,tt0113041,Just when George Banks has recovered from his ...,1995-02-10,Father of the Bride Part II


In [22]:
# Inner join (pandas' default, spelled out here): link rows whose 'tt' value has
# no matching imdb_id in the metadata are dropped from the result.
missing_movies_df = pd.merge(
    ms_df,
    movie_df,
    how='inner',
    left_on='tt',
    right_on='imdb_id',
)

In [23]:
missing_movies_df.head()

Unnamed: 0,movieId,imdbId,tmdbId,tt,imdb_id,overview,release_date,title
0,1,114709,862.0,tt0114709,tt0114709,"Led by Woody, Andy's toys live happily in his ...",1995-10-30,Toy Story
1,2,113497,8844.0,tt0113497,tt0113497,When siblings Judy and Peter discover an encha...,1995-12-15,Jumanji
2,3,113228,15602.0,tt0113228,tt0113228,A family wedding reignites the ancient feud be...,1995-12-22,Grumpier Old Men
3,4,114885,31357.0,tt0114885,tt0114885,"Cheated on, mistreated and stepped on, the wom...",1995-12-22,Waiting to Exhale
4,7,114319,11860.0,tt0114319,tt0114319,An ugly duckling having undergone a remarkable...,1995-12-15,Sabrina


In [24]:
# 'id' mirrors movieId, the identifier the missing ratings point at.
missing_movies_df['id'] = missing_movies_df['movieId']
# 'id_alt' is the TMDB id as a plain digit string: tmdbId is read as a float
# (862.0), so it goes through int first to drop the '.0'.
# NOTE(review): astype(int) raises if any tmdbId is NaN — confirm none survive the merge.
missing_movies_df['id_alt'] = missing_movies_df['tmdbId'].astype(int).astype(str)

In [25]:
missing_movies_df.head()

Unnamed: 0,movieId,imdbId,tmdbId,tt,imdb_id,overview,release_date,title,id,id_alt
0,1,114709,862.0,tt0114709,tt0114709,"Led by Woody, Andy's toys live happily in his ...",1995-10-30,Toy Story,1,862
1,2,113497,8844.0,tt0113497,tt0113497,When siblings Judy and Peter discover an encha...,1995-12-15,Jumanji,2,8844
2,3,113228,15602.0,tt0113228,tt0113228,A family wedding reignites the ancient feud be...,1995-12-22,Grumpier Old Men,3,15602
3,4,114885,31357.0,tt0114885,tt0114885,"Cheated on, mistreated and stepped on, the wom...",1995-12-22,Waiting to Exhale,4,31357
4,7,114319,11860.0,tt0114319,tt0114319,An ugly duckling having undergone a remarkable...,1995-12-15,Sabrina,7,11860


In [26]:
# Narrow to the three columns used for the id remap, as an independent copy.
final_df = missing_movies_df.loc[:, ['id', 'id_alt', 'title']].copy()

In [27]:
final_df.head()

Unnamed: 0,id,id_alt,title
0,1,862,Toy Story
1,2,8844,Jumanji
2,3,15602,Grumpier Old Men
3,4,31357,Waiting to Exhale
4,7,11860,Sabrina


In [28]:
# Plain Python list of the TMDB id strings, for use in an ORM `__in` lookup.
alt_id_list = final_df['id_alt'].tolist()

In [29]:
len(alt_id_list)

8829

In [30]:
# Movies whose current id equals one of the TMDB ids collected in alt_id_list.
movies_qs = Movie.objects.filter(id__in=alt_id_list)

In [31]:
movies_qs.count()

735

In [32]:
from django.forms.models import model_to_dict

In [45]:
from django.db import transaction

# Re-key stored movies from their TMDB id (id_alt) to the id in final_df.
# Build the lookup once instead of copying and scanning final_df per movie.
# Only TMDB ids that map to exactly one row are eligible: keep=False drops
# every id that occurs more than once.
remap_by_alt_id = (
    final_df
    .drop_duplicates(subset='id_alt', keep=False)
    .set_index('id_alt')
    .to_dict('index')
)

for obj in movies_qs:
    updated_data = remap_by_alt_id.get(str(obj.id))
    # Titles must agree, as a guard against unrelated ids that happen to collide.
    if updated_data is None or obj.title != updated_data.get('title'):
        continue
    og_model_data = model_to_dict(obj)
    og_model_data['id'] = int(updated_data['id'])
    # The delete and the re-create must succeed or fail together; otherwise a
    # failed create (e.g. the new id is already taken) would lose the movie.
    with transaction.atomic():
        obj.delete()
        # create() already saves the new row, so no extra save() is needed.
        Movie.objects.create(**og_model_data)

In [46]:
# Call the ratings task directly, in this kernel process.
# NOTE(review): presumably this recomputes each movie's aggregate rating now that
# movie ids have been re-keyed — confirm in ratings/tasks.py.
from ratings.tasks import task_update_movie_ratings
task_update_movie_ratings()