# Personalized Reranking
## I. Setup

In [5]:
import sys

# Make the parent directory importable so the `lib` and `models` packages
# used below can be resolved. The membership check keeps the cell idempotent:
# re-running it does not pile up duplicate '..' entries on sys.path.
if '..' not in sys.path:
    sys.path.append('..')

In [6]:
from lib.types.dataset_type import DatasetType
from lib.types.source_type import SourceType
from lib.utils.utils import print_dict
from lib.services.user_interest_service import UserInterestService
from models.reRank_model import reRank_model

import pandas as pd

In [7]:
def construct_genre(genre):
    """Normalize a single genre label into one lowercase token.

    The value is converted to a string, lowercased, and every space
    character is removed, e.g. "Science Fiction" -> "sciencefiction".
    """
    lowered = str(genre).lower()
    # Splitting on the literal space and re-joining drops every space character.
    return "".join(lowered.split(" "))


def construct_genres_list(genres):
    """Convert a pipe-separated genre string into space-separated tokens.

    Each "|"-delimited part is normalized with `construct_genre`, so
    "Science Fiction|Drama" becomes "sciencefiction drama".

    Missing values (None, NaN, pd.NA) yield an empty string; without this
    guard they would be stringified into a bogus "none"/"nan" genre token.
    """
    # is_scalar() keeps list-like inputs away from pd.isna(), which would
    # return an array for them instead of a single boolean.
    if pd.api.types.is_scalar(genres) and pd.isna(genres):
        return ""
    return " ".join(construct_genre(genre) for genre in str(genres).split("|"))

## II. Find all user interests (genres)

In [8]:
# User whose interests drive the reranking in this notebook.
USER_ID = 123963

# `exec()` returns the mapping displayed below (genre name -> numeric score;
# presumably the user's mean rating per genre -- confirm in UserInterestService).
service = UserInterestService(source_prefix="..", user_id=USER_ID)
interest_dict = service.exec()
interest_dict

  movies_df = pd.read_csv(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rating_per_user_df['movie_id'] = rating_per_user_df['movie_id'].astype(


{'TV Movie': 4.0,
 'Adventure': 3.875,
 'Foreign': 3.6666666666666665,
 'Western': 3.5,
 'History': 3.5,
 'Romance': 3.4642857142857144,
 'Horror': 3.4444444444444446,
 'Fantasy': 3.375,
 'Science Fiction': 3.35,
 'Drama': 3.3205128205128207,
 'Action': 3.2857142857142856,
 'Comedy': 3.272727272727273,
 'Crime': 3.2666666666666666,
 'Thriller': 3.261904761904762,
 'Mystery': 3.142857142857143,
 'Documentary': 2.5,
 'Family': 2.0,
 'Music': 2.0,
 'Animation': 0.5}

## III. Keep only well-rated genres (rating >= 3)

In [9]:
# Collect the normalized names of the genres whose score is at least 3.
# NOTE: despite its name, `filtered_ratings` holds genre tokens, not ratings.
filtered_ratings = []
for genre_name, score in interest_dict.items():
    if score >= 3:
        filtered_ratings.append(construct_genre(genre_name))
filtered_ratings

['tvmovie',
 'adventure',
 'foreign',
 'western',
 'history',
 'romance',
 'horror',
 'fantasy',
 'sciencefiction',
 'drama',
 'action',
 'comedy',
 'crime',
 'thriller',
 'mystery']

## IV. Construct dataframe to rerank

In [10]:
# Number of candidate movies to draw, and the seed that makes the draw
# reproducible under Restart Kernel -> Run All. Without a seed every run
# samples different movies and the saved outputs drift out of sync.
SAMPLE_SIZE = 10
RANDOM_SEED = 42

# Row 0 is a synthetic "Fake" movie whose genres are the user's preferred
# genres; the model call further down is given the same 'Fake' title.
# NOTE: despite its name, `ranking_df` is a plain list of [id, title, genres] rows.
ranking_df = [[0, "Fake", " ".join(filtered_ratings)]]

# Random sample of movies (the original comment labels this step "search").
candidate_movies = service.movies_df.sample(SAMPLE_SIZE, random_state=RANDOM_SEED)
for row in candidate_movies.values:
    # Positional columns: 1 -> id, 5 -> title, 9 -> pipe-separated genres.
    # NOTE(review): this depends on the column order of movies_df -- confirm.
    movie_id, title = row[1], row[5]  # `movie_id` avoids shadowing the builtin `id`
    ranking_df.append([movie_id, title, construct_genres_list(row[9])])

df_to_rerank = pd.DataFrame(data=ranking_df, columns=["id", "title", "genres"])
df_to_rerank

Unnamed: 0,id,title,genres
0,0,Fake,tvmovie adventure foreign western history roma...
1,38410,The Poughkeepsie Tapes,horror thriller
2,232034,The Desert Song,music romance
3,59333,Official Rejection,comedy documentary
4,97666,What a Mess!,comedy
5,297207,La pazza della porta accanto: conversazione co...,
6,166666,3096 Days,crime drama
7,299511,Steak (R)évolution,family documentary adventure
8,392734,Jeremiah Tower: The Last Magnificent,documentary
9,78278,Special Delivery,comedy


## V. Rerank dataframe with model

In [11]:
# 'Fake' is the title of the synthetic row 0 placed in df_to_rerank, whose
# genres are the user's preferred genres.
# NOTE(review): presumably reRank_model uses that title to pick the reference
# row the other movies are compared against -- confirm in models/reRank_model.
model = reRank_model('Fake', df_to_rerank)
# The displayed result has columns id, title and accuracy_score.
result_df = model.reRankBasedOnUserInterest()
result_df

Unnamed: 0,id,title,accuracy_score
0,1,The Poughkeepsie Tapes,0.365148
1,6,3096 Days,0.365148
2,10,The Brothers Lionheart,0.298142
3,4,What a Mess!,0.258199
4,9,Special Delivery,0.258199
5,2,The Desert Song,0.182574
6,3,Official Rejection,0.182574
7,7,Steak (R)évolution,0.149071
8,5,La pazza della porta accanto: conversazione co...,0.0
9,8,Jeremiah Tower: The Last Magnificent,0.0
