# Personalized Reranking
## I. Setup

In [8]:
# Put the parent directory on the import path; the `lib` and `models`
# imports in the next cell presumably rely on it -- confirm the repo layout.
import sys
sys.path.append('..')

In [9]:
from lib.types.dataset_type import DatasetType
from lib.types.source_type import SourceType
from lib.utils.utils import print_dict
from lib.services.user_interest_service import UserInterestService
from models.reRank_model import reRank_model

import pandas as pd

In [10]:
def construct_genre(genre):
    """Turn one genre label into a single lowercase token without spaces.

    The value is stringified first, so non-string inputs are accepted.
    Only the space character is stripped; other whitespace is kept.
    Example: "Science Fiction" -> "sciencefiction".
    """
    lowered = str(genre).lower()
    # Splitting on a literal space and re-joining drops every space character.
    return "".join(lowered.split(" "))


def construct_genres_list(genres):
    """Convert a "|"-separated genre string into space-separated tokens.

    The value is stringified, split on "|", and each piece is normalized
    with construct_genre before the pieces are joined with single spaces.
    """
    pieces = str(genres).split("|")
    return " ".join(map(construct_genre, pieces))

## II. Find all user interests (genres)

In [11]:
# Build the interest profile for a single user. Per the output recorded
# below, exec() returns a dict mapping genre name -> numeric rating
# (presumably that user's average rating per genre -- confirm in
# UserInterestService).
# NOTE(review): user_id and source_prefix are hard-coded here; consider
# lifting them into a configuration cell.
service = UserInterestService(user_id=123963, source_prefix="..")
interest_dict = service.exec()
interest_dict

  movies_df = pd.read_csv(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rating_per_user_df['movie_id'] = rating_per_user_df['movie_id'].astype(


{'TV Movie': 4.0,
 'Adventure': 3.875,
 'Foreign': 3.6666666666666665,
 'Western': 3.5,
 'History': 3.5,
 'Romance': 3.4642857142857144,
 'Horror': 3.4444444444444446,
 'Fantasy': 3.375,
 'Science Fiction': 3.35,
 'Drama': 3.3205128205128207,
 'Action': 3.2857142857142856,
 'Comedy': 3.272727272727273,
 'Crime': 3.2666666666666666,
 'Thriller': 3.261904761904762,
 'Mystery': 3.142857142857143,
 'Documentary': 2.5,
 'Family': 2.0,
 'Music': 2.0,
 'Animation': 0.5}

## III. Filter only well-rated genres (rating >= 3)

In [31]:
# Normalized tokens for every genre whose rating in interest_dict is at
# least 3, kept in the dict's iteration order.
filtered_ratings = list(
    map(
        construct_genre,
        (name for name, score in interest_dict.items() if score >= 3),
    )
)

print_dict(filtered_ratings)

[
    "tvmovie",
    "adventure",
    "foreign",
    "western",
    "history",
    "romance",
    "horror",
    "fantasy",
    "sciencefiction",
    "drama",
    "action",
    "comedy",
    "crime",
    "thriller",
    "mystery"
]


## IV. Construct dataframe to rerank

In [32]:
# Number of candidate movies to draw, and the seed that makes the draw
# repeatable across "Restart Kernel -> Run All".
N_CANDIDATES = 10
SAMPLE_SEED = 42

# Row 0 is a synthetic "Fake" movie whose genres are the user's liked genres;
# its title ('Fake') is what the reranking cell passes to the model.
ranking_df = [[0, "Fake", " ".join(filtered_ratings)]]

# search: a random sample of movies stands in for real search results.
# NOTE(review): columns are read by position (1, 5, 9) and are used here as
# id, title and genres -- confirm against the movies_df schema, or switch to
# column names.
candidates = service.movies_df.sample(N_CANDIDATES, random_state=SAMPLE_SEED)
for row in candidates.values:
    movie_id = row[1]  # not `id`: avoid shadowing the builtin in the kernel
    title = row[5]
    genres = construct_genres_list(row[9])
    ranking_df.append([movie_id, title, genres])

df_to_rerank = pd.DataFrame(data=ranking_df, columns=["id", "title", "genres"])

# Show only the sampled candidates (every row except the profile row).
df_to_rerank.tail(N_CANDIDATES)

Unnamed: 0,id,title,genres
1,127286,Women Talking Dirty,
2,45013,Burning Palms,comedy drama family
3,1615,Erkan & Stefan 2,comedy
4,83310,"If Footmen Tire You, What Will Horses Do?",
5,32921,Captains of the Clouds,action drama war
6,80281,Oosaravelli,action romance foreign
7,300532,Unabomber: The True Story,tvmovie crime drama history
8,52475,Young Goethe in Love,romance drama
9,89070,Stone,thriller
10,108048,"Gwen, the Book of Sand",sciencefiction animation adventure fantasy


## V. Rerank dataframe with model

In [33]:
# 'Fake' is the title of the synthetic user-profile row placed first in
# df_to_rerank; presumably the model scores every other row against that
# row's genres -- confirm in models.reRank_model.
# NOTE(review): in the recorded output the `id` column matches the row
# position in df_to_rerank rather than the movie id -- verify this is intended.
model = reRank_model('Fake', df_to_rerank)
result_df = model.reRankBasedOnUserInterest()
result_df

Unnamed: 0,id,title,accuracy_score
0,7,Unabomber: The True Story,0.516398
1,6,Oosaravelli,0.447214
2,10,"Gwen, the Book of Sand",0.387298
3,8,Young Goethe in Love,0.365148
4,2,Burning Palms,0.298142
5,5,Captains of the Clouds,0.298142
6,3,Erkan & Stefan 2,0.258199
7,9,Stone,0.258199
8,1,Women Talking Dirty,0.0
9,4,"If Footmen Tire You, What Will Horses Do?",0.0
