In [1]:
from collections import defaultdict

import dash
import dash_bootstrap_components as dbc
import dash_core_components as dcc
import dash_html_components as html
import joblib
import numpy as np
import pandas as pd
import plotly.express as px
from dash.dependencies import Input, Output
from fuzzywuzzy import process
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
from surprise import Reader, Dataset, NMF
from surprise.model_selection import train_test_split as surprise_train_test_split





In [2]:
# Read the movie catalogue and the user ratings from the working directory
movies, ratings = (
    pd.read_csv(csv_name) for csv_name in ("movies.csv", "ratings.csv")
)


In [3]:

# Preprocess movie data: remove the 'genres' column.
# errors='ignore' keeps this cell idempotent -- re-running it on the same
# kernel (when 'genres' is already gone) no longer raises a KeyError.
movies = movies.drop(columns='genres', errors='ignore')


In [4]:

# Attach every rating to its movie row; the left join keeps movies that
# have no ratings at all (their rating columns are NaN).
combined_dataset = movies.merge(ratings, on='movieId', how='left')


In [5]:

# Movie x user rating table: one row per movieId, one column per userId,
# with 0 standing in for "no rating given".
movies_and_users = (
    ratings
    .pivot(index='movieId', columns='userId', values='rating')
    .fillna(0)
)


In [6]:

# Sparse (CSR) copy of the movie x user table; row order follows
# movies_and_users.index.
matrix_movies_users = csr_matrix(movies_and_users.to_numpy())


In [7]:

# Fit a brute-force cosine-distance neighbour index over the movie rows
model_knn = NearestNeighbors(n_neighbors=20, algorithm='brute', metric='cosine')
model_knn.fit(matrix_movies_users)


NearestNeighbors(algorithm='brute', metric='cosine', n_neighbors=20)

In [8]:

# Train a matrix-factorisation model (Surprise NMF) on the explicit ratings.
# NOTE: the `*_fm` names are kept for compatibility with the rest of the
# notebook, but the algorithm used here is NMF, not a Factorization Machine.

# Take the rating scale from the data instead of assuming 1-5, so ratings
# outside that range (e.g. half-star values below 1) are not mis-declared.
reader = Reader(rating_scale=(ratings['rating'].min(), ratings['rating'].max()))
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)

# Fixed seeds make the train/test split and the factorisation reproducible
# under Restart Kernel -> Run All.
train_fm, test_fm = surprise_train_test_split(data, random_state=42)
model_fm = NMF(random_state=42)
model_fm.fit(train_fm)


<surprise.prediction_algorithms.matrix_factorization.NMF at 0x20e0464eca0>

In [9]:

# Persist both fitted models to disk so they can be reloaded for inference
joblib.dump(value=model_knn, filename='knn_model.pkl')
joblib.dump(value=model_fm, filename='fm_model.pkl')


['fm_model.pkl']

In [10]:

def get_top_n(predictions, n=5):
    """Group predictions by user and keep each user's n best-scored items.

    Requires ``defaultdict`` from ``collections`` (imported at the top of the
    notebook).

    Args:
        predictions: iterable of 5-tuples ``(uid, iid, true_r, est, details)``;
            only ``uid``, ``iid`` and ``est`` are used.
        n: maximum number of items to keep per user.

    Returns:
        A ``defaultdict(list)`` mapping each uid to a list of ``(iid, est)``
        pairs sorted by ``est`` in descending order and truncated to ``n``
        entries. Looking up an unknown uid yields an empty list.
    """
    top_n = defaultdict(list)
    for uid, iid, _true_r, est, _details in predictions:
        top_n[uid].append((iid, est))

    # Sort and truncate each user's list in place so the mapping itself is
    # never reassigned while it is being iterated.
    for user_ratings in top_n.values():
        user_ratings.sort(key=lambda pair: pair[1], reverse=True)
        user_ratings[:] = user_ratings[:n]

    return top_n


# Function to generate recommendations with the trained Surprise model
def recommender_fm(user_id, model, train_set, num_items=5):
    """Recommend the best-predicted movies a user has not rated yet.

    The previous version took the anti-testset of *every* user and re-labelled
    its ``(uid, iid, fill)`` tuples as ``(user_id, uid, iid)``, i.e. it scored
    other users' ids as if they were movie ids. Here the candidate list is
    built for the requested user only.

    Args:
        user_id: raw user id, as it appears in the ratings data.
        model: fitted Surprise algorithm exposing ``test``.
        train_set: Surprise ``Trainset`` the model was fitted on.
        num_items: number of recommendations to return.

    Returns:
        DataFrame with columns ``movieId``, ``rating`` (the predicted rating)
        and the remaining columns of the global ``movies`` frame, ordered from
        best to worst prediction.
    """
    # Movies the user already rated in the training data are not candidates.
    try:
        inner_uid = train_set.to_inner_uid(user_id)
    except ValueError:
        # User not present in the training split: nothing to exclude.
        seen_items = set()
    else:
        seen_items = {inner_iid for inner_iid, _ in train_set.ur[inner_uid]}

    # The third tuple slot is only a placeholder "true" rating for model.test.
    fill = train_set.global_mean
    test_set = [
        (user_id, train_set.to_raw_iid(inner_iid), fill)
        for inner_iid in train_set.all_items()
        if inner_iid not in seen_items
    ]

    predictions = model.test(test_set)
    best = get_top_n(predictions, n=num_items).get(user_id, [])
    if not best:
        # Keep the output schema stable when there is nothing to recommend.
        other_columns = [col for col in movies.columns if col != 'movieId']
        return pd.DataFrame(columns=['movieId', 'rating'] + other_columns)

    recommendations = pd.DataFrame(best, columns=['movieId', 'rating']).merge(movies, on='movieId')
    return recommendations

# Function to generate similar items using Nearest Neighbors
def recommender_knn(item_title, matrix, model, num_items=5, movie_ids=None):
    """Return the movies whose rating vectors are closest to the given title.

    The previous version used the row position of the title inside ``movies``
    as a row of ``matrix`` and then merged the neighbours' matrix positions as
    if they were movieIds. Both are wrong whenever the matrix rows are not in
    the same order as ``movies`` (e.g. movies without ratings are missing
    from the pivot), so positions are now translated through ``movie_ids``.

    Args:
        item_title: (approximate) movie title; fuzzy-matched against
            ``movies['title']``.
        matrix: movie x user matrix the model was fitted on.
        model: fitted neighbour model exposing ``kneighbors``.
        num_items: number of similar movies to return.
        movie_ids: movieId of each row of ``matrix``, in row order. Defaults
            to ``movies_and_users.index``, the frame the matrix was built
            from. Assumed to be unique.

    Returns:
        DataFrame with ``Distance``, ``movieId`` and the remaining columns of
        ``movies``, nearest first, excluding the query movie itself. Empty if
        the title cannot be matched or the movie has no row in ``matrix``.
    """
    if movie_ids is None:
        movie_ids = movies_and_users.index
    movie_ids = pd.Index(movie_ids)
    result_columns = ['Distance', 'movieId'] + [col for col in movies.columns if col != 'movieId']

    match = process.extractOne(item_title, movies['title'])
    if match is None:
        return pd.DataFrame(columns=result_columns)
    title = match[0]

    movie_id = movies.loc[movies['title'] == title, 'movieId'].iloc[0]
    if movie_id not in movie_ids:
        # The movie has no ratings, hence no row in the matrix.
        return pd.DataFrame(columns=result_columns)

    row = movie_ids.get_loc(movie_id)
    # One extra neighbour because the query movie is part of the fitted data.
    n_neighbors = min(num_items + 1, matrix.shape[0])
    distances, indices = model.kneighbors(matrix[row], n_neighbors=n_neighbors)

    neighbours = pd.DataFrame({
        'Distance': distances.flatten(),
        'movieId': movie_ids[indices.flatten()],
    })
    # Drop the query by id rather than by position: with tied distances it is
    # not guaranteed to be the first row.
    neighbours = neighbours[neighbours['movieId'] != movie_id].head(num_items)
    return neighbours.merge(movies, on='movieId')


In [None]:

# Create the Dash application with the Bootstrap stylesheet
app = dash.Dash(name=__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

# Colour palette shared by the layouts and callbacks
colors = dict(background='#F8F9F9', text='#5DADE2')

# Define the User Page layout
# Distinct user ids, computed once and converted to plain Python values so
# the dropdown props contain no numpy scalars.
user_ids = ratings['userId'].unique().tolist()

user_page_layout = html.Div(
    style={'backgroundColor': colors['background']},
    children=[
        html.H1("User Page", style={'color': colors['text'], 'fontSize': '24px'}),
        # Controls: which user to inspect and how many recommendations to show
        dbc.Row(
            [
                dbc.Col(
                    dcc.Dropdown(
                        id='user-dropdown',
                        options=[{'label': str(user), 'value': user} for user in user_ids],
                        # Pre-select the first user; None if there are no ratings
                        value=user_ids[0] if user_ids else None,
                        style={'color': colors['text']}
                    ),
                    width=6
                ),
                dbc.Col(
                    dcc.Input(
                        id='num-items-input',
                        type='number',
                        min=1,
                        max=10,
                        step=1,
                        value=5,
                        style={'color': colors['text']}
                    ),
                    width=2
                ),
            ],
            # Dash style dictionaries use camelCased CSS property names
            style={'marginBottom': '20px'}
        ),
        # Output containers filled by the user-page callback
        dbc.Row(
            [
                dbc.Col(
                    html.Div(id='user-history'),
                    width=6
                ),
                dbc.Col(
                    html.Div(id='user-recommendations'),
                    width=6
                ),
            ]
        ),
    ]
)

# Define the Item Page layout
# Distinct rated movie ids, computed once and converted to plain Python
# values so the dropdown props contain no numpy scalars.
item_ids = ratings['movieId'].unique().tolist()

item_page_layout = html.Div(
    style={'backgroundColor': colors['background']},
    children=[
        html.H1("Item Page", style={'color': colors['text'], 'fontSize': '24px'}),
        # Control: which movie to inspect
        dbc.Row(
            dbc.Col(
                dcc.Dropdown(
                    id='item-dropdown',
                    options=[{'label': str(item), 'value': item} for item in item_ids],
                    # Pre-select the first movie; None if there are no ratings
                    value=item_ids[0] if item_ids else None,
                    style={'color': colors['text']}
                ),
                width=6
            ),
            # Dash style dictionaries use camelCased CSS property names
            style={'marginBottom': '20px'}
        ),
        # Output containers filled by the item-page callback
        dbc.Row(
            [
                dbc.Col(
                    html.Div(id='item-profile'),
                    width=6
                ),
                dbc.Col(
                    html.Div(id='item-similar-items'),
                    width=6
                ),
            ]
        ),
        dbc.Row(
            dbc.Col(
                dcc.Graph(id='item-rating-distribution'),
                width=12
            ),
            style={'marginTop': '30px'}
        ),
    ]
)


# Define the callbacks for the User Page
@app.callback(
    Output('user-history', 'children'),
    Output('user-recommendations', 'children'),
    [Input('user-dropdown', 'value')],
    [Input('num-items-input', 'value')]
)
def update_user_page(selected_user, num_items):
    """Render the rating history and recommendations of the selected user.

    Args:
        selected_user: value of the user dropdown (a userId), or None when
            the dropdown has been cleared.
        num_items: value of the number input, or None when it is empty.

    Returns:
        Tuple ``(history_table, recommendations_table)`` of ``html.Table``
        components.

    Raises:
        dash.exceptions.PreventUpdate: if either input is missing, so the
            page keeps its previous content instead of erroring (a None
            ``num_items`` would otherwise disable the top-n cut-off).
    """
    if selected_user is None or num_items is None:
        raise dash.exceptions.PreventUpdate

    def render_table(frame):
        # One header row built from the column names, then one row per record.
        cell_style = {'color': colors['text']}
        header = html.Tr([html.Th(col, style=cell_style) for col in frame.columns])
        body = [
            html.Tr([html.Td(data, style=cell_style) for data in row])
            for row in frame.values
        ]
        return html.Table([header] + body, style=cell_style)

    user_history = combined_dataset[combined_dataset['userId'] == selected_user]
    recommendations = recommender_fm(selected_user, model_fm, train_fm, num_items=int(num_items))
    return render_table(user_history), render_table(recommendations)

# Define the callbacks for the Item Page
@app.callback(
    Output('item-profile', 'children'),
    Output('item-similar-items', 'children'),
    Output('item-rating-distribution', 'figure'),
    [Input('item-dropdown', 'value')]
)
def update_item_page(selected_item):
    """Render profile, similar movies and rating histogram of a movie.

    Args:
        selected_item: value of the item dropdown (a movieId), or None when
            the dropdown has been cleared.

    Returns:
        Tuple ``(profile_table, similar_items_table, figure)``.

    Raises:
        dash.exceptions.PreventUpdate: if nothing is selected or the movieId
            is not present in ``movies``.
    """
    if selected_item is None:
        raise dash.exceptions.PreventUpdate

    item_profile = movies[movies['movieId'] == selected_item]
    if item_profile.empty:
        raise dash.exceptions.PreventUpdate

    def render_table(frame):
        # One header row built from the column names, then one row per record.
        cell_style = {'color': colors['text']}
        header = html.Tr([html.Th(col, style=cell_style) for col in frame.columns])
        body = [
            html.Tr([html.Td(data, style=cell_style) for data in row])
            for row in frame.values
        ]
        return html.Table([header] + body, style=cell_style)

    # Look the title up by movieId. Indexing movies['title'] with the movieId
    # would treat it as a DataFrame index label and fetch an unrelated row.
    item_title = item_profile['title'].iloc[0]
    similar_items = recommender_knn(item_title, matrix_movies_users, model_knn)

    # Create item rating distribution
    item_ratings = combined_dataset[combined_dataset['movieId'] == selected_item]['rating']
    fig = px.histogram(item_ratings, nbins=10, labels={'value': 'Rating'}, opacity=0.7)
    fig.update_layout(
        xaxis_title='Rating',
        yaxis_title='Count',
        title='Rating Distribution for Selected Item',
        plot_bgcolor=colors['background'],
        paper_bgcolor=colors['background'],
        font_color=colors['text'],
    )

    return render_table(item_profile), render_table(similar_items), fig

# Top-level layout: a tab strip with one tab per page
app.layout = html.Div(
    children=[
        dcc.Tabs(
            children=[
                dcc.Tab(children=user_page_layout, label='User Page', value='user-page'),
                dcc.Tab(children=item_page_layout, label='Item Page', value='item-page'),
            ],
            style={'color': colors['text'], 'fontSize': '24px'},
        )
    ],
    style={'backgroundColor': colors['background']},
)


# Launch the Dash server with debug mode switched off
app.run_server(debug=False)

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


 * Running on http://127.0.0.1:8050/ (Press CTRL+C to quit)
127.0.0.1 - - [26/Jun/2023 22:00:31] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [26/Jun/2023 22:00:31] "GET /_dash-component-suites/dash/deps/polyfill@7.v2_10_2m1687363739.12.1.min.js HTTP/1.1" 200 -
127.0.0.1 - - [26/Jun/2023 22:00:31] "GET /_dash-component-suites/dash/deps/react@16.v2_10_2m1687363739.14.0.min.js HTTP/1.1" 200 -
127.0.0.1 - - [26/Jun/2023 22:00:31] "GET /_dash-component-suites/dash/deps/react-dom@16.v2_10_2m1687363739.14.0.min.js HTTP/1.1" 200 -
127.0.0.1 - - [26/Jun/2023 22:00:31] "GET /_dash-component-suites/dash/deps/prop-types@15.v2_10_2m1687363739.8.1.min.js HTTP/1.1" 200 -
127.0.0.1 - - [26/Jun/2023 22:00:31] "GET /_dash-component-suites/dash_bootstrap_components/_components/dash_bootstrap_components.v0_13_1m1632809301.min.js HTTP/1.1" 200 -
127.0.0.1 - - [26/Jun/2023 22:00:31] "GET /_dash-component-suites/dash_html_components/dash_html_components.v1_0_1m1576596177.min.js HTTP/1.1" 200 -
127.0.0.1 - - [26/Jun