In [5]:
# Switch the working directory to ../movies before importing the local
# movieLens module (presumably that is where movieLens.py and the data it
# reads live — confirm against the repository layout).
import os
os.chdir('../movies')
from movieLens import MovieLens

In [6]:
# Instantiate the loader, then work on copies (.copy()) of its `ratings` and
# `movies` attributes rather than on the attributes themselves.
ml = MovieLens()
ratings, movies = ml.ratings.copy(), ml.movies.copy()

In [8]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# Gráficas relevantes para comparar los algoritmos

Los resultados obtenidos con los modelos desarrollados son los siguientes:

| Modelo       | RMSE  | MAE   | MAP   | MAR   | Mean NDCG | Coverage | User Coverage | Novelty |
|--------------|-------|-------|-------|-------|-----------|----------|---------------|---------|
| user-based   | 0.903 | 0.686 | 0.729 | 0.466 | 0.951     | 0.054    | 0.996         | 4231    |
| item-based   | 0.917 | 0.697 | 0.743 | 0.448 | 0.951     | 0.020    | 1             | 5854    |
| SVD          | 0.880 | 0.676 | 0.737 | 0.448 | 0.953     | 0.038    | 0.918         | 421     |
| SVD++        | 0.868 | 0.664 | 0.743 | 0.449 | 0.954     | 0.034    | 0.908         | 694     |
| RBM          | 1.162 | 0.964 | 0.163 | 0.007 | 0.936     | 0.002    | 0             | 667     |
| AutoRec      | 2.631 | 2.300 | 0.534 | 0.127 | 0.933     | 0.018    | 1             | 2701    |
| Random       | 1.432 | 1.142 | 0.629 | 0.293 | 0.931     | 0.029    | 1             | 1846    |

A continuación se desarrolla una serie de gráficas que permiten analizar estos resultados con mayor profundidad.

# RMSE

In [39]:
import plotly.express as px
import plotly.graph_objects as go

# RMSE per model, listed in the same order as the results table above.
models = ['user-based', 'item-based', 'SVD', 'SVD++', 'RBM', 'AutoRec', 'Random']
rmse_values = [0.903, 0.917, 0.880, 0.868, 1.162, 2.631, 1.432]

# Qualitative palette; every bar is drawn with its fifth entry.
colors = px.colors.qualitative.Dark2

# The value labels are attached to the trace at construction time instead of
# through a separate update_traces() call.
rmse_bars = go.Bar(
    x=models,
    y=rmse_values,
    marker_color=colors[4],
    text=rmse_values,
    textposition='auto',
)
fig = go.Figure(data=[rmse_bars])

# Only axis titles and the plot background are configured; no figure title is set.
fig.update_layout(
    xaxis_title='Algoritmos',
    yaxis_title='RMSE',
    plot_bgcolor='whitesmoke',
)

fig.show()


## Recall (MAR)

In [11]:
# Per-user tally of ratings equal to 5. Rows are filtered before grouping, so
# only users with at least one such rating show up in the result.
is_five_star = ratings['rating'] == 5
rating_counts = ratings[is_five_star].groupby('userId').size()
rating_counts

userId
1      124
2        6
3       10
4       64
5       10
      ... 
605     11
606     33
607     52
608     23
610    180
Length: 573, dtype: int64

In [12]:
# Number of entries in rating_counts relative to the number of distinct
# userId values in ratings, printed as a percentage with two decimals.
users_with_five = len(rating_counts)
all_users = len(ratings['userId'].unique())
print(f"{round(users_with_five / all_users * 100, 2)}%")

93.93%


In [13]:
# Mean of the per-user counts stored in rating_counts.
mean_five_star = rating_counts.mean()
print(mean_five_star)

23.05584642233857


In [14]:
# Largest and smallest per-user counts in rating_counts, one per line.
print(rating_counts.max(), rating_counts.min(), sep='\n')

274
1


In [15]:
# Users whose count in rating_counts is below 10: first the absolute number,
# then that number as a percentage of all entries in rating_counts.
few_five_star = (rating_counts < 10).sum()
few_five_star_pct = round(few_five_star / len(rating_counts) * 100, 2)
print(few_five_star)
print(str(few_five_star_pct) + "%")

225
39.27%


In [38]:
# Bar chart of how many users fall into each bucket of "number of ratings
# equal to 5", based on the per-user counts in rating_counts.
#
# Buckets are half-open intervals [start, end). The last bucket has no upper
# limit: bounding it with rating_counts.max() and an exclusive comparison
# would drop every user whose count equals the maximum, so the bucket counts
# would no longer add up to the number of users.
# NOTE(review): a count of exactly 100 lands in the bucket labelled '>100'.
rating_ranges = [(0, 10), (10, 50), (50, 100), (100, float('inf'))]
labels = ['<10', '10-50', '50-100', '>100']

# Number of users per bucket, in the same order as `labels`.
counts = [
    ((rating_counts >= start) & (rating_counts < end)).sum()
    for start, end in rating_ranges
]

# Hard-coded value; it is not used by the plot below. Kept because other
# cells may read it.
average_rated_with_five = 23

# Share of users per bucket, relative to all entries in rating_counts.
total_users = rating_counts.size
percentages = [count / total_users * 100 for count in counts]

# Each bar is labelled "<count> (<percentage>%)".
fig = go.Figure()
fig.add_trace(go.Bar(
    x=labels,
    y=counts,
    text=[f'{count} ({percentage:.2f}%)' for count, percentage in zip(counts, percentages)],
    textposition='auto',
    marker_color=px.colors.qualitative.Dark2[4],
    name='Frequency'
))

# Titles, a horizontal legend anchored above the top-right corner of the
# plot area, and the background colour.
fig.update_layout(
    title='Distribución de valoraciones de cinco estrellas por usuario',
    xaxis_title='Número de valoraciones de cinco estrellas',
    yaxis_title='Número de usuarios',
    legend=dict(
        orientation='h',
        yanchor='bottom',
        y=1.02,
        xanchor='right',
        x=1
    ),
    plot_bgcolor='whitesmoke',
)

# Display the plot
fig.show()
