## LIBIES

In [50]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier

In [51]:
df = pd.read_csv(r"C:\Users\Original Nassar\Downloads\ratings.csv")

In [52]:
df.head()

Unnamed: 0,196,242,3,881250949
0,186,302,3,891717742
1,22,377,1,878887116
2,244,51,2,880606923
3,166,346,1,886397596
4,298,474,4,884182806


In [53]:
df.columns = ['user_id', 'movie_id', 'rating', 'timestamp']

In [24]:
df.head()

Unnamed: 0,user_id,movie_id,rating,timestamp
0,186,302,3,891717742
1,22,377,1,878887116
2,244,51,2,880606923
3,166,346,1,886397596
4,298,474,4,884182806


In [54]:
print(df.columns)

Index(['user_id', 'movie_id', 'rating', 'timestamp'], dtype='object')


## user similarity

In [65]:
user_item_matrix = df.pivot_table(index='user_id', columns='movie_id', values='rating')

In [66]:
from sklearn.metrics.pairwise import cosine_similarity


user_item_matrix_filled = user_item_matrix.fillna(0)

user_similarity = cosine_similarity(user_item_matrix_filled)

user_similarity_df = pd.DataFrame(user_similarity, 
                                  index=user_item_matrix.index, 
                                  columns=user_item_matrix.index)


In [71]:
item_similarity = cosine_similarity(user_item_matrix_filled.T)
item_similarity_df = pd.DataFrame(item_similarity, 
                                  index=user_item_matrix.columns, 
                                  columns=user_item_matrix.columns)


In [72]:
user_similarity_df.head()

user_id,1,2,3,4,5,6,7,8,9,10,...,934,935,936,937,938,939,940,941,942,943
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1.0,0.166931,0.04746,0.064358,0.378475,0.430239,0.440367,0.319072,0.078138,0.376544,...,0.369527,0.119482,0.274876,0.189705,0.197326,0.118095,0.314072,0.148617,0.179508,0.398175
2,0.166931,1.0,0.110591,0.178121,0.072979,0.245843,0.107328,0.103344,0.161048,0.159862,...,0.156986,0.307942,0.358789,0.424046,0.319889,0.228583,0.22679,0.161485,0.172268,0.105798
3,0.04746,0.110591,1.0,0.344151,0.021245,0.072415,0.066137,0.08306,0.06104,0.065151,...,0.031875,0.042753,0.163829,0.069038,0.124245,0.026271,0.16189,0.101243,0.133416,0.026556
4,0.064358,0.178121,0.344151,1.0,0.031804,0.068044,0.09123,0.18806,0.101284,0.060859,...,0.052107,0.036784,0.133115,0.193471,0.146058,0.030138,0.196858,0.152041,0.170086,0.058752
5,0.378475,0.072979,0.021245,0.031804,1.0,0.237286,0.3736,0.24893,0.056847,0.201427,...,0.338794,0.08058,0.094924,0.079779,0.148607,0.071459,0.239955,0.139595,0.152497,0.313941


In [73]:

def recommend_movies(user_id, user_item_matrix, item_similarity, k=5):
    user_ratings = user_item_matrix.loc[user_id]
    seen_movies = user_ratings.dropna().index

    scores = {}

    for movie in seen_movies:
        similar_movies = item_similarity[movie].dropna()
        for sim_movie, sim_score in similar_movies.items():
            if sim_movie not in seen_movies:
                scores[sim_movie] = scores.get(sim_movie, 0) + sim_score * user_ratings[movie]

    sorted_scores = sorted(scores.items(), key=lambda x: x[1], reverse=True)
    recommended_movies = [movie for movie, score in sorted_scores[:k]]
    return recommended_movies



In [74]:
from scipy.sparse.linalg import svds
import numpy as np

# fill missing values with 0 (or mean)
R = user_item_matrix.fillna(0).values

# apply SVD
U, sigma, Vt = svds(R, k=50)
sigma = np.diag(sigma)

# predicted ratings
predicted_ratings = np.dot(np.dot(U, sigma), Vt)


In [75]:
recommendations = recommend_movies(user_id=1, user_item_matrix=user_item_matrix, item_similarity=item_similarity_df, k=5)
print("Recommended Movies:", recommendations)



Recommended Movies: [423, 655, 568, 403, 385]
