<a href="https://colab.research.google.com/github/Wasan-Mohammed/movie_recommendation/blob/main/Movie_Recommendation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from tensorflow.keras import layers, models

In [2]:
rating = pd.read_csv('/content/rating.csv')
rating.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,2,3.5,2005-04-02 23:53:47
1,1,29,3.5,2005-04-02 23:31:16
2,1,32,3.5,2005-04-02 23:33:39
3,1,47,3.5,2005-04-02 23:32:07
4,1,50,3.5,2005-04-02 23:29:40


In [3]:
movie = pd.read_csv('/content/movie.csv')
movie.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


# **Data preprocess**

In [4]:
mr = pd.merge(rating, movie, how='left', on='movieId')
mr.head()

Unnamed: 0,userId,movieId,rating,timestamp,title,genres
0,1,2,3.5,2005-04-02 23:53:47,Jumanji (1995),Adventure|Children|Fantasy
1,1,29,3.5,2005-04-02 23:31:16,"City of Lost Children, The (Cité des enfants p...",Adventure|Drama|Fantasy|Mystery|Sci-Fi
2,1,32,3.5,2005-04-02 23:33:39,Twelve Monkeys (a.k.a. 12 Monkeys) (1995),Mystery|Sci-Fi|Thriller
3,1,47,3.5,2005-04-02 23:32:07,Seven (a.k.a. Se7en) (1995),Mystery|Thriller
4,1,50,3.5,2005-04-02 23:29:40,"Usual Suspects, The (1995)",Crime|Mystery|Thriller


In [5]:
mr.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20000263 entries, 0 to 20000262
Data columns (total 6 columns):
 #   Column     Dtype  
---  ------     -----  
 0   userId     int64  
 1   movieId    int64  
 2   rating     float64
 3   timestamp  object 
 4   title      object 
 5   genres     object 
dtypes: float64(1), int64(2), object(3)
memory usage: 915.5+ MB


In [6]:
mr.describe()

Unnamed: 0,userId,movieId,rating
count,20000260.0,20000260.0,20000260.0
mean,69045.87,9041.567,3.525529
std,40038.63,19789.48,1.051989
min,1.0,1.0,0.5
25%,34395.0,902.0,3.0
50%,69141.0,2167.0,3.5
75%,103637.0,4770.0,4.0
max,138493.0,131262.0,5.0


In [12]:
if mr.isnull().values.any():
  if mr.isnull().values.sum() > 1000:
    print(mr.dropna(inplace=True))
  else:
    print(mr.isnull().values().sum())
else:
    print("no null")

no null


In [11]:
mr.duplicated().sum()

np.int64(0)

In [13]:
#drop the duplicated values but keep the lastest rating
#هنا شفت تواريخ الافلام المكرره عند كل يوزر وحذفت التكرار وخليت اخر تقييم
rating_sorted = mr.sort_values(by='timestamp', ascending=False)
rating_latest = rating_sorted.drop_duplicates(subset='movieId', keep='first')
rating_latest.head()

Unnamed: 0,userId,movieId,rating,timestamp,title,genres
12675921,87586,7151,3.5,2015-03-31 06:40:02,Girl with a Pearl Earring (2003),Drama|Romance
12898527,89081,52458,4.0,2015-03-31 06:11:28,Disturbia (2007),Drama|Thriller
12898546,89081,55232,3.5,2015-03-31 06:11:26,Resident Evil: Extinction (2007),Action|Horror|Sci-Fi|Thriller
2508834,16978,2093,3.5,2015-03-31 06:03:17,Return to Oz (1985),Adventure|Children|Fantasy
7819902,53930,118706,3.5,2015-03-31 06:00:51,Black Sea (2014),Adventure|Thriller


In [14]:
#حذف المستخدمين الي عندهم تقيمات اقل من 15% للافلام
total_movies = mr['movieId'].nunique()
threshold = total_movies * 0.25
user_counts = mr.groupby('userId')['movieId'].nunique()
users_to_remove = user_counts[user_counts < threshold].index
rating_filtered = mr[~mr['userId'].isin(users_to_remove)]
rating_filtered.head()

Unnamed: 0,userId,movieId,rating,timestamp,title,genres
1225775,8405,1,5.0,2006-10-10 07:03:58,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1225776,8405,2,3.0,2006-10-11 02:32:32,Jumanji (1995),Adventure|Children|Fantasy
1225777,8405,3,2.5,2006-11-12 13:01:01,Grumpier Old Men (1995),Comedy|Romance
1225778,8405,5,1.5,2006-11-12 13:05:23,Father of the Bride Part II (1995),Comedy
1225779,8405,6,4.0,2006-10-11 02:13:36,Heat (1995),Action|Crime|Thriller


In [27]:
#implicit Feedback it's undirected rating we just predict
#threshold = 4
mr['interaction'] = (mr['rating'] >= 3.0).astype(int)

In [28]:
#Time-based Split
timeS = mr.sort_values("timestamp")
train = mr.iloc[:int(0.8 * len(timeS))]
test = mr.iloc[int(0.8 * len(timeS)):]

In [18]:
user_encoder = LabelEncoder()
movie_encoder = LabelEncoder()

mr['user'] = user_encoder.fit_transform(mr['userId'])
mr['movie'] = movie_encoder.fit_transform(mr['movieId'])

# تقسيم البيانات إلى تدريب واختبار
train, test = train_test_split(mr, test_size=0.2)

# تعريف المدخلات والمخرجات
X_train = [train['user'], train['movie']]
y_train = train['rating']

X_test = [test['user'], test['movie']]
y_test = test['rating']

In [None]:

model = models.Sequential()

# إدخال المستخدم والفيلم كمدخلين في الشبكة العصبية
model.add(layers.InputLayer(input_shape=(1,)))
model.add(layers.Embedding(input_dim=len(user_encoder.classes_), output_dim=50))
model.add(layers.Flatten())

# إضافة طبقة مخفية
model.add(layers.Dense(328, activation='relu'))
model.add(layers.Dense(328, activation='relu'))
model.add(layers.Dense(500, activation='relu'))
model.add(layers.Dense(328, activation='relu'))
model.add(layers.Dense(500, activation='relu'))
model.add(layers.Dense(328, activation='relu'))
model.add(layers.Dense(500, activation='relu'))
model.add(layers.Dense(328, activation='relu'))
model.add(layers.Dropout(0.2))

# طبقة مخرجات (التقييم المتوقع)
model.add(layers.Dense(1))

# تجميع النموذج
model.compile(optimizer='adam', loss='mean_squared_error')

# عرض ملخص النموذج
model.summary()

In [26]:
history = model.fit(
    [X_train[0], X_train[1]],
    y_train,
    epochs=5,
    batch_size=1500,
    validation_data=([X_test[0], X_test[1]], y_test)
)

Epoch 1/5
[1m10667/10667[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 5ms/step - loss: 0.9198 - val_loss: 0.9334
Epoch 2/5
[1m10667/10667[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 5ms/step - loss: 0.9191 - val_loss: 0.9324
Epoch 3/5
[1m10667/10667[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 5ms/step - loss: 0.9188 - val_loss: 0.9327
Epoch 4/5
[1m10667/10667[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 5ms/step - loss: 0.9189 - val_loss: 0.9330
Epoch 5/5
[1m10667/10667[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 5ms/step - loss: 0.9182 - val_loss: 0.9323


In [None]:
from sklearn.metrics import mean_squared_error
import numpy as np

# حساب التنبؤات باستخدام النموذج
predictions = model.predict([X_test[0], X_test[1]])

# حساب RMSE (Root Mean Squared Error)
rmse = np.sqrt(mean_squared_error(y_test, predictions))
print(f"RMSE: {rmse}")

[1m124983/125002[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 2ms/step

Expected: keras_tensor
Received: inputs=('Tensor(shape=(None, 1))', 'Tensor(shape=(None, 1))')


[1m125002/125002[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m216s[0m 2ms/step


In [None]:
test_loss = model.evaluate([X_test[0], X_test[1]], y_test)
print(f'Test Loss: {test_loss}')

In [None]:

user_input = np.array([user_encoder.transform([123])])  # ID المستخدم 123
movie_input = np.array([movie_encoder.transform([10])])  # ID الفيلم 10

# التنبؤ بالتقييم
predicted_rating = model.predict([user_input, movie_input])
print(f"Predicted rating for user 123 and movie 10: {predicted_rating[0][0]}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
Predicted rating for user 123 and movie 10: 2.882351875305176
