In [1]:
import pandas as pd

In [3]:
data_path = "C:/Users/Tema Store/Downloads/ml-100k/"

# تحميل u.data مع تسمية الأعمدة
ratings_cols = ['user_id', 'movie_id', 'rating', 'timestamp']
ratings = pd.read_csv(data_path + "u.data", sep='\t', names=ratings_cols, encoding='latin-1')


In [4]:
ratings.head()

Unnamed: 0,user_id,movie_id,rating,timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596


In [5]:
movies_cols = ['movie_id', 'title']
movies = pd.read_csv(data_path + "u.item", sep='|', names=movies_cols, usecols=[0, 1], encoding='latin-1')

In [6]:
movies.head()

Unnamed: 0,movie_id,title
0,1,Toy Story (1995)
1,2,GoldenEye (1995)
2,3,Four Rooms (1995)
3,4,Get Shorty (1995)
4,5,Copycat (1995)


In [10]:
df = pd.merge(ratings, movies, on='movie_id')

df.head(10)

Unnamed: 0,user_id,movie_id,rating,timestamp,title
0,196,242,3,881250949,Kolya (1996)
1,63,242,3,875747190,Kolya (1996)
2,226,242,5,883888671,Kolya (1996)
3,154,242,3,879138235,Kolya (1996)
4,306,242,5,876503793,Kolya (1996)
5,296,242,4,884196057,Kolya (1996)
6,34,242,5,888601628,Kolya (1996)
7,271,242,4,885844495,Kolya (1996)
8,201,242,4,884110598,Kolya (1996)
9,209,242,4,883589606,Kolya (1996)


In [24]:
user_id_map = {id: idx for idx, id in enumerate(df['user_id'].unique())}
df['user'] = df['user_id'].map(user_id_map)

movie_id_map = {id: idx for idx, id in enumerate(df['movie_id'].unique())}
df['movie'] = df['movie_id'].map(movie_id_map)

df[['user_id', 'user', 'movie_id', 'movie']].head()

Unnamed: 0,user_id,user,movie_id,movie
0,196,0,242,0
1,63,1,242,0
2,226,2,242,0
3,154,3,242,0
4,306,4,242,0


In [25]:
from sklearn.model_selection import train_test_split

In [26]:
train_df, test_df = train_test_split(df, test_size=0.1, random_state=42)

print(f"🔧 عدد العينات في التدريب: {len(train_df)}")
print(f"🧪 عدد العينات في الاختبار: {len(test_df)}")

🔧 عدد العينات في التدريب: 90000
🧪 عدد العينات في الاختبار: 10000


In [27]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Flatten, Dot, Dense

In [33]:
print(df.columns)


Index(['user_id', 'movie_id', 'rating', 'timestamp', 'title', 'user', 'movie'], dtype='object')


In [28]:
num_users = df['user'].nunique()
num_movies = df['movie'].nunique()
embedding_size = 50  # عدد أبعاد كل متجه

In [29]:
# مدخل المستخدم
user_input = Input(shape=(1,))
user_embedding = Embedding(num_users, embedding_size)(user_input)
user_vec = Flatten()(user_embedding)

In [30]:
movie_input = Input(shape=(1,))
movie_embedding = Embedding(num_movies, embedding_size)(movie_input)
movie_vec = Flatten()(movie_embedding)


In [31]:
dot_product = Dot(axes=1)([user_vec, movie_vec])

In [32]:
model = Model(inputs=[user_input, movie_input], outputs=dot_product)
model.compile(optimizer='adam', loss='mse')  # loss = mean squared error

model.summary()

In [34]:
import numpy as np

In [35]:
X = [df['user'].values, df['movie'].values]  # المستخدمين والأفلام كـ input
y = df['rating'].values                      # التقييمات كـ output


In [39]:
user_array = df['user'].values
movie_array = df['movie'].values
rating_array = df['rating'].values

X_train_user, X_test_user, X_train_movie, X_test_movie, y_train, y_test = train_test_split(
    user_array, movie_array, rating_array, test_size=0.1, random_state=42
)


In [40]:
model.fit(
    x=[X_train_user, X_train_movie],
    y=y_train,
    validation_data=([X_test_user, X_test_movie], y_test),
    epochs=10,
    batch_size=64
)

Epoch 1/10
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - loss: 12.0324 - val_loss: 1.6883
Epoch 2/10
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 1.3402 - val_loss: 1.0001
Epoch 3/10
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - loss: 0.9179 - val_loss: 0.9198
Epoch 4/10
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 0.8396 - val_loss: 0.8925
Epoch 5/10
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 0.7936 - val_loss: 0.8728
Epoch 6/10
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 0.7454 - val_loss: 0.8654
Epoch 7/10
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 0.7181 - val_loss: 0.8589
Epoch 8/10
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 0.6780 - val_loss: 0.8574
Epoch 9/10
[1m1407/140

<keras.src.callbacks.history.History at 0x1c4cf2ccb10>

In [41]:
test_loss = model.evaluate([X_test_user, X_test_movie], y_test)
print(f"Test Loss (MSE): {test_loss:.4f}")


[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.8509
Test Loss (MSE): 0.8587


In [44]:
import numpy as np

user_id = 10
movie_id = 50

# حول الـ inputs إلى numpy arrays وشكلهم كأعمدة (batch_size = 1)
user_input = np.array([user_id])
movie_input = np.array([movie_id])

# التنبؤ
predicted_rating = model.predict([user_input, movie_input])
print(f"Predicted rating by user {user_id} for movie {movie_id}: {predicted_rating[0][0]:.2f}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 92ms/step
Predicted rating by user 10 for movie 50: 2.32


In [46]:
# تحميل بيانات الأفلام (لو مش قرأتها قبل كدا)
df_movies = pd.read_csv(
    'C:/Users/Tema Store/Downloads/ml-100k/u.item',
    sep='|',
    encoding='latin-1',
    header=None
)

# تسمية الأعمدة
df_movies.columns = ['movie_id', 'title'] + [f'col_{i}' for i in range(22)]

# ربط movie_id بـ movie (اللي هو بعد إعادة الترقيم)
df_movies['movie'] = df_movies['movie_id'].map(movie_id_map)

# دالة التوصية
def recommend_movies(user_id, df_movies, model, top_n=5):
    movie_ids = np.arange(num_movies)
    user_ids = np.full_like(movie_ids, user_id)
    
    # توقعات التقييمات
    predictions = model.predict([user_ids, movie_ids], verbose=0)
    
    # ترتيب التوقعات تنازليًا
    top_indices = predictions.flatten().argsort()[::-1][:top_n]
    
    # استخراج أفلام التوصية
    recommended_ids = movie_ids[top_indices]
    recommended_movies = df_movies[df_movies['movie'].isin(recommended_ids)]
    
    return recommended_movies[['movie_id', 'title']]

# تجربة التوصية لمستخدم معين
recommendations = recommend_movies(user_id=6, df_movies=df_movies, model=model, top_n=5)
print(recommendations)


      movie_id                     title
407        408     Close Shave, A (1995)
957        958   To Live (Huozhe) (1994)
1366      1367              Faust (1994)
1448      1449    Pather Panchali (1955)
1641      1642  Some Mother's Son (1996)
