<a href="https://colab.research.google.com/github/jc77-7/Recommender-System-1/blob/main/NCF_ON_FEB.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
# Read the February 2020 event log and preview its first five rows.
data = pd.read_csv(filepath_or_buffer="/content/2020-Feb.csv")
data.head(n=5)

Unnamed: 0,event_time,event_type,product_id,category_id,category_code,brand,price,user_id,user_session
0,2020-02-01 00:00:01 UTC,cart,5844305,1.48758e+18,,,2.14,485174092.0,4be9643a-420b-4c6b-83dd-a15e772fbf7a
1,2020-02-01 00:00:03 UTC,view,5769925,1.48758e+18,,kapous,4.22,594621622.0,a88baf11-9cd0-4362-bde4-1bfeed3f641d
2,2020-02-01 00:00:08 UTC,view,5817765,1.48758e+18,,zeitun,11.03,495404942.0,3a569c8d-d848-4f09-a925-33f673d84c46
3,2020-02-01 00:00:11 UTC,view,5877033,1.48758e+18,,milv,3.49,564814969.0,7feb39e5-bb7b-4b2b-a546-3e3b7c56326e
4,2020-02-01 00:00:12 UTC,cart,5814871,1.48758e+18,,zinger,2.54,551205603.0,106a7c7f-7fa1-4463-8bec-0ea7b02191cb


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow import keras
from sklearn.preprocessing import LabelEncoder

# Load the February 2020 event log (point this path at your own copy of the CSV if needed).
data = pd.read_csv("/content/2020-Feb.csv")

# Preprocessing
# Drop rows missing any field used below; 'brand' is required because the
# recommendation helper reports a brand for every recommended product.
# .copy() yields an independent frame, so the column assignments that follow
# never write into a slice of another frame (which can raise
# SettingWithCopyWarning on pandas versions without copy-on-write).
data = data.dropna(subset=['user_id', 'product_id', 'event_type', 'brand']).copy()

# Convert event_time to datetime so time features can be derived later if needed
data['event_time'] = pd.to_datetime(data['event_time'])

# Filter for relevant event types (e.g., 'cart', 'purchase', 'view')
relevant_events = ['cart', 'purchase', 'view']  # adjust as needed
data = data[data['event_type'].isin(relevant_events)].copy()

# Label encoding for user and product IDs: maps the raw ids onto the dense
# 0..n-1 integer range that the embedding layers index into.
user_encoder = LabelEncoder()
product_encoder = LabelEncoder()

data['user_id_encoded'] = user_encoder.fit_transform(data['user_id'])
data['product_id_encoded'] = product_encoder.fit_transform(data['product_id'])

num_users = len(user_encoder.classes_)
num_products = len(product_encoder.classes_)

# Implicit-feedback strength per event: view < cart < purchase.
# Any other event type added to relevant_events falls back to the default of 1.
interaction_weights = {'view': 1, 'cart': 2, 'purchase': 3}
data['interaction'] = (
    data['event_type'].map(interaction_weights).fillna(1).astype('int64')
)

# The log holds one row per event, so a (user, product) pair can appear many
# times with different weights. Training on those rows gives the model
# conflicting targets for identical inputs and lets the same pair fall on both
# sides of the split. Keep a single row per pair carrying its strongest
# interaction; `data` itself stays event-level for the brand lookup.
interactions = data.sort_values('interaction', kind='stable').drop_duplicates(
    subset=['user_id_encoded', 'product_id_encoded'], keep='last'
)

# Train-test split over unique (user, product) pairs
train, test = train_test_split(interactions, test_size=0.2, random_state=42)

# NCF Model
# Seed the random generators Keras relies on so weight initialisation and
# batch shuffling are repeatable, matching the fixed random_state of the split.
keras.utils.set_random_seed(42)

embedding_size = 50

# Each input is a single encoded id per sample.
user_input = keras.layers.Input(shape=(1,))
item_input = keras.layers.Input(shape=(1,))

# One learned vector of length embedding_size per encoded user / product id.
user_embedding = keras.layers.Embedding(num_users, embedding_size)(user_input)
item_embedding = keras.layers.Embedding(num_products, embedding_size)(item_input)

# Embedding output is (batch, 1, embedding_size); flatten to (batch, embedding_size).
user_vec = keras.layers.Flatten()(user_embedding)
item_vec = keras.layers.Flatten()(item_embedding)

concat = keras.layers.Concatenate()([user_vec, item_vec])

# MLP tower over the concatenated user/item vectors.
dense1 = keras.layers.Dense(128, activation='relu')(concat)
dense2 = keras.layers.Dense(64, activation='relu')(dense1)
output = keras.layers.Dense(1, activation='sigmoid')(dense2)  # Sigmoid keeps the score in (0, 1)

model = keras.Model(inputs=[user_input, item_input], outputs=output)
# The training targets are graded scores (interaction / 3), not 0/1 labels.
# Keras accuracy thresholds the prediction and tests equality with the target,
# so fractional targets can never count as correct and the number is
# meaningless here. Mean absolute error reports how far the predicted score is
# from the target instead.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['mae'])

# Training the model
# Inputs are the encoded user and product ids; the target is the interaction
# weight divided by 3, which scales the 1/2/3 weights into (0, 1] to match
# the sigmoid output.
model.fit(
    x=[train['user_id_encoded'], train['product_id_encoded']],
    y=train['interaction'] / 3,
    batch_size=256,  # Increased batch size
    epochs=10,
    validation_split=0.1,  # hold out 10% of the training rows for validation
)


# Recommendations
def get_recommendations_with_brands(user_id, num_recommendations=5):
    """Return the top-scoring products for a user, each paired with its brand.

    Scores every encoded product for the given user with the module-level
    ``model`` and keeps the highest-scoring ones.

    Args:
        user_id: Raw (un-encoded) user id; it must be one the module-level
            ``user_encoder`` was fitted on.
        num_recommendations: Maximum number of products to return. Values
            of zero or below yield an empty list.

    Returns:
        A list of ``(product_id, brand)`` tuples ordered from highest to
        lowest predicted score. ``brand`` is taken from the first row of
        ``data`` that mentions the product.

    Raises:
        ValueError: Propagated from ``user_encoder.transform`` when
            ``user_id`` was not seen during fitting.
    """
    # Encode first so an unknown user is reported regardless of the count asked for.
    encoded_user_id = user_encoder.transform([user_id])[0]

    # Guard: a slice of [-0:] would select every product instead of none.
    if num_recommendations <= 0:
        return []

    all_product_ids = np.arange(num_products)
    user_input_data = np.full(len(all_product_ids), encoded_user_id)

    # One score per product for this user.
    predictions = model.predict([user_input_data, all_product_ids]).flatten()

    # argsort is ascending: take the tail, then reverse for best-first order.
    top_product_indices = predictions.argsort()[-num_recommendations:][::-1]
    top_product_ids_encoded = all_product_ids[top_product_indices]
    top_product_ids = product_encoder.inverse_transform(top_product_ids_encoded)

    # Resolve all brands in a single pass over the event frame instead of one
    # full scan per product; drop_duplicates keeps the first occurrence.
    brand_by_product = (
        data.loc[data['product_id'].isin(top_product_ids), ['product_id', 'brand']]
        .drop_duplicates(subset='product_id')
        .set_index('product_id')['brand']
    )

    return [
        (product_id, brand_by_product.loc[product_id])
        for product_id in top_product_ids
    ]

# Example usage:
# Demo with the user from the first remaining event row; substitute any user_id
# that survived preprocessing.
user_id_to_recommend = data['user_id'].iat[0]
recommendations = get_recommendations_with_brands(user_id=user_id_to_recommend)
print(f"Recommendations for user {user_id_to_recommend}: {recommendations}")

Epoch 1/10
[1m513/513[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 31ms/step - accuracy: 0.0390 - loss: 0.6776 - val_accuracy: 0.0538 - val_loss: 0.6606
Epoch 2/10
[1m513/513[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 33ms/step - accuracy: 0.0612 - loss: 0.6485 - val_accuracy: 0.0567 - val_loss: 0.6605
Epoch 3/10
[1m513/513[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 30ms/step - accuracy: 0.0645 - loss: 0.6416 - val_accuracy: 0.0560 - val_loss: 0.6628
Epoch 4/10
[1m513/513[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 33ms/step - accuracy: 0.0652 - loss: 0.6371 - val_accuracy: 0.0560 - val_loss: 0.6671
Epoch 5/10
[1m513/513[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 30ms/step - accuracy: 0.0653 - loss: 0.6336 - val_accuracy: 0.0560 - val_loss: 0.6701
Epoch 6/10
[1m513/513[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 32ms/step - accuracy: 0.0662 - loss: 0.6298 - val_accuracy: 0.0553 - val_loss: 0.6732
Epoch 7/10
[1m5