<a href="https://colab.research.google.com/github/YassineKariim/AziureDmoDsbd/blob/master/recomendation_system.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Embedding, Concatenate, Flatten
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler


# Partition the feature columns by dtype: float64/int64 columns are treated as
# numeric, object columns as categorical. X_train / X_test must already exist
# in the kernel when this cell runs.
numerical_cols = X_train.select_dtypes(include=['float64', 'int64']).columns
categorical_cols = X_train.select_dtypes(include=['object']).columns

# One LabelEncoder per categorical column: fitted on the training split, then
# re-used unchanged on the test split.
label_encoders = {}
for column in categorical_cols:
    encoder = label_encoders[column] = LabelEncoder()
    X_train[column] = encoder.fit_transform(X_train[column])
    X_test[column] = encoder.transform(X_test[column])

# Standardize the numeric columns with statistics learned from the training
# split only, and apply the same transform to both splits.
scaler = StandardScaler()
scaler.fit(X_train[numerical_cols])
for split in (X_train, X_test):
    split[numerical_cols] = scaler.transform(split[numerical_cols])

# Define the model architecture
def create_recommendation_model(num_users, num_items, embedding_dim=10, input_dim=None):
    """Build and compile a small dense regression network.

    Architecture: Input(input_dim) -> Dense(64, relu) -> Dense(1, linear),
    compiled with mean-squared-error loss and the Adam optimizer.

    Args:
        num_users: Accepted for call compatibility; not used by this
            architecture.
        num_items: Accepted for call compatibility; not used by this
            architecture.
        embedding_dim: Accepted for call compatibility; not used, because the
            network contains no embedding layer.
        input_dim: Number of feature columns fed to the network. When left as
            None, the column count of the notebook-level ``X_train`` is used,
            so ``X_train`` must already exist in that case.

    Returns:
        The compiled Keras ``Model``.
    """
    if input_dim is None:
        # Backward-compatible default: size the input from the global
        # training frame, exactly as earlier versions of this function did.
        input_dim = X_train.shape[1]

    features_in = Input(shape=(input_dim,))

    # Single hidden layer applied directly to the feature vector.
    hidden = Dense(64, activation='relu')(features_in)

    # One linear unit: the model predicts a single real value per row.
    prediction = Dense(1, activation='linear')(hidden)

    model = Model(inputs=features_in, outputs=prediction)
    model.compile(loss='mean_squared_error', optimizer='adam')

    return model

# Build the network. The class counts of the 'Gender' and
# 'Purchase_Categories' encoders are forwarded as num_users / num_items.
gender_encoder = label_encoders['Gender']
num_users = len(gender_encoder.classes_)
category_encoder = label_encoders['Purchase_Categories']
num_items = len(category_encoder.classes_)
model = create_recommendation_model(num_users=num_users, num_items=num_items)

# Fit for 10 epochs in mini-batches of 32, validating on the test split.
model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_test, y_test),
    epochs=10,
    batch_size=32,
)


In [None]:
import tensorflow as tf
# Show which TensorFlow release the kernel has loaded.
print(tf.version.VERSION)

2.12.0


In [None]:
pip install --upgrade tensorflow

Collecting tensorflow
  Downloading tensorflow-2.13.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (524.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m524.1/524.1 MB[0m [31m987.0 kB/s[0m eta [36m0:00:00[0m
Collecting keras<2.14,>=2.13.1 (from tensorflow)
  Downloading keras-2.13.1-py3-none-any.whl (1.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m56.9 MB/s[0m eta [36m0:00:00[0m
Collecting tensorboard<2.14,>=2.13 (from tensorflow)
  Downloading tensorboard-2.13.0-py3-none-any.whl (5.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.6/5.6 MB[0m [31m55.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting tensorflow-estimator<2.14,>=2.13.0 (from tensorflow)
  Downloading tensorflow_estimator-2.13.0-py2.py3-none-any.whl (440 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m440.8/440.8 kB[0m [31m38.1 MB/s[0m eta [36m0:00:00[0m
Collecting typing-extensions<4.6.0,>=3.6

In [None]:
import pandas as pd
from google.colab import files
import io

# Ask for the survey CSV through Colab's upload widget. `uploaded` maps each
# uploaded file name to the raw bytes of that file.
uploaded = files.upload()

# Parse the bytes that were just uploaded instead of a fixed path: when a file
# with the same name already exists, Colab stores the new upload under a
# suffixed name (e.g. "vrdataset (27).csv"), so reading "vrdataset.csv" would
# silently load an older copy. If several files are uploaded, the first is used.
if uploaded:
    data = pd.read_csv(io.BytesIO(next(iter(uploaded.values()))))
else:
    # Nothing was uploaded (e.g. the dialog was cancelled): fall back to a
    # file that is already on disk.
    data = pd.read_csv("vrdataset.csv")

# Convert Timestamp to whole seconds since the Unix epoch. "int64" is spelled
# out because plain `int` is platform-dependent.
# NOTE(review): the raw strings look like "... PM GMT+5:30"; confirm the
# parser applies that offset with the intended sign.
data["Timestamp"] = pd.to_datetime(data["Timestamp"]).astype("int64") // 10**9

# Create mapping dictionaries for encoding categorical columns
gender_mapping = {"Female": 0, "Male": 1, "Prefer not to say": 2}
frequency_mapping = {"Few times a month": 0, "Less than once a month": 1, "Once a month": 2, "Multiple times a week": 3}
category_mapping = {
    "Beauty and Personal Care": 0,
    "Clothing and Fashion": 1,
    "Groceries and Gourmet Food;Clothing and Fashion": 2
}
review_mapping = {"No": 0, "Sometimes": 1, "Often": 2, "Heavily": 3, "Occasionally": 4}

# Columns to encode, each paired with the dictionary applied to it.
column_mappings = {
    "Gender": gender_mapping,
    "Purchase_Frequency": frequency_mapping,
    "Purchase_Categories": category_mapping,
    "Customer_Reviews_Importance": review_mapping,
    "Review_Left": review_mapping,
    "Review_Reliability": review_mapping,
    "Review_Helpfulness": review_mapping,
    "Personalized_Recommendation_Frequency": review_mapping,
}
for column, mapping in column_mappings.items():
    raw_values = data[column]
    encoded = raw_values.map(mapping)
    # Series.map turns every value that is absent from the dictionary into
    # NaN. Report those values so the loss of information is visible instead
    # of silent; the stored result is the same as a plain .map().
    unmapped = raw_values[encoded.isna() & raw_values.notna()].unique()
    if len(unmapped):
        print(f"Warning: {column!r} has values with no mapping (now NaN): {list(unmapped)}")
    data[column] = encoded

# Only the literal "Yes" is replaced by 1; every other value is left as is.
data["Recommendation_Helpfulness"] = data["Recommendation_Helpfulness"].replace("Yes", 1)
# NOTE(review): this fills the whole column with None; if the CSV already has
# a Rating_Accuracy column its values are discarded here - confirm intent.
data["Rating_Accuracy"] = [None] * len(data)

print(data)


Saving vrdataset.csv to vrdataset (27).csv
      Timestamp  age  Gender  Purchase_Frequency  Purchase_Categories  \
0    1685905099   23     0.0                 0.0                  0.0   
1    1685908844   23     0.0                 2.0                  1.0   
2    1685918096   24     2.0                 0.0                  2.0   
3    1685918580   24     0.0                 2.0                  NaN   
4    1685919486   22     0.0                 1.0                  NaN   
..          ...  ...     ...                 ...                  ...   
597  1686605522   23     0.0                 NaN                  0.0   
598  1686605573   23     0.0                 NaN                  1.0   
599  1686605639   23     0.0                 2.0                  0.0   
600  1686626840   23     0.0                 0.0                  NaN   
601  1686926765   23     0.0                 NaN                  1.0   

     Personalized_Recommendation_Frequency            Unnamed: 6  \
0           

In [None]:
# Preview the first rows with the notebook's rich table display instead of
# printing the whole frame as plain text.
data.head()

                          Timestamp  age             Gender  \
0    2023/06/04 1:28:19 PM GMT+5:30   23             Female   
1    2023/06/04 2:30:44 PM GMT+5:30   23             Female   
2    2023/06/04 5:04:56 PM GMT+5:30   24  Prefer not to say   
3    2023/06/04 5:13:00 PM GMT+5:30   24             Female   
4    2023/06/04 5:28:06 PM GMT+5:30   22             Female   
..                              ...  ...                ...   
597  2023/06/12 4:02:02 PM GMT+5:30   23             Female   
598  2023/06/12 4:02:53 PM GMT+5:30   23             Female   
599  2023/06/12 4:03:59 PM GMT+5:30   23             Female   
600  2023/06/12 9:57:20 PM GMT+5:30   23             Female   
601  2023/06/16 9:16:05 AM GMT+5:30   23             Female   

         Purchase_Frequency  \
0         Few times a month   
1              Once a month   
2         Few times a month   
3              Once a month   
4    Less than once a month   
..                      ...   
597             Once a wee

In [None]:
# dropna() returns a new frame and leaves `data` untouched, so keep the result
# under its own name instead of discarding it. `data` itself is unchanged.
data_clean = data.dropna()
data_clean

Unnamed: 0,Timestamp,age,Gender,Purchase_Frequency,Purchase_Categories,Personalized_Recommendation_Frequency,Unnamed: 6,Customer_Reviews_Importance,Cart_Completion_Frequency,Review_Left,Review_Reliability,Review_Helpfulness,Personalized_Recommendation_Frequency.1,Recommendation_Helpfulness,Rating_Accuracy,Shopping_Satisfaction,Service_Appreciation
0,2023/06/04 1:28:19 PM GMT+5:30,23,Female,Few times a month,Beauty and Personal Care,Yes,Few times a week,1,Sometimes,Yes,Occasionally,Yes,2,Yes,1,1,Competitive prices
1,2023/06/04 2:30:44 PM GMT+5:30,23,Female,Once a month,Clothing and Fashion,Yes,Few times a month,1,Often,No,Heavily,Yes,2,Sometimes,3,2,Wide product selection
2,2023/06/04 5:04:56 PM GMT+5:30,24,Prefer not to say,Few times a month,Groceries and Gourmet Food;Clothing and Fashion,No,Few times a month,2,Sometimes,No,Occasionally,No,4,No,3,3,Competitive prices
3,2023/06/04 5:13:00 PM GMT+5:30,24,Female,Once a month,Beauty and Personal Care;Clothing and Fashion;...,Sometimes,Few times a month,5,Sometimes,Yes,Heavily,Yes,3,Sometimes,3,4,Competitive prices
4,2023/06/04 5:28:06 PM GMT+5:30,22,Female,Less than once a month,Beauty and Personal Care;Clothing and Fashion,Yes,Few times a month,1,Sometimes,No,Heavily,Yes,4,Yes,2,2,Competitive prices
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
597,2023/06/12 4:02:02 PM GMT+5:30,23,Female,Once a week,Beauty and Personal Care,Sometimes,Few times a week,4,Sometimes,Yes,Moderately,Sometimes,3,Sometimes,3,4,Competitive prices
598,2023/06/12 4:02:53 PM GMT+5:30,23,Female,Once a week,Clothing and Fashion,Sometimes,Few times a week,3,Sometimes,Yes,Heavily,Sometimes,3,Sometimes,3,3,Product recommendations
599,2023/06/12 4:03:59 PM GMT+5:30,23,Female,Once a month,Beauty and Personal Care,Sometimes,Few times a week,3,Sometimes,Yes,Occasionally,Sometimes,3,Sometimes,2,3,Wide product selection
600,2023/06/12 9:57:20 PM GMT+5:30,23,Female,Few times a month,Beauty and Personal Care;Clothing and Fashion;...,Yes,Few times a month,1,Often,No,Heavily,Yes,2,Yes,2,2,Wide product selection


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler


# Uses the DataFrame `data` loaded by an earlier cell.
# Separate the target column from the feature columns.
target_variable = 'Personalized_Recommendation_Frequency'
X = data.drop(columns=[target_variable])
y = data[target_variable]

# Identify numerical and categorical columns
numerical_cols = X.select_dtypes(include=['float64', 'int64']).columns
categorical_cols = X.select_dtypes(include=['object']).columns

# Encode categorical variables using LabelEncoder. The encoders are fitted on
# all rows so that every category present in either split has a code.
label_encoders = {}
for column in categorical_cols:
    label_encoders[column] = LabelEncoder()
    X[column] = label_encoders[column].fit_transform(X[column])

# Split the data into training and testing sets BEFORE scaling, so the scaler
# below never sees the test rows.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Work on independent copies so the column assignments below do not write into
# views of X.
X_train = X_train.copy()
X_test = X_test.copy()

# Scale numerical features using StandardScaler: mean and variance are learned
# from the training split only and then applied to both splits. Fitting on the
# full data would leak test-set statistics into training. X itself is left
# unscaled.
scaler = StandardScaler()
X_train[numerical_cols] = scaler.fit_transform(X_train[numerical_cols])
X_test[numerical_cols] = scaler.transform(X_test[numerical_cols])

# Display the shapes of the split datasets
print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)
print("y_train shape:", y_train.shape)
print("y_test shape:", y_test.shape)

X_train shape: (481, 16)
X_test shape: (121, 16)
y_train shape: (481,)
y_test shape: (121,)


In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Embedding, Concatenate, Flatten
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler


# Uses the DataFrame `data` loaded by an earlier cell.
target_variable = 'Personalized_Recommendation_Frequency'

# Keep only the rows that have a target value; a missing target cannot be
# used for training or validation.
has_target = data[target_variable].notna()
X = data.loc[has_target].drop(columns=[target_variable])

# The model is trained with a mean-squared-error loss, which needs numeric
# targets; raw string labels make Keras fail when it casts them to float.
# LabelEncoder assigns integer codes following the sorted order of the labels;
# the original labels stay available in target_encoder.classes_.
target_encoder = LabelEncoder()
y = pd.Series(
    target_encoder.fit_transform(data.loc[has_target, target_variable]),
    index=X.index,
    name=target_variable,
).astype('float32')

# Identify numerical and categorical columns
numerical_cols = X.select_dtypes(include=['float64', 'int64']).columns
categorical_cols = X.select_dtypes(include=['object']).columns

# Encode categorical variables using LabelEncoder. The encoders are fitted on
# all rows so that every category present in either split has a code.
label_encoders = {}
for column in categorical_cols:
    label_encoders[column] = LabelEncoder()
    X[column] = label_encoders[column].fit_transform(X[column])
# Make sure all numerical columns are in float format
X[numerical_cols] = X[numerical_cols].astype(float)

# Split the data into training and testing sets BEFORE scaling, so the scaler
# below never sees the test rows.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Work on independent copies so the column assignments below do not write into
# views of X.
X_train = X_train.copy()
X_test = X_test.copy()

# Scale numerical features using StandardScaler: statistics are learned from
# the training split only and then applied to both splits.
scaler = StandardScaler()
X_train[numerical_cols] = scaler.fit_transform(X_train[numerical_cols])
X_test[numerical_cols] = scaler.transform(X_test[numerical_cols])


# Define the model architecture
def create_recommendation_model(input_dim, embedding_dim=10):
    """Build and compile a small dense regression network.

    Architecture: Input(input_dim) -> Dense(64, relu) -> Dense(1, linear),
    compiled with mean-squared-error loss and the Adam optimizer.

    The feature vector is fed straight into the dense layer. An Embedding
    layer is deliberately not used: it looks up rows by non-negative integer
    index below its vocabulary size, whereas the inputs here are scaled
    real-valued features and ``input_dim`` is a feature count, not a
    vocabulary size.

    Args:
        input_dim: Number of feature columns in each input row.
        embedding_dim: Kept for call compatibility; not used because the
            network contains no embedding layer.

    Returns:
        The compiled Keras ``Model``.
    """
    features_in = Input(shape=(input_dim,))

    # Single hidden layer applied directly to the feature vector.
    hidden = Dense(64, activation='relu')(features_in)

    # One linear unit: the model predicts a single real value per row.
    prediction = Dense(1, activation='linear')(hidden)

    model = Model(inputs=features_in, outputs=prediction)
    model.compile(loss='mean_squared_error', optimizer='adam')

    return model


# Instantiate the network, sized to the number of feature columns.
n_features = X_train.shape[1]
model = create_recommendation_model(input_dim=n_features)

# Fit for 10 epochs in mini-batches of 32, validating on the test split.
model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_test, y_test),
    epochs=10,
    batch_size=32,
)


Epoch 1/10


UnimplementedError: ignored