In [9]:
#read dataset
import pandas as pd
from sklearn.preprocessing import LabelEncoder
df = pd.read_csv("FixedDataset.csv")
df = df.drop(df.columns[0], axis=1)
df = df.drop_duplicates() #drop duplicates data


In [10]:
df.head()

Unnamed: 0,customer_id,page_views,time_spent,product_id,purchase_date,category,price,ratings
0,1,25,120,101,2023-01-01,Electronics,500,4.5
6,5,22,110,101,2023-01-05,Electronics,500,4.5
12,1,25,120,105,2023-01-05,Electronics,800,4.8
18,2,20,90,102,2023-01-02,Clothing,50,3.8
19,2,25,120,102,2023-01-02,Clothing,50,3.8


In [12]:
label_encoder = LabelEncoder()

# Terapkan label encoding pada kolom 'kategori'
df['category'] = label_encoder.fit_transform(df['category'])

In [13]:
df.head(5)

Unnamed: 0,customer_id,page_views,time_spent,product_id,purchase_date,category,price,ratings
0,1,25,120,101,2023-01-01,2,500,4.5
6,5,22,110,101,2023-01-05,2,500,4.5
12,1,25,120,105,2023-01-05,2,800,4.8
18,2,20,90,102,2023-01-02,1,50,3.8
19,2,25,120,102,2023-01-02,1,50,3.8


Collaborative Filtering (K-Nearest Neighbors):

In [14]:
from sklearn.neighbors import NearestNeighbors

# Fitur yang akan digunakan untuk rekomendasi
features = ['page_views', 'time_spent', 'category', 'price', 'ratings']

# Memilih data yang akan digunakan untuk pelatihan model
train_data = df[features]

# Inisialisasi model K-Nearest Neighbors
knn_model = NearestNeighbors(metric='cosine', algorithm='brute')

# Melatih model
knn_model.fit(train_data)

# Menggunakan model untuk membuat rekomendasi
# Misalnya, untuk mendapatkan 5 rekomendasi produk untuk customer dengan customer_id=1
customer_data = df[df['customer_id'] == 1][features].iloc[0]
distances, indices = knn_model.kneighbors([customer_data], n_neighbors=5)

# Indeks produk yang direkomendasikan
recommended_product_indices = indices.flatten()

print("Produk yang direkomendasikan:")
print(df.iloc[recommended_product_indices])


Produk yang direkomendasikan:
     customer_id  page_views  time_spent  product_id        purchase_date  \
0              1          25         120         101           2023-01-01   
88            85          12          67         113  2022-04-09 00:00:00   
121           55          18          87          46  2023-08-24 00:00:00   
6              5          22         110         101           2023-01-05   
81            71          30          97          99  2023-03-02 00:00:00   

     category  price  ratings  
0           2    500      4.5  
88          3    291      1.3  
121         3    389      1.7  
6           2    500      4.5  
81          0    418      4.8  




Content-Based Filtering (Decision Tree Classifier):

In [19]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_squared_error

# Memilih data yang akan digunakan untuk pelatihan model
X_train = df[features]
y_train = df['product_id']

# Inisialisasi dan melatih model Decision Tree Classifier
dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(X_train, y_train)

# Membuat prediksi produk untuk customer dengan customer_id=1
customer_data = df[df['customer_id'] == 1][features].iloc[0]
predicted_product_id = dt_model.predict([customer_data])

print("Produk yang direkomendasikan untuk customer_id=1:", predicted_product_id)

# Mengukur akurasi model (contoh saja, karena tidak relevan untuk rekomendasi produk)
y_pred = dt_model.predict(X_train)
accuracy = accuracy_score(y_train, y_pred)
# Evaluasi kinerja model dengan RMSE (Root Mean Squared Error)
rmse = mean_squared_error(y_train, y_pred, squared=False)
print("RMSE:", rmse)
print("Akurasi model:", accuracy)


Produk yang direkomendasikan untuk customer_id=1: [101]
RMSE: 0.0
Akurasi model: 1.0




In [17]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
# Memilih data yang akan digunakan untuk pelatihan model

X_train = df[features]
y_train = df['product_id']

# Misalkan X_train dan y_train adalah fitur dan target yang digunakan untuk melatih model
X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

# Inisialisasi dan melatih model Random Forest Regressor
rf_regressor = RandomForestRegressor(random_state=42)
rf_regressor.fit(X_train, y_train)

# Lakukan prediksi untuk data uji
y_pred = rf_regressor.predict(X_test)

# Evaluasi kinerja model dengan RMSE (Root Mean Squared Error)
rmse = mean_squared_error(y_test, y_pred, squared=False)
print("RMSE:", rmse)


RMSE: 44.19198435519726
