In [1]:
import pandas as pd

# Load the CSV file
data = pd.read_csv('data-dummy.csv')

# Display the first few rows
print(data.head())


                     id   accountId   amount                 date  \
0     BNI_ACC_17_TRAN_1  BNI_ACC_17   351500  12/01/2023 00:00:00   
1    BNI_ACC_17_TRAN_10  BNI_ACC_17   500000  12/01/2023 00:00:00   
2   BNI_ACC_17_TRAN_100  BNI_ACC_17  2200000  12/01/2023 00:00:00   
3  BNI_ACC_17_TRAN_1000  BNI_ACC_17   300000  12/01/2023 00:00:00   
4  BNI_ACC_17_TRAN_1001  BNI_ACC_17  1499000  12/01/2023 00:00:00   

   personeticsMCC channel clientTransactionType  \
0              26  Mobile   TRANSFER_ANTAR_BANK   
1              26  Mobile   TRANSFER_ANTAR_BANK   
2            8999  Mobile   TRANSFER_ANTAR_BANK   
3              26  Mobile   TRANSFER_ANTAR_BANK   
4              26  Mobile   TRANSFER_ANTAR_BANK   

                     description                       deviceId direction  \
0                SPP anak Gilang                SPP ANAK GILANG         D   
1               Bayar Myrepublic               BAYAR MYREPUBLIC         D   
2                  Gaji desember               

In [2]:
# Assuming 'accountId' as user and 'category' as product
user_product_interactions = data[['accountId', 'category', 'amount']]

# Handle missing values if necessary
user_product_interactions = user_product_interactions.fillna(0)


In [3]:
# Create a user-item matrix with interaction scores (amount)
user_item_matrix = user_product_interactions.pivot_table(index='accountId', columns='category', values='amount', aggfunc='sum').fillna(0)

# Display the matrix
print(user_item_matrix.head())


category        Belanja  Hadiah & Amal    Hiburan  Kesehatan  \
accountId                                                      
BNI_ACC_17  605716499.0            0.0   150000.0  4445000.0   
BNI_ACC_18  570235422.0       570000.0  5599200.0  6123210.0   
BNI_ACC_19  212938110.0            0.0  4179000.0        0.0   
BNI_ACC_20  226882982.0      1000000.0  9562581.0   155000.0   
BNI_ACC_21  268270357.0            0.0  8492011.0    90000.0   

category    Makanan & Minuman  Pendidikan  Perawatan Diri     Pinjaman  \
accountId                                                                
BNI_ACC_17         29922000.0   7152000.0       2900000.0  430491788.0   
BNI_ACC_18         45214067.0    900000.0       7260500.0  387075060.0   
BNI_ACC_19         11331000.0   4970000.0       1000000.0  240196562.0   
BNI_ACC_20          9818500.0     99000.0        470880.0   86306555.0   
BNI_ACC_21         21285100.0         0.0       2674000.0   93542051.0   

category    Tabungan & Investasi

In [4]:
from sklearn.decomposition import TruncatedSVD
import numpy as np

# Apply SVD to the user-item matrix
svd = TruncatedSVD(n_components=13)
user_factors = svd.fit_transform(user_item_matrix)
item_factors = svd.components_.T

# Calculate the predicted interaction matrix
predicted_matrix = np.dot(user_factors, item_factors.T)

# Convert the predicted matrix back to a DataFrame for easier use
predicted_df = pd.DataFrame(predicted_matrix, index=user_item_matrix.index, columns=user_item_matrix.columns)
print(predicted_df.head())


category         Belanja  Hadiah & Amal    Hiburan     Kesehatan  \
accountId                                                          
BNI_ACC_17  6.057165e+08   4.286917e-08   150000.0  4.445000e+06   
BNI_ACC_18  5.702354e+08   5.700000e+05  5599200.0  6.123210e+06   
BNI_ACC_19  2.129381e+08  -2.958061e-08  4179000.0 -7.256271e-09   
BNI_ACC_20  2.268830e+08   1.000000e+06  9562581.0  1.550000e+05   
BNI_ACC_21  2.682704e+08  -2.912884e-08  8492011.0  9.000000e+04   

category    Makanan & Minuman    Pendidikan  Perawatan Diri      Pinjaman  \
accountId                                                                   
BNI_ACC_17         29922000.0  7.152000e+06       2900000.0  4.304918e+08   
BNI_ACC_18         45214067.0  9.000000e+05       7260500.0  3.870751e+08   
BNI_ACC_19         11331000.0  4.970000e+06       1000000.0  2.401966e+08   
BNI_ACC_20          9818500.0  9.900000e+04        470880.0  8.630655e+07   
BNI_ACC_21         21285100.0 -6.164658e-09       2674000.0  

In [5]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Create TF-IDF features from the 'desc_processed' column
vectorizer = TfidfVectorizer(stop_words='english')
product_features = vectorizer.fit_transform(data['desc_processed'])

# Calculate the cosine similarity between products
cosine_sim = cosine_similarity(product_features)

# Create a function to recommend products
def recommend_products(product_ids, cosine_sim=cosine_sim):
    sim_scores = list(enumerate(cosine_sim[product_ids]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:6]
    recommended_product_indices = [i[0] for i in sim_scores]
    return data['category'].iloc[recommended_product_indices]

# Example: Recommend products similar to a given product
recommended_products = recommend_products(product_ids=10)
print(recommended_products)


1102    Uang Keluar
0       Uang Keluar
1       Uang Keluar
2           Belanja
3       Uang Keluar
Name: category, dtype: object


In [6]:
from sklearn.metrics import mean_squared_error

# Evaluate the model using Mean Squared Error
mse = mean_squared_error(user_item_matrix.values, predicted_matrix)
print(f'MSE: {mse}')


MSE: 3.846820360681137e-13
