# 🚗 Car Review Recommender System: SVD vs Autoencoder

In [3]:
!uv pip install keras

[2mResolved [1m13 packages[0m [2min 349ms[0m[0m
[2mPrepared [1m9 packages[0m [2min 893ms[0m[0m
         If the cache and target directories are on different filesystems, hardlinking may not be supported.
[2mInstalled [1m9 packages[0m [2min 210ms[0m[0m
 [32m+[39m [1mabsl-py[0m[2m==2.2.2[0m
 [32m+[39m [1mh5py[0m[2m==3.13.0[0m
 [32m+[39m [1mkeras[0m[2m==3.9.2[0m
 [32m+[39m [1mmarkdown-it-py[0m[2m==3.0.0[0m
 [32m+[39m [1mmdurl[0m[2m==0.1.2[0m
 [32m+[39m [1mml-dtypes[0m[2m==0.5.1[0m
 [32m+[39m [1mnamex[0m[2m==0.0.9[0m
 [32m+[39m [1moptree[0m[2m==0.15.0[0m
 [32m+[39m [1mrich[0m[2m==14.0.0[0m


In [4]:

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from keras.layers import Input, Dense
from keras.models import Model
from keras.optimizers import Adam
from keras.utils import set_random_seed
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split


ModuleNotFoundError: No module named 'tensorflow'

In [None]:

# Load the raw reviews and build a dense user x item rating matrix.
# The code below reads the 'Reviewer', 'Car Model' and 'Rating' columns of the CSV.
df = pd.read_csv("car_reviews.csv")
df['Rating'] = df['Rating'].astype(float)

# Drop rows that cannot be placed in the matrix. A missing Reviewer / Car Model would
# receive category code -1, which is counted as an extra id and then indexes the *last*
# row/column, lumping every such review into one phantom user/item. A NaN rating would
# be written into the matrix as-is and break the downstream factorisation.
df = df.dropna(subset=['Reviewer', 'Car Model', 'Rating']).reset_index(drop=True)

# Map reviewers and car models to contiguous integer ids (0..n-1).
df['user_id'] = df['Reviewer'].astype('category').cat.codes
df['item_id'] = df['Car Model'].astype('category').cat.codes

n_users = df['user_id'].nunique()
n_items = df['item_id'].nunique()

# If a reviewer rated the same model more than once, keep the last rating in file
# order. De-duplicating first makes the vectorised assignment below deterministic.
rated_pairs = df.drop_duplicates(subset=['user_id', 'item_id'], keep='last')

# 0 marks "no rating"; observed ratings are scattered in with one vectorised write.
user_item_matrix = np.zeros((n_users, n_items))
user_item_matrix[
    rated_pairs['user_id'].to_numpy(),
    rated_pairs['item_id'].to_numpy(),
] = rated_pairs['Rating'].to_numpy()


In [None]:

# Hold out 20% of the observed ratings (entries > 0) for evaluation.
# The held-out entries are zeroed in the training matrix and copied into an
# otherwise all-zero test matrix of the same shape.
train_matrix = user_item_matrix.copy()
mask = train_matrix > 0
observed_pairs = np.argwhere(mask)
train_data, test_data = train_test_split(observed_pairs, test_size=0.2, random_state=42)

# Column 0 holds the user index, column 1 the item index of each held-out rating.
held_users = test_data[:, 0]
held_items = test_data[:, 1]

test_matrix = np.zeros_like(user_item_matrix)
test_matrix[held_users, held_items] = user_item_matrix[held_users, held_items]
train_matrix[held_users, held_items] = 0


In [None]:

# Rank-2 truncated SVD of the zero-filled training matrix.
# random_state is fixed because TruncatedSVD's default 'randomized' solver is
# stochastic; without a seed the reported RMSE changes from run to run.
svd = TruncatedSVD(n_components=2, random_state=42)
svd_matrix = svd.fit_transform(train_matrix)

# Low-rank reconstruction: (users x 2) @ (2 x items) -> predicted rating matrix.
svd_pred = np.dot(svd_matrix, svd.components_)

# Score only the held-out ratings (the non-zero entries of the test matrix).
held_out = test_matrix > 0
svd_rmse = np.sqrt(mean_squared_error(test_matrix[held_out], svd_pred[held_out]))
print("SVD RMSE:", svd_rmse)


In [None]:

# Seed Python, NumPy and the Keras backend so that weight initialisation and batch
# shuffling (and therefore the reported RMSE) are repeatable across runs.
# Some backend/GPU kernels may still be non-deterministic.
set_random_seed(42)

# Symmetric dense autoencoder: n_items -> 128 -> 64 -> 32 -> 64 -> 128 -> n_items.
input_layer = Input(shape=(n_items,))
encoded = Dense(128, activation='relu')(input_layer)
encoded = Dense(64, activation='relu')(encoded)
encoded = Dense(32, activation='relu')(encoded)
decoded = Dense(64, activation='relu')(encoded)
decoded = Dense(128, activation='relu')(decoded)
# Linear output so reconstructed ratings are not squashed into a fixed range.
output_layer = Dense(n_items, activation='linear')(decoded)

autoencoder = Model(inputs=input_layer, outputs=output_layer)
# NOTE(review): plain MSE also penalises the zero-filled (unrated) entries, so the
# network is trained to reconstruct 0 for missing ratings. This mirrors what the SVD
# baseline sees, but a loss masked to observed ratings is worth considering.
autoencoder.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
# Each user row is both the input and the reconstruction target.
autoencoder.fit(train_matrix, train_matrix, epochs=100, batch_size=2, verbose=0, validation_split=0.1)

# Reconstruct every user's row from the training matrix, then score only the
# held-out ratings (the non-zero entries of the test matrix).
ae_pred = autoencoder.predict(train_matrix)
held_out = test_matrix > 0
ae_rmse = np.sqrt(mean_squared_error(test_matrix[held_out], ae_pred[held_out]))
print("Autoencoder RMSE:", ae_rmse)


In [None]:

# Side-by-side bar chart of the held-out RMSE for the two models.
methods = ['SVD', 'Autoencoder']
rmses = [svd_rmse, ae_rmse]

# Explicit figure/axes handles instead of the implicit pyplot state machine.
fig, ax = plt.subplots(figsize=(6, 4))
sns.barplot(x=methods, y=rmses, palette='Set2', ax=ax)
ax.set_title("RMSE Comparison")
ax.set_ylabel("RMSE")
fig.tight_layout()
plt.show()



### 🔍 Summary: SVD vs Autoencoder

| Method          | RMSE tends to be lower for | Strengths                            | Weaknesses                       |
|-----------------|----------------------------|--------------------------------------|----------------------------------|
| **SVD**         | Small datasets             | Fast, interpretable, low-dimensional | Assumes linear latent structure  |
| **Autoencoder** | Large datasets             | Captures nonlinear relationships     | Requires tuning, longer training |

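**Conclusion**: As a general guideline, use SVD when simplicity matters or the dataset is small, and use a deep autoencoder for larger datasets with complex, sparse interaction patterns. Check the RMSE values printed above to see which model actually performs better on this dataset.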
