## Understanding Customer Preferences

The model predicts the make of the vehicle a customer is likely to purchase based on their appraised vehicle's details.

In [None]:
import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from xgboost import XGBClassifier

In [None]:
!git clone https://github.com/heychhavi/Winter-2024.git

fatal: destination path 'Winter-2024' already exists and is not an empty directory.


In [None]:
# Load the dataset from the cloned repository. The path is relative to the
# working directory because that is where the clone cell creates the checkout.
data = pd.read_csv('Winter-2024/data/winter_2024_data.csv')

# Preview a few rows rather than rendering the whole frame.
data.head()

Unnamed: 0,price,appraisal_offer,make_appraisal,model_appraisal,trim_level_appraisal,model_year_appraisal,mileage_appraisal,engine_appraisal,cylinders_appraisal,mpg_city_appraisal,...,cylinders,mpg_city,mpg_highway,horsepower,fuel_capacity,vehicle_type,color,online_appraisal_flag,state,days_since_offer
0,19500,18800,Kia,Sportage,Not Premium,2014,19300,2.4L,4,21.0,...,3,24.0,34.0,134,16,Small SUV,Gray,0.0,CA,0
1,24000,9000,Ford,Escape,Premium,2012,39300,1.6L,4,22.0,...,6,19.0,27.0,271,15,Small SUV,Black,0.0,IL,0
2,24000,9800,Dodge,Challenger,Not Premium,2013,106100,3.6L,6,19.0,...,4,,,248,12,Luxury,White,0.0,CA,0
3,33000,14600,Toyota,Tacoma,Premium,2014,105800,3.5L,6,19.0,...,6,18.0,24.0,380,16,Medium SUV,Black,1.0,CA,1
4,14200,19500,Jeep,Cherokee,Not Premium,2015,20400,2.4L,4,22.0,...,4,,,188,14,Small SUV,Gold,0.0,FL,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
132712,16500,3400,Hyundai,Elantra,Premium,2010,97800,1.8L,4,28.0,...,4,22.0,32.0,228,16,Small SUV,White,0.0,CA,0
132713,20200,10500,Honda,Civic,Not Premium,2012,40300,2.0L,4,31.0,...,4,22.0,31.0,228,16,Small SUV,Black,0.0,CA,0
132714,21700,5300,Mercedes-Benz,ML350,,2004,75800,3.5L,6,15.0,...,4,22.0,31.0,228,16,Small SUV,Black,0.0,CA,0
132715,21700,8300,Kia,Forte,Not Premium,2013,47300,2.0L,4,29.0,...,4,22.0,31.0,228,16,Small SUV,White,0.0,CA,0


In [None]:


# Baseline classifier: predict `make_appraisal` from every other column.
# NOTE(review): the features still include `model_appraisal` and the other
# appraisal columns, which presumably identify the make almost directly. The
# near-perfect scores reported below look like target leakage -- confirm the
# intended target and feature set.
features = data.drop(columns='make_appraisal')
target = data['make_appraisal']

# Hold out 20% of the rows for evaluation; the seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    features, target, test_size=0.2, random_state=42
)

# Column groups are picked by dtype; columns of any other dtype are dropped by
# the ColumnTransformer below (its default `remainder='drop'`).
numerical_features = X_train.select_dtypes(include=['int64', 'float64']).columns
categorical_features = X_train.select_dtypes(include=['object']).columns

# Numeric columns: fill gaps with the column mean, then standardise.
numerical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler()),
])

# Categorical columns: fill gaps with the mode, then one-hot encode. Categories
# that only appear at prediction time are encoded as all zeros.
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_features),
        ('cat', categorical_transformer, categorical_features),
    ])

# Preprocessing and classifier live in one pipeline so the imputers, scaler and
# encoder are fitted on the training split only.
# n_jobs=-1 builds the trees in parallel; with a fixed random_state the fitted
# forest is the same as with a single worker.
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier(random_state=42, n_jobs=-1)),
])

model.fit(X_train, y_train)

y_pred = model.predict(X_test)

print(classification_report(y_test, y_pred))


               precision    recall  f1-score   support

         Audi       1.00      0.33      0.50         3
          BMW       1.00      0.83      0.91         6
        Buick       1.00      0.98      0.99       222
     Cadillac       1.00      1.00      1.00        28
    Chevrolet       1.00      1.00      1.00      2902
     Chrysler       0.99      1.00      0.99       466
        Dodge       1.00      1.00      1.00      1160
         Fiat       1.00      1.00      1.00        56
         Ford       0.99      1.00      1.00      3570
          GMC       1.00      0.98      0.99       524
      Genesis       1.00      0.50      0.67         2
        Honda       1.00      1.00      1.00      2629
      Hyundai       1.00      1.00      1.00      1914
     Infiniti       1.00      0.98      0.99       145
       Jaguar       1.00      0.86      0.92         7
         Jeep       1.00      1.00      1.00      2170
          Kia       1.00      1.00      1.00      1193
   Land R

## Collaborative filtering

In [None]:
# Expose the row index as an explicit `customer_id` column (placed first).
# Guarded so that re-running the cell does not add a second id column, which
# the previous reset_index + rename combination did.
if 'customer_id' not in data.columns:
    data.insert(0, 'customer_id', data.index)

In [None]:
# Proxy "rating": ratio of `appraisal_offer` to `price`.
# A zero price would give +/-inf and break the min-max scaling applied in the
# next cell, so zero prices are treated as missing instead.
data['proxy_rating'] = data['appraisal_offer'] / data['price'].replace(0, np.nan)

In [None]:
# Min-max scale the proxy rating onto the 1-5 interval:
# the smallest observed ratio maps to 1 and the largest to 5.
proxy = data['proxy_rating']
min_rating, max_rating = proxy.min(), proxy.max()
rating_span = max_rating - min_rating
data['normalized_rating'] = 1 + (proxy - min_rating) / rating_span * 4

In [None]:
!pip install scikit-surprise



In [None]:
# Imported here rather than in the top import cell because scikit-surprise is
# only installed by the cell directly above.
from surprise import Reader, Dataset, SVD
from surprise import accuracy
# Aliased so it does not shadow sklearn's `train_test_split`, which the
# classification cell earlier in the notebook relies on.
from surprise.model_selection import train_test_split as surprise_train_test_split

# Rating scale taken from the observed range of the normalised ratings.
reader = Reader(rating_scale=(data['normalized_rating'].min(), data['normalized_rating'].max()))

# Surprise expects (user, item, rating) triples; rows with any missing field are dropped.
data_surprise = Dataset.load_from_df(
    data[['customer_id', 'make_appraisal', 'normalized_rating']].dropna(),
    reader,
)

# Split the dataset for training and testing
trainset, testset = surprise_train_test_split(data_surprise, test_size=0.25, random_state=42)

# SVD algorithm, seeded so the fitted factors and the RMSE are reproducible.
# NOTE(review): `customer_id` comes from the row index, so each customer has
# exactly one rating and every customer in `testset` is unseen during training.
# The model presumably cannot personalise in that setting -- confirm that
# collaborative filtering is meaningful for this data.
algo = SVD(random_state=42)
algo.fit(trainset)

# Predict the rating of every (customer, make) pair held out in the test set
predictions = algo.test(testset)

# RMSE between predicted and held-out ratings
accuracy.rmse(predictions)


RMSE: 0.2897


0.2896511432699224

In [None]:
# Generate recommendations for a single customer (customer_id = 1)
user_id = 1

# Ratings this customer contributed to the training split. `to_inner_uid`
# raises ValueError for a customer that landed in the test split, so fall back
# to an empty list instead of aborting the cell.
try:
    user_ratings = trainset.ur[trainset.to_inner_uid(user_id)]
except ValueError:
    user_ratings = []

# Candidate items: every known make. Missing values are excluded so that NaN
# can never be returned as a recommendation.
items = data['make_appraisal'].dropna().unique()

# Predict ratings for all items
predicted_ratings = [algo.predict(user_id, item).est for item in items]

# Get the top 5 recommended items as (make, predicted rating), best first
top_5_recommended_items = sorted(zip(items, predicted_ratings), key=lambda x: x[1], reverse=True)[:5]
top_5_recommended_items


In [None]:

def recommend_top_n(model, customer_id, candidate_items, n=5):
    """Return the `n` candidate items with the highest predicted rating.

    Parameters
    ----------
    model : fitted estimator exposing `predict(uid, iid)` whose result has an
        `.est` attribute (the predicted rating).
    customer_id : raw user id passed to `model.predict`.
    candidate_items : iterable of raw item ids to score.
    n : number of recommendations to keep (default 5).

    Returns
    -------
    list of (item, predicted_rating) tuples, best first. Ties keep the order
    in which the items appear in `candidate_items`.
    """
    scored = [(item, model.predict(customer_id, item).est) for item in candidate_items]
    return sorted(scored, key=lambda pair: pair[1], reverse=True)[:n]


unique_customer_ids = data['customer_id'].unique()

# Candidate items (vehicle makes); missing values are excluded so that NaN can
# never be returned as a recommendation.
items = data['make_appraisal'].dropna().unique()

# Top 5 recommendations for every customer, keyed by customer_id
all_customer_recommendations = {
    customer_id: recommend_top_n(algo, customer_id, items)
    for customer_id in unique_customer_ids
}


In [None]:
# One row per customer, one column per recommendation rank; each cell holds a
# (make, predicted rating) tuple.
rank_labels = [f'Recommendation_{rank}' for rank in range(1, 6)]

recommendations_df = pd.DataFrame.from_dict(all_customer_recommendations, orient='index')
recommendations_df.columns = rank_labels

# Move the customer ids out of the index into a regular `customer_id` column.
recommendations_df = (
    recommendations_df
    .reset_index()
    .rename(columns={'index': 'customer_id'})
)


In [None]:
# Preview the first customers rather than rendering the whole frame
recommendations_df.head(10)

Unnamed: 0,customer_id,Recommendation_1,Recommendation_2,Recommendation_3,Recommendation_4,Recommendation_5
0,0,"(Porsche, 1.80363022698151)","(Genesis, 1.7642357352861628)","(Jaguar, 1.6929801497640138)","(Land Rover, 1.6729047813300997)","(BMW, 1.6690994998668673)"
1,1,"(Genesis, 1.7671474135004746)","(BMW, 1.6788299622537333)","(Ram, 1.6505744197833991)","(Porsche, 1.6324574175844153)","(Jaguar, 1.5918775271303405)"
2,2,"(Porsche, 1.7745417313532355)","(Jaguar, 1.7124386416750021)","(BMW, 1.67329257261872)","(Genesis, 1.6537164107858842)","(Ram, 1.6521643254276102)"
3,3,"(Porsche, 1.7986303863370332)","(Jaguar, 1.698493139284633)","(Ram, 1.6711164156793872)","(Genesis, 1.642999795841386)","(Land Rover, 1.6144613955953955)"
4,4,"(Jaguar, 1.9146447308580392)","(BMW, 1.8806570455380167)","(Land Rover, 1.8315684003360795)","(Porsche, 1.8164152226247372)","(Ram, 1.7498457526941056)"
...,...,...,...,...,...,...
132712,132712,"(Porsche, 1.8527997601430868)","(BMW, 1.6980567990869093)","(Cadillac, 1.5999852268515755)","(Genesis, 1.5973353836492568)","(Jaguar, 1.5754389556705566)"
132713,132713,"(Porsche, 1.9394185304501645)","(Ram, 1.6634349223258136)","(Jaguar, 1.6588271192654804)","(Infiniti, 1.5796100550407508)","(BMW, 1.5777383798333648)"
132714,132714,"(Jaguar, 1.8532989559192876)","(Porsche, 1.7947973379965374)","(BMW, 1.6878939595070623)","(Ram, 1.6492923212576072)","(Genesis, 1.5700411000468186)"
132715,132715,"(Porsche, 1.7602983877163456)","(Genesis, 1.7149083043802957)","(BMW, 1.7086217610116616)","(Ram, 1.6676236413309284)","(Jaguar, 1.5553971008994063)"
