# Recommendation System

In [246]:
import pandas as pd

# Load the cleaned car table from a CSV path relative to the notebook's
# working directory; the bare `df` on the last line displays the frame.
df = pd.read_csv("data/fh4_cars_cleaned.csv")
df

Unnamed: 0,name,class,pi,category,transmission,power_hp,weight_lbs,price,speed,handling,acceleration,breaking
0,2017 Abarth 124 SPIDER,C,577,MODERN SPORTS CARS,RWD,165,2478,43500,6.0,5.9,5.6,5.4
1,2016 Abarth 695 BIPOSTO,B,607,HOT HATCH,FWD,186,2198,48000,5.7,6.3,5.9,6.4
2,1980 Abarth FIAT 131,D,449,CLASSIC RALLY,RWD,139,2161,38000,5.5,4.7,5.5,4.5
3,1968 Abarth 595 ESSEESSE,D,100,CULT CARS,RWD,28,1257,35000,3.9,4.0,3.7,4.0
4,2017 Acura NSX,S1,850,MODERN SUPERCARS,AWD,572,3803,170000,7.4,7.6,9.6,8.3
...,...,...,...,...,...,...,...,...,...,...,...,...
747,2017 VUHL 05RR,S2,926,EXTREME TRACK TOYS,RWD,385,1598,250000,6.9,10.0,9.1,10.0
748,2016 W Motors LYKAN HYPERSPORT,S2,907,HYPERCARS,RWD,780,3042,3400000,8.7,8.4,7.7,9.0
749,1945 WILLYS MB JEEP,D,100,OFFROAD,AWD,60,2136,40000,3.9,4.2,4.0,4.0
750,2019 Zenvo TSR-S,S2,972,HYPERCARS,RWD,1177,3411,1200000,8.9,9.9,8.2,10.0


In [247]:
# Print column names, non-null counts and dtypes to check for missing values
# and to see which columns are numeric and which are text.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 752 entries, 0 to 751
Data columns (total 12 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   name          752 non-null    object 
 1   class         752 non-null    object 
 2   pi            752 non-null    int64  
 3   category      752 non-null    object 
 4   transmission  752 non-null    object 
 5   power_hp      752 non-null    int64  
 6   weight_lbs    752 non-null    int64  
 7   price         752 non-null    int64  
 8   speed         752 non-null    float64
 9   handling      752 non-null    float64
 10  acceleration  752 non-null    float64
 11  breaking      752 non-null    float64
dtypes: float64(4), int64(4), object(4)
memory usage: 70.6+ KB


## Encoding

### Category column

In [248]:
# List the distinct category labels, most frequent first.
df["category"].value_counts().index

Index(['EXTREME TRACK TOYS', 'RETRO SPORTS CARS', 'MODERN SUPERCARS',
       'CLASSIC MUSCLE', 'TRACK TOYS', 'HYPERCARS', 'SUPER SALOONS',
       'RETRO SUPERCARS', 'CLASSIC RACERS', 'OFFROAD', 'MODERN SPORTS CARS',
       'CLASSIC SPORTS CARS', 'RALLY MONSTERS', 'CULT CARS',
       'RODS AND CUSTOMS', 'RARE CLASSICS', 'SPORTS UTILITY HEROES',
       'SUPER GT', 'MODERN MUSCLE', 'RETRO SALOONS', 'EXTREME OFFROAD',
       'RETRO HOT HATCH', 'HOT HATCH', 'RETRO RALLY', 'SUPER HOT HATCH',
       'RETRO MUSCLE', 'VANS AND UTILITY', 'OFFROADS BUGGIES', 'CLASSIC RALLY',
       'GT CARS', 'MODERN RALLY', 'VINTAGE RACERS', 'TRUCKS'],
      dtype='object', name='category')

### Transmission column

In [249]:
# Count how many cars carry each transmission value.
df["transmission"].value_counts()

transmission
RWD    514
AWD    188
FWD     50
Name: count, dtype: int64

In [250]:
# One-hot encode the two unordered text columns: every distinct value becomes
# its own indicator column named "<column>_<value>".
encoded_df = pd.get_dummies(
    df.loc[:, ["category", "transmission"]],
)

### Class column

In [251]:
# Count how many cars fall into each class label.
df["class"].value_counts()

class
S1    168
A     163
D     125
C     116
B     113
S2     67
Name: count, dtype: int64

In [252]:
from sklearn.preprocessing import OrdinalEncoder

# Encode the class labels as ranks in the explicit order below
# (D -> 0.0, C -> 1.0, ..., S2 -> 5.0) instead of one-hot indicators.
# NOTE(review): this column spans 0-5 and is later concatenated, unscaled,
# next to standardized features, so it carries more weight in the cosine
# similarity than any single scaled column - confirm that this is intended.
class_order = [["D", "C", "B", "A", "S1", "S2"]]
enc = OrdinalEncoder(categories=class_order)
enc.fit(df[["class"]])
encoded_df["class"] = enc.transform(df[["class"]])

encoded_df

Unnamed: 0,category_CLASSIC MUSCLE,category_CLASSIC RACERS,category_CLASSIC RALLY,category_CLASSIC SPORTS CARS,category_CULT CARS,category_EXTREME OFFROAD,category_EXTREME TRACK TOYS,category_GT CARS,category_HOT HATCH,category_HYPERCARS,...,category_SUPER HOT HATCH,category_SUPER SALOONS,category_TRACK TOYS,category_TRUCKS,category_VANS AND UTILITY,category_VINTAGE RACERS,transmission_AWD,transmission_FWD,transmission_RWD,class
0,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,True,1.0
1,False,False,False,False,False,False,False,False,True,False,...,False,False,False,False,False,False,False,True,False,2.0
2,False,False,True,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,True,0.0
3,False,False,False,False,True,False,False,False,False,False,...,False,False,False,False,False,False,False,False,True,0.0
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,True,False,False,4.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
747,False,False,False,False,False,False,True,False,False,False,...,False,False,False,False,False,False,False,False,True,5.0
748,False,False,False,False,False,False,False,False,False,True,...,False,False,False,False,False,False,False,False,True,5.0
749,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,True,False,False,0.0
750,False,False,False,False,False,False,False,False,False,True,...,False,False,False,False,False,False,False,False,True,5.0


## Scaler

In [253]:
from sklearn.preprocessing import StandardScaler

# Standardize every int64/float64 column so that features measured on large
# scales (price, for instance) do not swamp the small-scale ones.
df_num_col = df.select_dtypes(include=["int64", "float64"]).columns
scaled_values = StandardScaler().fit_transform(df[df_num_col])
scaled_df = pd.DataFrame(scaled_values, columns=df_num_col)
scaled_df

Unnamed: 0,pi,power_hp,weight_lbs,price,speed,handling,acceleration,breaking
0,-0.449573,-0.915193,-0.565632,-0.356494,-0.576406,-0.305142,-0.635740,-0.560262
1,-0.307350,-0.840158,-0.784333,-0.353767,-0.814046,-0.052216,-0.432908,0.002094
2,-1.056392,-1.008093,-0.813233,-0.359828,-0.972474,-1.063919,-0.703351,-1.066382
3,-2.710922,-1.404707,-1.519325,-0.361646,-2.239892,-1.506539,-1.920348,-1.347560
4,0.844658,0.539058,0.469292,-0.279827,0.532585,0.769792,2.068696,1.070570
...,...,...,...,...,...,...,...,...
747,1.204957,-0.129111,-1.252978,-0.231341,0.136517,2.287345,1.730642,2.026575
748,1.114883,1.282262,-0.125106,1.677771,1.562362,1.275643,0.784089,1.464219
749,-2.710922,-1.290368,-0.832760,-0.358615,-2.239892,-1.380076,-1.717515,-1.347560
750,1.423033,2.700781,0.163111,0.344423,1.720790,2.224114,1.122143,2.026575


In [254]:
# Put the standardized numeric features and the encoded categorical features
# side by side, row for row, to form the feature matrix used for similarity.
combined_df = pd.concat(
    [scaled_df, encoded_df],
    axis="columns",
)
combined_df

Unnamed: 0,pi,power_hp,weight_lbs,price,speed,handling,acceleration,breaking,category_CLASSIC MUSCLE,category_CLASSIC RACERS,...,category_SUPER HOT HATCH,category_SUPER SALOONS,category_TRACK TOYS,category_TRUCKS,category_VANS AND UTILITY,category_VINTAGE RACERS,transmission_AWD,transmission_FWD,transmission_RWD,class
0,-0.449573,-0.915193,-0.565632,-0.356494,-0.576406,-0.305142,-0.635740,-0.560262,False,False,...,False,False,False,False,False,False,False,False,True,1.0
1,-0.307350,-0.840158,-0.784333,-0.353767,-0.814046,-0.052216,-0.432908,0.002094,False,False,...,False,False,False,False,False,False,False,True,False,2.0
2,-1.056392,-1.008093,-0.813233,-0.359828,-0.972474,-1.063919,-0.703351,-1.066382,False,False,...,False,False,False,False,False,False,False,False,True,0.0
3,-2.710922,-1.404707,-1.519325,-0.361646,-2.239892,-1.506539,-1.920348,-1.347560,False,False,...,False,False,False,False,False,False,False,False,True,0.0
4,0.844658,0.539058,0.469292,-0.279827,0.532585,0.769792,2.068696,1.070570,False,False,...,False,False,False,False,False,False,True,False,False,4.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
747,1.204957,-0.129111,-1.252978,-0.231341,0.136517,2.287345,1.730642,2.026575,False,False,...,False,False,False,False,False,False,False,False,True,5.0
748,1.114883,1.282262,-0.125106,1.677771,1.562362,1.275643,0.784089,1.464219,False,False,...,False,False,False,False,False,False,False,False,True,5.0
749,-2.710922,-1.290368,-0.832760,-0.358615,-2.239892,-1.380076,-1.717515,-1.347560,False,False,...,False,False,False,False,False,False,True,False,False,0.0
750,1.423033,2.700781,0.163111,0.344423,1.720790,2.224114,1.122143,2.026575,False,False,...,False,False,False,False,False,False,False,False,True,5.0


## Cosine Similarity

In [255]:
from sklearn.metrics.pairwise import cosine_similarity

# Pairwise cosine similarity between the feature rows of every pair of cars.
similarity_matrix = cosine_similarity(combined_df)

# Label both axes with the car names so a score can be looked up by name.
car_names = df["name"]
similarity_df = pd.DataFrame(
    similarity_matrix,
    index=car_names,
    columns=car_names,
)
similarity_df.head()

name,2017 Abarth 124 SPIDER,2016 Abarth 695 BIPOSTO,1980 Abarth FIAT 131,1968 Abarth 595 ESSEESSE,2017 Acura NSX,2002 Acura RSX TYPE-S,2001 Acura INTEGRA TYPE-R,2018 Alfa Romeo STELVIO QUADRIFOGLIO,2017 Alfa Romeo GIULIA QUADRIFOGLIO,2016 Alfa Romeo GIULIA QUADRIFOGLIO FORZA EDITION,...,1963 Volkswagen TYPE 2 DE LUXE FORZA EDITION,2016 Volvo IRON KNIGHT,2015 Volvo V60 POLESTAR,1997 Volvo 850 R,1983 Volvo 242 TURBO EVOLUTION,2017 VUHL 05RR,2016 W Motors LYKAN HYPERSPORT,1945 WILLYS MB JEEP,2019 Zenvo TSR-S,2016 Zenvo ST1
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017 Abarth 124 SPIDER,1.0,0.616212,0.705092,0.672242,0.04201,0.609818,0.618611,0.109222,0.24247,0.24667,...,0.617951,-0.290463,0.35075,0.57681,0.748571,0.217052,0.079704,0.569818,-0.050548,-0.00616
2016 Abarth 695 BIPOSTO,0.616212,1.0,0.362566,0.406169,0.3855,0.7303,0.740302,0.356983,0.433261,0.476455,...,0.391857,-0.183632,0.483585,0.594631,0.492122,0.520651,0.351828,0.379566,0.246927,0.234648
1980 Abarth FIAT 131,0.705092,0.362566,1.0,0.872827,-0.38005,0.512309,0.521304,-0.216207,-0.235447,-0.239154,...,0.750378,-0.433471,0.093401,0.562638,0.88044,-0.260436,-0.376798,0.790332,-0.505402,-0.436585
1968 Abarth 595 ESSEESSE,0.672242,0.406169,0.872827,1.0,-0.438246,0.527555,0.534045,-0.309408,-0.300279,-0.294664,...,0.935706,-0.430202,0.034125,0.570666,0.717887,-0.295019,-0.405525,0.910973,-0.525989,-0.471987
2017 Acura NSX,0.04201,0.3855,-0.38005,-0.438246,1.0,0.072852,0.06804,0.880338,0.820077,0.862757,...,-0.396643,0.422273,0.664508,-0.046816,-0.044484,0.849601,0.820499,-0.388184,0.833861,0.79031


In [256]:
# Spot-check ten random rows of the similarity table. The fixed random_state
# makes the same rows appear on every Restart & Run All.
similarity_df.sample(10, random_state=42)

name,2017 Abarth 124 SPIDER,2016 Abarth 695 BIPOSTO,1980 Abarth FIAT 131,1968 Abarth 595 ESSEESSE,2017 Acura NSX,2002 Acura RSX TYPE-S,2001 Acura INTEGRA TYPE-R,2018 Alfa Romeo STELVIO QUADRIFOGLIO,2017 Alfa Romeo GIULIA QUADRIFOGLIO,2016 Alfa Romeo GIULIA QUADRIFOGLIO FORZA EDITION,...,1963 Volkswagen TYPE 2 DE LUXE FORZA EDITION,2016 Volvo IRON KNIGHT,2015 Volvo V60 POLESTAR,1997 Volvo 850 R,1983 Volvo 242 TURBO EVOLUTION,2017 VUHL 05RR,2016 W Motors LYKAN HYPERSPORT,1945 WILLYS MB JEEP,2019 Zenvo TSR-S,2016 Zenvo ST1
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2007 Toyota HILUX ARCTIC TRUCKS AT38,0.457518,0.424934,0.359913,0.459596,0.053559,0.463064,0.432622,0.157905,0.116008,0.133952,...,0.594714,0.022733,0.447091,0.487679,0.500176,0.000511,-0.061218,0.586203,-0.117539,-0.089105
2018 Saleen S1,0.284363,0.545583,-0.197233,-0.245555,0.909132,0.201161,0.206932,0.723756,0.889748,0.920081,...,-0.190023,0.285078,0.55584,0.041831,0.106226,0.940196,0.870068,-0.302474,0.85303,0.809557
2011 BMW Z4 SDRIVE35IS,0.652371,0.599343,0.1252,0.032764,0.68714,0.445093,0.440104,0.707393,0.812542,0.83231,...,0.004983,0.271663,0.712409,0.411656,0.476852,0.68595,0.706706,-0.023933,0.624479,0.673349
2004 Mitsubishi Lancer Evo VIII MR,0.40757,0.556223,0.124132,0.046208,0.687007,0.431442,0.430436,0.770021,0.58698,0.629311,...,-0.035498,0.118157,0.813338,0.394954,0.391979,0.548904,0.507496,0.123987,0.425002,0.451927
1955 Chevrolet 150 UTILITY SEDAN,0.611267,0.256636,0.834988,0.849029,-0.403862,0.489108,0.476237,-0.160528,-0.253943,-0.259263,...,0.755625,-0.200162,0.175071,0.632358,0.791415,-0.416125,-0.414915,0.820258,-0.53292,-0.422669
2014 Porsche 918 SPYDER,-0.087858,0.270995,-0.527481,-0.567578,0.933012,-0.049358,-0.051499,0.796544,0.832087,0.864079,...,-0.505329,0.493074,0.533129,-0.165024,-0.202807,0.852907,0.91405,-0.537982,0.949643,0.909052
1934 Alfa Romeo P3,0.221484,0.20347,0.159387,0.178472,0.075034,0.136717,0.147098,0.079142,0.129199,0.181668,...,0.128051,-0.04372,0.119307,0.154287,0.217868,0.146699,0.41318,0.122433,0.144802,0.174814
2018 Hot Wheels 2JETZ,0.228868,0.496929,-0.2223,-0.277864,0.850325,0.14206,0.156894,0.693104,0.849284,0.877359,...,-0.267068,0.294872,0.488913,-0.018114,0.041131,0.932525,0.866117,-0.350101,0.873328,0.830102
1956 Lotus ELEVEN,0.803632,0.627776,0.749157,0.740961,-0.066423,0.596704,0.62437,-0.042403,0.09438,0.105369,...,0.635198,-0.498728,0.200608,0.525837,0.673165,0.183621,0.00334,0.620888,-0.141861,-0.120196
2010 Audi TT RS COUPE,0.467354,0.577999,-0.042584,-0.101843,0.824613,0.377745,0.374347,0.840423,0.758637,0.800557,...,-0.129844,0.241214,0.818229,0.3135,0.292591,0.714287,0.699684,-0.041235,0.633658,0.646131


## Recommendation

In [None]:
def search_car(name, regex=False):
    """Return the rows of ``df`` whose ``name`` column contains ``name``.

    The match is case-insensitive and may occur anywhere inside the car name.

    Parameters
    ----------
    name : str
        Text to look for.
    regex : bool, default False
        When False, ``name`` is matched literally, so characters such as
        ``(``, ``+`` or ``.`` can be searched for safely. Pass True to have
        ``name`` interpreted as a regular expression.

    Returns
    -------
    pandas.DataFrame
        The matching rows of ``df``; empty when nothing matches. Rows whose
        name is missing never match.
    """
    # regex defaults to False because str.contains otherwise treats the query
    # as a pattern: "(" would raise and "." would match any character.
    matches = df["name"].str.contains(name, case=False, na=False, regex=regex)
    return df[matches]


def recommend_cars(name, n=5):
    """Return the ``n`` cars most similar to the car called ``name``.

    Scores are read from the module-level ``similarity_matrix``, whose row and
    column positions are expected to line up with the row positions of ``df``.

    Parameters
    ----------
    name : str
        Exact value of the ``name`` column for the reference car. If several
        rows share that name, the first one is used.
    n : int, default 5
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.DataFrame
        Up to ``n`` rows of ``df``, ordered from most to least similar. The
        reference car itself is never part of the result.

    Raises
    ------
    IndexError
        If no row of ``df`` has exactly this name.
    ValueError
        If ``n`` is negative.
    """
    if n < 0:
        raise ValueError(f"n must be non-negative, got {n}")

    # Work with row positions rather than index labels: both the similarity
    # matrix and df.iloc are positional, so this stays correct even when df
    # does not carry a default 0..N-1 index.
    hits = (df["name"] == name).to_numpy().nonzero()[0]
    if len(hits) == 0:
        raise IndexError(
            f"no car named {name!r}; use search_car() to look up the exact name"
        )
    idx = int(hits[0])

    # Remove the reference car by position instead of assuming it sorts first:
    # another car tied at the top score could otherwise take its place and the
    # reference car would end up recommending itself.
    sim_scores = [
        (pos, score)
        for pos, score in enumerate(similarity_matrix[idx])
        if pos != idx
    ]

    # Highest similarity first; the sort is stable, so ties keep row order.
    sim_scores.sort(key=lambda pair: pair[1], reverse=True)

    car_positions = [pos for pos, _ in sim_scores[:n]]
    return df.iloc[car_positions]

In [269]:
# Look up a car from a partial, case-insensitive fragment of its name to get
# the exact spelling stored in the `name` column.
results = search_car("union")
results

Unnamed: 0,name,class,pi,category,transmission,power_hp,weight_lbs,price,speed,handling,acceleration,breaking
66,1939 Auto Union TYPE D,B,692,VINTAGE RACERS,RWD,485,2815,10000000,8.7,5.1,6.0,4.6


In [270]:
# Request the ten most similar cars; the name must match the `name` column
# exactly, so use the spelling returned by the search.
recommendations = recommend_cars("1939 Auto Union TYPE D", n=10)
recommendations

Unnamed: 0,name,class,pi,category,transmission,power_hp,weight_lbs,price,speed,handling,acceleration,breaking
516,1939 Mercedes-Benz W154,B,676,VINTAGE RACERS,RWD,483,2683,10000000,8.3,5.1,5.6,4.7
466,1939 Maserati 8CTF,B,648,VINTAGE RACERS,RWD,359,1764,10000000,7.3,5.2,5.4,4.8
17,1934 Alfa Romeo P3,B,626,VINTAGE RACERS,RWD,286,1653,10000000,6.6,5.0,5.6,4.9
672,1965 Shelby COBRA DAYTONA COUPE,B,645,CLASSIC RACERS,RWD,390,2299,8000000,7.3,5.0,5.5,4.8
208,1962 Ferrari 250 GTO,B,679,CLASSIC RACERS,RWD,300,2299,10000000,7.0,5.6,5.3,4.7
268,1966 Ford #2 GT40 MKII LE MANS,A,795,CLASSIC RACERS,RWD,485,2683,10000000,7.8,6.6,6.0,6.0
43,1958 Aston Martin DBR1,B,688,CLASSIC RACERS,RWD,253,1929,10000000,7.1,5.6,5.3,5.1
367,1964 Jaguar LIGHTWEIGHT E-TYPE,A,707,RARE CLASSICS,RWD,340,2293,10000000,6.9,5.5,6.0,5.5
210,1957 Ferrari 250 TESTA ROSSA,A,704,CLASSIC RACERS,RWD,300,2028,10000000,6.9,5.6,6.0,4.7
206,1963 Ferrari 250LM,A,732,CLASSIC RACERS,RWD,315,2094,10000000,7.1,6.0,5.1,5.6
