In [None]:
import pandas as pd

# Location of the mtcars CSV (raw file served from a GitHub gist).
data_url = 'https://gist.githubusercontent.com/seankross/a412dfbd88b3db70b74b/raw/5f23f993cd87c283ce766e7ac6b329ee7cc2e1d1/mtcars.csv'

# Header labels applied to the loaded table, in column order.
column_names = [
    'car_names',
    'mpg', 'cyl', 'disp', 'hp', 'drat', 'wt',
    'qsec', 'vs', 'am', 'gear', 'carb',
]

cars = pd.read_csv(data_url)
# Overwrite whatever header the file ships with; pandas raises if the
# number of labels does not match the number of columns.
cars.columns = column_names

# Show the first few rows as the cell output.
cars.head()

In [None]:
# Dimensions of the loaded table as a (rows, columns) tuple.
cars.shape

## Step 1: define the features

In [None]:
from sklearn.preprocessing import StandardScaler

# Restrict the feature matrix to a handful of columns, which keeps the
# distance computation small.
feature_columns = ['mpg', 'disp', 'hp', 'wt']
raw_features = cars[feature_columns].values

# Put every feature on a comparable scale so that no single column
# dominates the distances merely because of its units. The fitted
# `scaler` is kept around so the same transform can be applied to new rows.
scaler = StandardScaler()
scaler.fit(raw_features)
X = scaler.transform(raw_features)

## Step 2: define a distance metric

In [None]:
from sklearn.metrics.pairwise import euclidean_distances

## Step 3: recommend items

In [None]:
# Car the user is looking at, in raw (unscaled) units.
# Value order: mpg, disp, hp, wt
query_raw = [15, 300, 160, 3.2]

# Wrap the values in a list to form a single-row 2-D input, then apply the
# already-fitted scaler so the query lives on the same scale as X.
looking_at_car = scaler.transform([query_raw])

# Distance from every row of X to the query row:
# (n_rows1, n_cols), (n_rows2, n_cols) --> (n_rows1, n_rows2), i.e. (n_cars, 1) here
distances = euclidean_distances(X, looking_at_car)

# Flatten the (n_cars, 1) column into a one-dimensional vector
distances_reshaped = distances.ravel()

print(distances.shape)
distances

In [None]:
# Flattened one-dimensional distances, one entry per row of X.
distances_reshaped

In [None]:
# Positions of `distances_reshaped` ordered from the smallest distance
# (highest similarity to the car we're looking at) to the largest.
# This ranks every row; picking the top few happens afterwards by slicing.
ordered_indices = distances_reshaped.argsort()
ordered_indices

In [None]:
# Reminder: looking_at_car = [15, 300, 160, 3.2]  (mpg, disp, hp, wt)

# How many recommendations to keep from the front of the ascending ordering.
n_closest = 3
closest_indices = ordered_indices[:n_closest]

# Look up the matching rows of the full table by position.
closest_cars = cars.iloc[closest_indices]
closest_cars