### t2.micro, AWS Marketplace -> Anaconda with Python 3

### 1) Popularity (User-based Collaborative Filtering)

In [2]:
import pandas as pd

cuisine = pd.read_csv('recommendation_cuisine.csv', sep = ',')

print('The Top 5 Most Popular Cuisine is:')
pd.DataFrame(cuisine.groupby(['Rcuisine'])['rating'].sum()).sort_values('rating', ascending=False).head(5)

The Top 5 Most Popular Cuisine is:


Unnamed: 0_level_0,rating
Rcuisine,Unnamed: 1_level_1
Mexican,283
Bar,168
Cafeteria,123
Fast_Food,106
Seafood,77


### 2) Correlation (Item-based Collaborative Filtering)

In [1]:
import warnings
warnings.filterwarnings("ignore")

import pandas as pd

cuisine             = pd.read_csv('recommendation_cuisine.csv', sep = ',')
ratings_matrix      = pd.pivot_table(data = cuisine, values='rating', index='userID', columns='placeID')
ratings_correlation = ratings_matrix.corrwith(ratings_matrix[135052]) # 135052 = La Cantina Restaurante
ratings_correlation = pd.DataFrame(ratings_correlation, columns=['PearsonR'])
ratings_correlation.dropna(inplace=True)
ratings_correlation = ratings_correlation.join(pd.DataFrame(cuisine.groupby('placeID')['rating'].count()))

print('The Most Comparable Restaurant to La Cantina Restaurante is:')
print(ratings_correlation[ratings_correlation['rating']>=10].sort_values('PearsonR', ascending=False).head(10))

The Most Comparable Restaurant to La Cantina Restaurante is:
         PearsonR  rating
placeID                  
132572   1.000000      15
135058   1.000000      18
132872   1.000000      12
132951   1.000000      10
135054   1.000000      10
135052   1.000000      50
132856   0.918085      14
135057   0.870388      15
132921   0.866025      17
132825   0.801002      32


### 3) Classification (Logistic Regression or other Machine Learning alogorithm)

In [2]:
import pandas as pd
from sklearn.linear_model import LogisticRegression

bank_data      = pd.read_csv('recommendation_bank.csv')
one_hot_encode = bank_data[[
                            'housing_loan'     , 'credit_in_default', 'personal_loans', 'prev_failed_to_subscribe',
                            'prev_subscribed'  , 'job_management'   , 'job_tech'      , 'job_entrepreneur'        ,
                            'job_bluecollar'   , 'job_unknown'      , 'job_retired'   , 'job_services'            ,
                            'job_self_employed', 'job_unemployed'   , 'job_maid'      , 'job_student'             ,
                            'married'          , 'single'           , 'divorced'
                          ]]
label          = bank_data['y']

customer_attributes = [[0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1]]
y_pred              = LogisticRegression(solver='lbfgs').fit(one_hot_encode, label).predict(customer_attributes)

print('Should this customer be recommended for a loan?: ' + y_pred[0])

Should this customer be recommended for a loan?: no


### 4) Model (Singular Value Decomposition)

In [2]:
import numpy as np
import pandas as pd
from sklearn.decomposition import TruncatedSVD

movie_reviews      = pd.read_csv('recommendation_movies.csv')
ratings_matrix     = movie_reviews.pivot_table(values='rating', index='user_id', columns='movie title', fill_value=0)
decomposed_matrix  = TruncatedSVD(n_components=10).fit_transform(ratings_matrix.T)
correlation_matrix = np.corrcoef(decomposed_matrix)
correlation_fargo  = correlation_matrix[list(ratings_matrix.columns).index('Fargo (1996)')]

print('The movies 90% similar to Fargo (1996) based upon ratings are:')
print(list(ratings_matrix.columns[(correlation_fargo > 0.9)]))

The movies 90% similar to Fargo (1996) based upon ratings are:
['Basquiat (1996)', 'Denise Calls Up (1995)', 'Fargo (1996)', 'Grosse Pointe Blank (1997)', 'Hudsucker Proxy, The (1994)', 'Mighty Aphrodite (1995)', 'Nikita (La Femme Nikita) (1990)', 'People vs. Larry Flynt, The (1996)', 'Swingers (1996)', 'Twelve Monkeys (1995)']


### 5) Content (K Nearest Neighbours)

In [2]:
import pandas as pd
from sklearn.neighbors import NearestNeighbors

mtcars         = pd.read_csv('recommendation_cars.csv')
mtcars.columns = ['car_names', 'mpg', 'cyl', 'disp', 'hp', 'drat', 'wt', 'qsec', 'vs', 'am', 'gear', 'carb']

knn          = NearestNeighbors(n_neighbors=1).fit(mtcars[['mpg', 'disp', 'hp', 'wt']].values)
car_features = [16, 250, 160, 3.7] # ['mpg', 'disp', 'hp', 'wt']
index        = knn.kneighbors([car_features])[1][0][0]

print('The car most like our given car_features is: ')
print(mtcars.loc[[index]][['car_names', 'mpg', 'disp', 'hp', 'wt']])

The car most like our given car_features is: 
     car_names   mpg   disp   hp    wt
11  Merc 450SE  16.4  275.8  180  4.07
