In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import euclidean_distances

In [2]:
# Load the whisky dataset; later cells rely on its `Distillery` column and on
# the twelve flavour-score columns named in `features`.
df = pd.read_csv("whiskies.csv")

In [3]:
# Flavour-profile columns compared between two distilleries.
features = ['Body', 'Sweetness', 'Smoky', 'Medicinal', 'Tobacco', 'Honey', 'Spicy', 'Winey', 'Nutty',
    'Malty', 'Fruity', 'Floral']


def calculate_similarity(scotch1, scotch2, data=None):
    """Return the per-feature distance between two distilleries.

    For each column in ``features`` the distance is the absolute difference
    between the two distilleries' scores, which is what a one-dimensional
    Euclidean distance reduces to.

    Parameters
    ----------
    scotch1, scotch2 : str
        Values to look up in the ``Distillery`` column.
    data : pandas.DataFrame, optional
        Table holding a ``Distillery`` column and every column in
        ``features``. Defaults to the notebook-level ``df``.

    Returns
    -------
    list of float
        One distance per entry of ``features``, in the same order.

    Raises
    ------
    ValueError
        If either name has no matching row.
    """
    if data is None:
        data = df

    def _profile(name):
        # Only the first matching row is used, so duplicate rows for a
        # distillery do not change the result.
        rows = data.loc[data.Distillery == name, features]
        if rows.empty:
            raise ValueError("no rows found for distillery {!r}".format(name))
        return rows.iloc[0].to_numpy(dtype=float)

    # A single vectorised subtraction replaces one library call per feature.
    return np.abs(_profile(scotch1) - _profile(scotch2)).tolist()

In [4]:
# Build one row per ordered pair of distinct distilleries:
# [first name, second name, *per-feature distances].
simple_distances = []
for left in df.Distillery:
    print("starting", left)
    simple_distances.extend(
        [left, right] + calculate_similarity(left, right)
        for right in df.Distillery
        if left != right
    )

starting Aberfeldy
starting Aberlour
starting AnCnoc
starting Ardbeg
starting Ardmore
starting ArranIsleOf
starting Auchentoshan
starting Auchroisk
starting Aultmore
starting Balblair
starting Balmenach
starting Belvenie
starting BenNevis
starting Benriach
starting Benrinnes
starting Benromach
starting Bladnoch
starting BlairAthol
starting Bowmore
starting Bruichladdich
starting Bunnahabhain
starting Caol Ila
starting Cardhu
starting Clynelish
starting Craigallechie
starting Craigganmore
starting Dailuaine
starting Dalmore
starting Dalwhinnie
starting Deanston
starting Dufftown
starting Edradour
starting GlenDeveronMacduff
starting GlenElgin
starting GlenGarioch
starting GlenGrant
starting GlenKeith
starting GlenMoray
starting GlenOrd
starting GlenScotia
starting GlenSpey
starting Glenallachie
starting Glendronach
starting Glendullan
starting Glenfarclas
starting Glenfiddich
starting Glengoyne
starting Glenkinchie
starting Glenlivet
starting Glenlossie
starting Glenmorangie
starting Gl

In [5]:
def calc_distance(dists, scotch1, scotch2):
    """Return the total distance between two scotches from a pairwise table.

    Parameters
    ----------
    dists : pandas.DataFrame
        Table with ``Scotch1`` and ``Scotch2`` columns plus one distance
        column per flavour feature listed below.
    scotch1, scotch2 : str
        The ordered pair to look up; the row must match both columns.

    Returns
    -------
    The sum of the twelve feature columns for the first matching row.

    Raises
    ------
    IndexError
        If the table has no row for the ordered pair (for example when one
        of the names is unknown, or both names are the same and the table
        only stores distinct pairs).
    """
    feature_columns = ['Body', 'Sweetness', 'Smoky', 'Medicinal', 'Tobacco', 'Honey',
                       'Spicy', 'Winey', 'Nutty', 'Malty', 'Fruity', 'Floral']
    pair_rows = dists[(dists.Scotch1 == scotch1) & (dists.Scotch2 == scotch2)]
    totals = pair_rows[feature_columns].sum(axis=1).tolist()
    if not totals:
        # Same exception type as indexing an empty list, but says which pair failed.
        raise IndexError(
            "no distance row for pair ({!r}, {!r})".format(scotch1, scotch2)
        )
    return totals[0]

In [6]:
# Tabulate the pairwise rows: the two names, then one distance column per feature.
pair_columns = ['Scotch1', 'Scotch2'] + features
results = pd.DataFrame(simple_distances, columns=pair_columns)

In [7]:
# Rank every other distillery by its total distance to the chosen one
# and show the five closest.
my_scotch = "Glenlivet"
recs = [
    (my_scotch, other, calc_distance(results, my_scotch, other))
    for other in df.Distillery
    if other != my_scotch
]
sorted(recs, key=lambda rec: rec[2])[:5]

[('Glenlivet', 'GlenElgin', 3.0),
 ('Glenlivet', 'Glenturret', 3.0),
 ('Glenlivet', 'Auchroisk', 4.0),
 ('Glenlivet', 'Knochando', 4.0),
 ('Glenlivet', 'Aberfeldy', 5.0)]

In [8]:
# Show the raw flavour scores for the distillery used in the example above.
df.loc[df.Distillery == 'Glenlivet']

Unnamed: 0,RowID,Distillery,Body,Sweetness,Smoky,Medicinal,Tobacco,Honey,Spicy,Winey,Nutty,Malty,Fruity,Floral,Postcode,Latitude,Longitude
48,49,Glenlivet,2,3,1,0,0,2,2,2,1,2,2,3,AB37 9DB,319560,828780


In [9]:
from anaconda_enterprise import publish

In [13]:
import json

In [10]:
def recommend_scotch(my_scotch, top_n=5):
    """Return the distilleries closest to ``my_scotch``.

    Every other name in ``df.Distillery`` is scored with ``calc_distance``
    against the notebook-level ``results`` table, and the candidates are
    ranked by ascending total distance.

    Parameters
    ----------
    my_scotch : str
        Distillery name to find matches for.
    top_n : int, optional
        How many recommendations to return (default 5, the previously
        hard-coded value).

    Returns
    -------
    list of tuple
        ``(my_scotch, other_distillery, distance)`` tuples, closest first.
        The sort is stable, so ties keep their order in ``df.Distillery``.
    """
    candidates = [
        (my_scotch, other, calc_distance(results, my_scotch, other))
        for other in df.Distillery
        if other != my_scotch
    ]
    return sorted(candidates, key=lambda rec: rec[2])[:top_n]

In [11]:
# Example: the five lowest-distance matches for Talisker.
recommend_scotch("Talisker")

[('Talisker', 'Clynelish', 5.0),
 ('Talisker', 'Ardbeg', 6.0),
 ('Talisker', 'Oban', 7.0),
 ('Talisker', 'BenNevis', 9.0),
 ('Talisker', 'Dailuaine', 9.0)]

In [12]:
@publish(methods=['GET', 'POST'])
def test_input(my_scotch):
    """Echo the first element of ``my_scotch`` back as a JSON string.

    NOTE(review): the ``[0]`` indexing suggests the publish wrapper delivers
    each request parameter as a sequence of values -- confirm against the
    ``anaconda_enterprise`` documentation.
    """
    payload = {"Your Input": my_scotch[0]}
    return json.dumps(payload)

/opt/continuum/project


In [24]:
@publish(methods=['GET', 'POST'])
def make_rec(my_scotch):
    """Return the recommendations for ``my_scotch[0]`` as a JSON string.

    The JSON object has a single key naming the requested scotch; its value
    is the list of ``(distillery, distance)`` pairs produced by
    ``recommend_scotch``, in the order that function returns them.
    """
    requested = my_scotch[0]
    top_picks = [(pick[1], pick[2]) for pick in recommend_scotch(requested)]
    return json.dumps({"Top 5 Picks For {}".format(requested): top_picks})

In [15]:
# Keep the Talisker recommendations so the next cell can print them.
foo = recommend_scotch("Talisker")

In [19]:
# Print each recommendation as "<distillery> <distance>".
for pick in foo:
    distillery, distance = pick[1:3]
    print(distillery, distance)

Clynelish 5.0
Ardbeg 6.0
Oban 7.0
BenNevis 9.0
Dailuaine 9.0
