# Input Style Recommender Engine

## In this notebook we build an item-based recommender system: given a beer STYLE that a user likes, it returns the TOP 5 SIMILAR BEERS

In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot
%matplotlib inline
import sys
reload(sys)
sys.setdefaultencoding("utf-8")
import ipywidgets as widgets
from ipywidgets import interactive

In [2]:
# Load the recommender input table from CSV (path is relative to the working directory).
df = pd.read_csv('df_to_recommender.csv')

In [3]:
# Sanity check: (rows, columns) of the frame just loaded.
df.shape

(7771, 27)

In [4]:
# Summary statistics (count, mean, std, quartiles, ...) of the `num_rated_beers` column.
rated = df['num_rated_beers']
stats = rated.describe()
stats

count    7771.000000
mean      951.726161
std       411.347573
min       501.000000
25%       649.000000
50%       837.000000
75%      1161.000000
max      2197.000000
Name: num_rated_beers, dtype: float64

In [5]:
# Keep only the columns used below; .copy() gives an independent frame
# rather than a selection of the originally loaded one.
df = df[[
    'user_id',
    'profile_name',
    'Id',
    'names',
    'num_rated_beers',
    'style',
    'overall',
]].copy()

In [6]:
# Preview the last rows of the reduced frame.
df.tail()

Unnamed: 0,user_id,profile_name,Id,names,num_rated_beers,style,overall
7766,13567,acrawf6,23236,Zea Amber Lager,522,Vienna Lager,4.0
7767,13567,acrawf6,32318,Zea Category 5 American Pale Ale,522,American Pale Ale (APA),4.5
7768,13567,acrawf6,32319,German Lager,522,Märzen / Oktoberfest,4.5
7769,13567,acrawf6,40482,Gordon Biersch Smoked Märzen,522,Märzen / Oktoberfest,4.0
7770,13567,acrawf6,30074,Special Block 6,522,Belgian Pale Ale,4.0


### The code below does not work if the column is called "style", because `df.style` is already taken by pandas' Styler accessor and so attribute access never reaches the column; therefore we rename the column to "estilo"

In [7]:
# `DataFrame.style` is pandas' Styler accessor, so attribute access (`df.style`)
# would not return this column; give it a name that does not clash.
df = df.rename(columns={'style': 'estilo'})

In [8]:
# Confirm the rename: the column list should now contain 'estilo' instead of 'style'.
df.columns

Index([u'user_id', u'profile_name', u'Id', u'names', u'num_rated_beers',
       u'estilo', u'overall'],
      dtype='object')

In [9]:
# Count the non-null beer names recorded under each style and keep the ten
# styles with the most rows -- handy for choosing a `sample_style` value later.
ToKnowNames = (
    df.groupby('estilo')['names']
      .count()
      .sort_values(ascending=False)
      .head(10)
)
ToKnowNames

estilo
 American IPA                      523
 American Pale Ale (APA)           452
 American Amber / Red Ale          321
 American Porter                   295
 Hefeweizen                        209
 American Double / Imperial IPA    200
 German Pilsener                   171
 Saison / Farmhouse Ale            170
 American Stout                    158
 M├ñrzen / Oktoberfest              147
Name: names, dtype: int64

In [10]:
# Style x beer matrix of `overall` ratings (pivot_table aggregates duplicates
# with its default, the mean); style/beer pairs without a rating become 0.
ratings = df.pivot_table(values='overall', index='estilo', columns='names').fillna(0)
# Column labels of the matrix, i.e. the beer names, in matrix column order.
beer_index = ratings.columns
ratings.shape

(104, 7205)

In [11]:
# Preview the first styles (rows) of the style x beer rating matrix.
ratings.head()

names,$ellout $tout,'Tis The Saison,'Zula Stout,'t Gaverhopke Zingende Blondine,'t Smisje Blond,'t Smisje Fiori,'t Smisje Grande Reserva,'t Smisje Great Reserva,'t Smisje Tripel,(512) Cascabel Cream Stout,...,""" La Malagne (for Archaeopark, Rochefort)""",""" Mischief, Double Dry-Hopped""",""" Nice Guy, Dud Root""",""" Saison De Lente, 100% Brett""",""" Schell's Anniversary Series #8, 1905 V.T. (Vacuum Tonic)""",""" Schlafly """"Sofa King"""" Red Ale""",""" Schlafly #20, Volume 1 - Imperial Pilsner""",""" Tovarish With Cacao, Toasted Almond & Coconut""",""" Upright (""""Punk Rock"""" Strong) Lager""",""" Zeppelin (for Den Ouden Advokaat, St.Pauwels)"""
estilo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Altbier,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
American Adjunct Lager,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
American Amber / Red Ale,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0
American Amber / Red Lager,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
American Barleywine,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Corrcoef returns Pearson product-moment correlation coefficients

In [12]:
# Beer-to-beer Pearson correlations: with rowvar=False each column of
# `ratings` (one beer) is a variable and each row (one style) an observation,
# which is what transposing the frame achieved before.
corr_matrix = np.corrcoef(ratings.values, rowvar=False)
corr_matrix.shape

(7205L, 7205L)

### We create a beer similarity function to get the similarities between one beer and all the other beers.

In [13]:
def get_beer_similarity(names):
    '''Return the row of ``corr_matrix`` holding one beer's correlations.

    Parameters
    ----------
    names : str
        Beer name, matched exactly against ``beer_index`` (the column labels
        of the ``ratings`` pivot table).

    Returns
    -------
    numpy.ndarray
        ``corr_matrix[i]``, where ``i`` is the position of ``names`` in
        ``beer_index``.

    Raises
    ------
    ValueError
        If ``names`` is not present in ``beer_index``.
    '''
    # Index.get_loc is a direct label lookup; the previous
    # list(beer_index).index(...) rebuilt a Python list and scanned it
    # linearly on every call. pivot_table column labels are unique, so the
    # lookup yields a single integer position.
    try:
        beer_idx = beer_index.get_loc(names)
    except KeyError:
        # list.index raised ValueError for unknown names; keep that contract.
        raise ValueError('%r is not a known beer name' % (names,))
    return corr_matrix[beer_idx]

In [14]:
# Smoke test: fetch the correlation vector of one beer. The name is passed
# exactly as written here, leading space included.
a=get_beer_similarity(" Pumpkin Ale")
a.shape

(7205L,)

In [15]:
# Display the correlation vector fetched in the previous cell.
a

array([-0.00970874, -0.00970874, -0.00970874, ..., -0.00970874,
       -0.00970874, -0.00970874])

### Make a function that takes a list of beers and returns the most similar beers as recommendations

In [16]:
def get_beer_recommendations(stl, top_n=5):
    '''Recommend the beers most correlated with a collection of reference beers.

    Parameters
    ----------
    stl : iterable of str
        Reference beer names, e.g. every beer rated within one style. Each
        name is looked up with ``get_beer_similarity``. A name that appears
        several times contributes its correlation vector several times.
    top_n : int, optional
        How many recommendations to return. Defaults to 5, the value that
        used to be hard-coded.

    Returns
    -------
    pandas.Series
        ``beer_name`` of the ``top_n`` beers with the highest summed
        correlation, excluding the reference beers themselves. The Series
        index is each beer's position in ``beer_index``.
    '''
    # Materialise once: the names are consumed twice (loop + isin), which
    # would silently misbehave for a one-shot iterable such as a generator.
    stl = list(stl)

    # Accumulate the correlation vectors of all reference beers.
    beer_similarities = np.zeros(corr_matrix.shape[0])
    for beer_name in stl:
        beer_similarities = beer_similarities + get_beer_similarity(beer_name)

    similarities_df = pd.DataFrame({
        'beer_name': beer_index,
        'sum_similarity': beer_similarities
        })

    # Never recommend a beer that was part of the input.
    is_reference = similarities_df['beer_name'].isin(stl)
    ranked = similarities_df[~is_reference].sort_values(
        by=['sum_similarity'], ascending=False)
    return ranked['beer_name'].head(top_n)

In [17]:
# Style used for the walkthrough below; the leading space is part of the
# string that gets compared against the `estilo` column.
sample_style = ' American Adjunct Lager'
# Rows of that style, best `overall` rating first.
df.loc[df['estilo'] == sample_style].sort_values('overall', ascending=False)

Unnamed: 0,user_id,profile_name,Id,names,num_rated_beers,estilo,overall
4431,6317,JISurfer,43981,Primo,561,American Adjunct Lager,5.0
4423,6317,JISurfer,1642,Bud Ice,561,American Adjunct Lager,5.0
4457,6317,JISurfer,6887,Sambadoro,561,American Adjunct Lager,4.5
1843,19964,flagmantho,831,Sagres Cerveja,948,American Adjunct Lager,4.5
6383,977,BMoney575,3759,Michigan Brewing Hamtramck Beer,724,American Adjunct Lager,4.5
3294,30047,sleazo,26313,Victoria,593,American Adjunct Lager,4.5
4070,21977,jackndan,5271,Tank Beer,567,American Adjunct Lager,4.5
4072,21977,jackndan,2966,Belco Beer,567,American Adjunct Lager,4.5
4113,23311,jsprain1,64979,Godfather,764,American Adjunct Lager,4.5
2428,10180,Redrover,33288,Yellow Moon Rice'n,1178,American Adjunct Lager,4.5


In [18]:
# Every beer name recorded under the sample style (repeats kept) is fed to
# the recommender as the reference set.
sample_style_beers = df.loc[df['estilo'] == sample_style, 'names'].tolist()
recommendations = get_beer_recommendations(sample_style_beers)
recommendations

1190              British Bulldog
4483          New Speedway Bitter
4352     Morrissey Fox Blonde Ale
2759                    Goldihops
2761                     Goldings
Name: beer_name, dtype: object

In [19]:
# One-column frame holding the style label for the first five rows of the
# sample style; reset_index() moves the original row labels into an 'index'
# column and renumbers the rows from 0.
df1 = (
    df.loc[df['estilo'] == sample_style, ['estilo']]
      .head(5)
      .reset_index()
)
df1

Unnamed: 0,index,estilo
0,135,American Adjunct Lager
1,214,American Adjunct Lager
2,352,American Adjunct Lager
3,685,American Adjunct Lager
4,962,American Adjunct Lager


In [20]:
# Turn the recommendation Series into a frame; reset_index() keeps the old
# labels in an 'index' column and renumbers the rows from 0.
df_final = recommendations.to_frame().reset_index()
df_final

Unnamed: 0,index,beer_name
0,1190,British Bulldog
1,4483,New Speedway Bitter
2,4352,Morrissey Fox Blonde Ale
3,2759,Goldihops
4,2761,Goldings


In [21]:
# Put the style labels next to the recommended beer names. concat(axis=1)
# aligns on the index, and both frames were renumbered from 0 by reset_index.
df_final_prueba=pd.concat([df1['estilo'], df_final['beer_name']], axis=1)

In [22]:
# Display the combined style / recommended-beer table.
df_final_prueba

Unnamed: 0,estilo,beer_name
0,American Adjunct Lager,British Bulldog
1,American Adjunct Lager,New Speedway Bitter
2,American Adjunct Lager,Morrissey Fox Blonde Ale
3,American Adjunct Lager,Goldihops
4,American Adjunct Lager,Goldings


In [23]:
# All rows of the sample style. Reuses `sample_style` instead of repeating the
# literal, so this cell keeps following the style chosen earlier.
df_filtered = df[df['estilo'] == sample_style]
df_filtered

Unnamed: 0,user_id,profile_name,Id,names,num_rated_beers,estilo,overall
135,19981,flexabull,2772,Stoney's Premium Lager,1255,American Adjunct Lager,4.0
214,3849,DogFood11,3940,Imperial,1200,American Adjunct Lager,4.0
352,33346,zoso1967,20817,Lawnmower Lager,509,American Adjunct Lager,4.5
685,5232,GratefulBeerGuy,6264,Honey Pilsner,1018,American Adjunct Lager,4.5
962,7111,Kegatron,65125,Cabbage Hill Pilsner,2150,American Adjunct Lager,4.0
1196,1404,BeerSox,47181,Honey Lager,1167,American Adjunct Lager,4.5
1344,18368,deltatauhobbit,5317,Salva Vida,825,American Adjunct Lager,4.0
1418,12846,Vancer,7447,Brahma Chopp,1641,American Adjunct Lager,4.0
1419,12846,Vancer,2910,Presidente,1641,American Adjunct Lager,4.0
1505,12846,Vancer,6393,Skol,1641,American Adjunct Lager,4.0


### Make the application
### Creating a dropdown list to select the style

### Select your favourite style in the dropdown list below, and get your top 5 recommendations!

In [24]:
# Options for the selector: every distinct, non-missing style, alphabetically.
# Missing values are dropped so sorted() only ever compares strings.
items = sorted(df['estilo'].dropna().unique().tolist())

def view(x=''):
    '''Return the recommendations for style ``x``.

    Collects every beer name whose ``estilo`` equals ``x`` and passes the list
    to ``get_beer_recommendations``. Returns ``None`` for a missing style.
    '''
    # Explicit form of the former `if x==x:` self-comparison, which was only
    # False for NaN and then fell through to an implicit None.
    if pd.isnull(x):
        return None
    return get_beer_recommendations(df[df.estilo==x].names.tolist())


# Selection widget bound to `view`: picking a style re-runs the recommender.
w = widgets.Select(options=items)
interactive(view, x=w)



566                          Barrilito
5521                    Sagres Cerveja
2443                       Firefly Ale
3500     Ittinger Original Klosterbr├ñu
7112                Yellow Moon Rice'n
Name: beer_name, dtype: object