# User Collaborative Filtering Recommender Engine

## In this notebook we make a colaborative fltering recommender system based in users

In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot
%matplotlib inline

In [2]:
df = pd.read_csv('df_to_recommender.csv')

In [3]:
df.shape

(7771, 27)

In [4]:
rated = df.num_rated_beers
stats = rated.describe()
stats

count    7771.000000
mean      951.726161
std       411.347573
min       501.000000
25%       649.000000
50%       837.000000
75%      1161.000000
max      2197.000000
Name: num_rated_beers, dtype: float64

In [5]:
df=df[['user_id', 'profile_name', 'Id', 'names', 'num_rated_beers', 'style','overall']].copy()

In [6]:
df.tail()

Unnamed: 0,user_id,profile_name,Id,names,num_rated_beers,style,overall
7766,13567,acrawf6,23236,Zea Amber Lager,522,Vienna Lager,4.0
7767,13567,acrawf6,32318,Zea Category 5 American Pale Ale,522,American Pale Ale (APA),4.5
7768,13567,acrawf6,32319,German Lager,522,Märzen / Oktoberfest,4.5
7769,13567,acrawf6,40482,Gordon Biersch Smoked Märzen,522,Märzen / Oktoberfest,4.0
7770,13567,acrawf6,30074,Special Block 6,522,Belgian Pale Ale,4.0


In [7]:
## to check some profile names and input as sample_user = ' xxx'
ToKnowNames = df.groupby(['profile_name'])['names'].count().sort_values(ascending = False).head(10)
ToKnowNames

profile_name
 Kegatron        341
 paterlodie      279
 Redrover        193
 Naerhu          151
 sulldaddy       150
 mdagnew         149
 brendan13       149
 Vancer          146
 mynie           141
 msubulldog25    129
Name: names, dtype: int64

In [8]:
ratings = df.pivot_table(values='overall', index='user_id', columns='names')
ratings.fillna(0, inplace=True)
beer_index = ratings.columns
ratings.shape

(214, 7205)

In [9]:
ratings.head()

names,$ellout $tout,'Tis The Saison,'Zula Stout,'t Gaverhopke Zingende Blondine,'t Smisje Blond,'t Smisje Fiori,'t Smisje Grande Reserva,'t Smisje Great Reserva,'t Smisje Tripel,(512) Cascabel Cream Stout,...,""" La Malagne (for Archaeopark, Rochefort)""",""" Mischief, Double Dry-Hopped""",""" Nice Guy, Dud Root""",""" Saison De Lente, 100% Brett""",""" Schell's Anniversary Series #8, 1905 V.T. (Vacuum Tonic)""",""" Schlafly """"Sofa King"""" Red Ale""",""" Schlafly #20, Volume 1 - Imperial Pilsner""",""" Tovarish With Cacao, Toasted Almond & Coconut""",""" Upright (""""Punk Rock"""" Strong) Lager""",""" Zeppelin (for Den Ouden Advokaat, St.Pauwels)"""
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
491,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
594,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
977,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1198,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1214,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Sparsity = (17276 / (662 * 15437)) * 100 = 0.16% it is very low...

### Corrcoef returns Pearson product-moment correlation coefficients.

In [10]:
corr_matrix= np.corrcoef(ratings.T)
corr_matrix.shape

(7205L, 7205L)

### We create a similatity beer function to get all the similarities with other beers.

In [11]:
def get_beer_similarity(names):  
    '''Returns correlation vector for a beer'''
    beer_idx = list(beer_index).index(names)
    return corr_matrix[beer_idx]

In [12]:
a=get_beer_similarity(" Pumpkin Ale")
a.shape

(7205L,)

In [13]:
a

array([-0.01346934, -0.01346934, -0.01346934, ..., -0.01346934,
       -0.01346934, -0.01346934])

### Make a function to input user beers which returns a matrix with recommendations

In [14]:
def get_beer_recommendations(user_name):  
    '''given a set of beers, it returns all the beers sorted by their correlation with the user'''
    beer_similarities = np.zeros(corr_matrix.shape[0])
    for Id in user_name:
        beer_similarities = beer_similarities + get_beer_similarity(Id)
    similarities_df = pd.DataFrame({
        'beer_name': beer_index,
        'sum_similarity': beer_similarities
        })
    similarities_df = similarities_df[~(similarities_df.beer_name.isin(user_name))]
    similarities_df = similarities_df.sort_values(by=['sum_similarity'], ascending=False)
    return similarities_df.beer_name.head(5)

In [15]:
sample_user = ' barleywinefiend'
df[df.profile_name==sample_user].sort_values(by=['overall'], ascending=False)

Unnamed: 0,user_id,profile_name,Id,names,num_rated_beers,style,overall


In [16]:
sample_user_beers = df[df.profile_name==sample_user].names.tolist()  
recommendations = get_beer_recommendations(sample_user_beers)
recommendations

0                        $ellout $tout
4841             Pale Ale (Centennial)
4811                          Oxymoron
4810     Oxymore Periple En La Demeure
4809                  Oxford Raspberry
Name: beer_name, dtype: object

In [17]:
df1=df[df.profile_name==sample_user].head(5)
df1=df1.filter(items=['profile_name'])
df1.reset_index(inplace=True)
df1

Unnamed: 0,index,profile_name


In [18]:
df_final=recommendations.to_frame()
df_final.reset_index(inplace=True)
df_final

Unnamed: 0,index,beer_name
0,0,$ellout $tout
1,4841,Pale Ale (Centennial)
2,4811,Oxymoron
3,4810,Oxymore Periple En La Demeure
4,4809,Oxford Raspberry


In [19]:
df_final_prueba=pd.concat([df1['profile_name'], df_final['beer_name']], axis=1)

In [20]:
df_final_prueba

Unnamed: 0,profile_name,beer_name
0,,$ellout $tout
1,,Pale Ale (Centennial)
2,,Oxymoron
3,,Oxymore Periple En La Demeure
4,,Oxford Raspberry


In [21]:
df_filtered = df[df['profile_name'] == " barleywinefiend"]
df_filtered

Unnamed: 0,user_id,profile_name,Id,names,num_rated_beers,style,overall


### Make the application
### To make a dropdown list to select the user and get the 5 top recommendations

### Select the user in the dropdown list below, and get your top 5 recommendations!

In [22]:
import ipywidgets as widgets
from ipywidgets import interactive
 
items = sorted(df['profile_name'].unique().tolist())

def view(x=''):
    if x==x: return(get_beer_recommendations(df[df.profile_name==x].names.tolist()))
     
    
w = widgets.Select(options=items)
interactive(view, x=w)



599      Bavarian Style Hefeweizen
1862              Dark Star Porter
3846          Lakeside British Ale
5427              Rockford Bay IPA
3422              Imperial Pilsner
Name: beer_name, dtype: object