In [1]:
from surprise import Dataset, Reader
from surprise import SVD # implementation of Funk's SVD
from surprise import accuracy # metric
from surprise.model_selection import cross_validate, train_test_split, GridSearchCV #train/test splits, etc
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from bs4 import BeautifulSoup
import requests

In [2]:
def prepare_data():
    df=pd.read_csv('beer_reviews.csv')
    df.drop(columns=['beer_abv'],inplace=True)
    review_count_threshold = 2
    users = df.groupby('review_profilename').count()
    users = users.loc[users['beer_name'] >= review_count_threshold]
    users_to_keep = list(users.index)
    df = df.loc[df['review_profilename'].isin(users_to_keep)]
    beers = df.drop_duplicates(subset='beer_beerid')
    beers['complete_beer_name'] = beers.brewery_name + ' Brewery ' + beers.beer_name
    beers = beers[['beer_beerid','complete_beer_name','beer_style','brewery_id','beer_style']]
    df = df[['review_profilename','beer_beerid','review_overall']]
    df['review_overall'] = df.review_overall.astype('float16')
    df['beer_beerid'] = df.beer_beerid.astype('int32')
    df.drop_duplicates(subset = ['review_profilename','beer_beerid'],inplace=True)
    return({'beers_df':beers,'merge_df':df})

In [3]:
def extract_ratings(preceding_tr,user_url):
    r = requests.get('https://www.beeradvocate.com/user/beers/?ba='+user_url)
    soup = BeautifulSoup(r.text, 'html.parser')
    beer_names = []
    beer_ratings = []
    beer_body_tr = soup.body.find_all('tr')
    beer_list = beer_body_tr[preceding_tr:]
    for i,v in enumerate(beer_list):
        beer_url = v.a.attrs['href']
        beer_id = beer_url.split('/')[4]
        beer_names.append(beer_id)
        beer_rating = beer_body_tr[i+preceding_tr].find_all('b')[-1].text
        beer_ratings.append(beer_rating)
        data = pd.DataFrame({'review_profilename':'user_prediction','beer_beerid': beer_names,'review_overall': beer_ratings})
        data['beer_beerid'] = data.beer_beerid.astype('int32')
        data['review_overall'] = data.review_overall.astype('float16')
    return(data)


In [7]:
def get_recommendations(merge_df,user_df,beers_df):
    items_to_predict = beers_df[~beers_df.beer_beerid.isin(user_df.beer_beerid)].beer_beerid.unique()
    reader = Reader()
    user_data = Dataset.load_from_df(user_df, reader=reader)
    full_df = pd.concat([merge_df,user_df])
    full_df.reset_index(inplace=True,drop=True)
    reader = Reader()#line_format='user rating item', sep=',')
    data = Dataset.load_from_df(full_df, reader=reader)
    trainset, testset = train_test_split(data, test_size=.2, random_state = 42)
    svd = SVD(reg_all = .05, lr_all = 0.008, n_factors = 11, n_epochs = 65)
    svd.fit(trainset)
    predictions = []
    for i,v in enumerate(items_to_predict):
        recommendation = svd.predict(uid='user_prediction',iid=v,verbose=False)
        predictions.append(recommendation.est)
    predictions
    pred_df = pd.DataFrame({'beer_beerid':items_to_predict,'predicted_rating':predictions})
    pred_df = pred_df.sort_values(by='predicted_rating',ascending=False)
    pred_df = pred_df.merge(beers_df)
    cols = ['brewery_id', 'beer_beerid']
    pred_df['url'] = pred_df[cols].apply(lambda row: '/'.join(row.values.astype(str)), axis=1)
    pred_df['url'] =  "https://www.beeradvocate.com/beer/profile/" + pred_df.url + "/"
    return(pred_df)

In [8]:
def run_recommender(username,beers_df,merge_df):
    ratings = extract_ratings(3,username)
    return(get_recommendations(merge_df,ratings,beers_df)[:50])


In [9]:
run_recommender('Rug')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  beers['complete_beer_name'] = beers.brewery_name + ' Brewery ' + beers.beer_name
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['review_overall'] = df.review_overall.astype('float16')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['beer_beerid'] = df.beer_beerid.astype('int32')
A value is try

Unnamed: 0,beer_beerid,predicted_rating,complete_beer_name,beer_style,brewery_id,beer_style.1,url
0,56702,5.000000,Cigar City Brewing Brewery Jolly Guava,American Wild Ale,17981,American Wild Ale,https://www.beeradvocate.com/beer/profile/1798...
1,70054,5.000000,AleSmith Brewing Company Brewery Alesmith Spee...,American Double / Imperial Stout,396,American Double / Imperial Stout,https://www.beeradvocate.com/beer/profile/396/...
2,17210,5.000000,Brouwerij Girardin Brewery Lambik (2 Year Old ...,Lambic - Unblended,2541,Lambic - Unblended,https://www.beeradvocate.com/beer/profile/2541...
3,64730,5.000000,Cigar City Brewing Brewery Kopi Con Leche Stout,Milk / Sweet Stout,17981,Milk / Sweet Stout,https://www.beeradvocate.com/beer/profile/1798...
4,44910,5.000000,De Struise Brouwers Brewery Dirty Horse,Lambic - Unblended,15237,Lambic - Unblended,https://www.beeradvocate.com/beer/profile/1523...
...,...,...,...,...,...,...,...
65996,32748,1.727514,Microbrasserie du Lièvre Brewery El Lapino,Chile Beer,3718,Chile Beer,https://www.beeradvocate.com/beer/profile/3718...
65997,27217,1.658051,Melanie Brewing Company Brewery Evil Eye Red,Fruit / Vegetable Beer,1422,Fruit / Vegetable Beer,https://www.beeradvocate.com/beer/profile/1422...
65998,55523,1.642819,Groupe Geloso Brewery Griffes Du Diable,American Malt Liquor,5470,American Malt Liquor,https://www.beeradvocate.com/beer/profile/5470...
65999,36683,1.580758,Melanie Brewing Company Brewery Jaguar High Gr...,American Malt Liquor,1422,American Malt Liquor,https://www.beeradvocate.com/beer/profile/1422...
