In [160]:
import surprise
from surprise.prediction_algorithms import *
import pandas as pd
import numpy as np
import datetime as dt
import requests
import json

In [161]:
import os

# Ravelry API credentials are read from a JSON file that must provide the
# 'id' and 'key' entries used for HTTP auth further down.
# The location can be overridden with the RAVELRY_CREDS_PATH environment
# variable so the notebook is not tied to one machine; the default is the
# original local path.
creds_path = os.environ.get(
    'RAVELRY_CREDS_PATH',
    'C:/Users/clare/Documents/Flatiron/PatternRecommender/.secrets/creds.json')
with open(creds_path) as f:
    creds = json.load(f)

In [162]:
# Load the collected project records and keep only the finished projects.
input_df = pd.read_csv('saved_100000_calls.csv')
finished_projects = input_df[input_df['status'] == 'Finished']

# Variant 1: discard finished projects that have no rating.
df_drop_nans = finished_projects[['user', 'pattern_id', 'rating']].dropna(subset = ['rating'])

## Replaced Nans negatively impact accuracy

# Variant 2: fill a missing rating with the pattern's average_rating.
# Built with assign/drop on a new frame instead of assigning into a slice of
# finished_projects, which avoids SettingWithCopyWarning and keeps the cell
# idempotent on re-run.
rating_replace_nans = finished_projects['rating'].fillna(finished_projects['average_rating'])
df_replace_nans = (
    finished_projects[['user', 'pattern_id', 'rating', 'average_rating']]
    .assign(rating = rating_replace_nans)
    .drop(columns = 'average_rating')
)

df_replace_nans

In [163]:
from surprise import Reader, Dataset
# Reader is created with its default settings.
# NOTE(review): confirm that the Reader's default rating scale matches the
# range of values in the `rating` column — TODO confirm.
reader = Reader()

# Wrap both (user, pattern_id, rating) tables as surprise datasets:
# one with unrated projects dropped, one with missing ratings filled in.
data_drop = Dataset.load_from_df(df_drop_nans, reader)
data_replace = Dataset.load_from_df(df_replace_nans, reader)

In [164]:
from surprise.model_selection import train_test_split

# Hold out 25% of each dataset for evaluation.
# The split is seeded so that the RMSE figures of the models compared below
# are reproducible after Restart & Run All.
SPLIT_SEED = 42

drop_trainset, drop_testset = train_test_split(data_drop, test_size=0.25, random_state=SPLIT_SEED)
replace_trainset, replace_testset = train_test_split(data_replace, test_size=0.25, random_state=SPLIT_SEED)

In [165]:
from surprise import SVD, accuracy

# First SVD model, trained on the dataset where unrated projects were dropped.
SVD_1_drop = SVD(n_factors = 40, n_epochs = 45, lr_all = 0.002, reg_all = 0.2)
SVD_1_drop.fit(drop_trainset)

# RMSE on the held-out split; as the last expression its value is also displayed.
accuracy.rmse(SVD_1_drop.test(drop_testset))

RMSE: 0.5233


0.5233432577329478

In [166]:
# Same hyperparameters as SVD_1_drop, but trained and evaluated on the dataset
# where missing ratings were filled from average_rating.
SVD_1_replace = SVD(n_factors = 40, n_epochs = 45, lr_all = 0.002, reg_all = 0.2)
SVD_1_replace.fit(replace_trainset)

accuracy.rmse(SVD_1_replace.test(replace_testset))

RMSE: 0.7140


0.7140159177163934

In [26]:
from surprise.model_selection import GridSearchCV

# Exhaustive grid over the SVD hyperparameters:
# 10 x 10 x 4 x 5 = 2000 parameter combinations.
param_grid = {'n_factors':[5, 10, 15, 20, 25, 30, 35, 40, 45, 50],
              'n_epochs': [5, 10, 15, 20, 25, 30, 35, 40, 45, 50], 
              'lr_all': [0.002, 0.003, 0.004, 0.005],
              'reg_all': [0.2, 0.3, 0.4, 0.5, 0.6]}

# n_jobs = -1 runs the search in parallel; joblib_verbose = 5 emits the
# progress log. This is a long-running cell (the log passes 25 minutes).
gs_model = GridSearchCV(SVD,
                        param_grid=param_grid,
                        n_jobs = -1,
                        joblib_verbose=5)

# The search runs on the whole dropped-NaN dataset (not the train split).
gs_model.fit(data_drop)

# Best parameter set per accuracy measure (the output lists 'rmse' and 'mae').
gs_model.best_params

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  48 tasks      | elapsed:    8.6s
[Parallel(n_jobs=-1)]: Done 138 tasks      | elapsed:   22.0s
[Parallel(n_jobs=-1)]: Done 264 tasks      | elapsed:   45.0s
[Parallel(n_jobs=-1)]: Done 426 tasks      | elapsed:  1.4min
[Parallel(n_jobs=-1)]: Done 624 tasks      | elapsed:  2.5min
[Parallel(n_jobs=-1)]: Done 858 tasks      | elapsed:  4.3min
[Parallel(n_jobs=-1)]: Done 1128 tasks      | elapsed:  5.8min
[Parallel(n_jobs=-1)]: Done 1434 tasks      | elapsed:  7.1min
[Parallel(n_jobs=-1)]: Done 1776 tasks      | elapsed:  9.5min
[Parallel(n_jobs=-1)]: Done 2154 tasks      | elapsed: 12.0min
[Parallel(n_jobs=-1)]: Done 2568 tasks      | elapsed: 14.1min
[Parallel(n_jobs=-1)]: Done 3018 tasks      | elapsed: 18.2min
[Parallel(n_jobs=-1)]: Done 3504 tasks      | elapsed: 20.4min
[Parallel(n_jobs=-1)]: Done 4026 tasks      | elapsed: 25.4min
[Parallel(n_jobs=-1)]: Done 4584 tasks      | e

{'rmse': {'n_factors': 15, 'n_epochs': 30, 'lr_all': 0.003, 'reg_all': 0.2},
 'mae': {'n_factors': 10, 'n_epochs': 40, 'lr_all': 0.005, 'reg_all': 0.2}}

In [167]:
# Hyperparameters taken from the grid search's best-RMSE configuration.

GS_SVD = SVD(n_factors = 15, n_epochs = 30, lr_all = 0.003, reg_all = 0.2)
GS_SVD.fit(drop_trainset)

# Evaluate on the held-out split of the dropped-NaN dataset.
predictions = GS_SVD.test(drop_testset)
accuracy.rmse(predictions)

RMSE: 0.5230


0.5230168630445167

In [168]:
from surprise.prediction_algorithms import knns
from surprise.similarities import cosine, msd, pearson
from surprise import accuracy

TODO: also evaluate the following seven models on the NaN-replaced dataset (`data_replace`):

In [169]:
# KNNBasic with cosine similarity, configured as user-based.
sim_cos = {'name':'cosine', 'user_based':True}

basic = knns.KNNBasic(min_k = 8, sim_options=sim_cos)
basic.fit(drop_trainset)
predictions = basic.test(drop_testset)
# accuracy.rmse reports the score itself and print() then shows the returned
# value again, which is why the RMSE appears twice in the output.
print(accuracy.rmse(predictions))

Computing the cosine similarity matrix...
Done computing similarity matrix.

  sim = construction_func[name](*args)



RMSE: 0.5960
0.5960426436894842


In [170]:
# KNNBasic again, this time with Pearson similarity (user-based).
sim_pearson = {'name':'pearson', 'user_based':True}

# Rebinds `basic`, replacing the cosine model from the previous cell.
basic = knns.KNNBasic(min_k = 8, sim_options=sim_pearson)
basic.fit(drop_trainset)
predictions = basic.test(drop_testset)
print(accuracy.rmse(predictions))

Computing the pearson similarity matrix...
Done computing similarity matrix.
RMSE: 0.5940
0.5939947057244532


In [171]:
# KNNWithMeans with Pearson similarity (user-based).
sim_pearson = {'name':'pearson', 'user_based':True}

# NOTE: despite its name, `basic` holds a KNNWithMeans model from here on.
basic = knns.KNNWithMeans(min_k = 8, sim_options=sim_pearson)
basic.fit(drop_trainset)
predictions = basic.test(drop_testset)
print(accuracy.rmse(predictions))

Computing the pearson similarity matrix...
Done computing similarity matrix.
RMSE: 0.5520
0.5519886097490372


In [172]:
# KNNBaseline with Pearson similarity (user-based); no min_k is passed here,
# unlike the KNN models above.
sim_pearson = {'name':'pearson', 'user_based':True}
knn_baseline = knns.KNNBaseline(sim_options=sim_pearson)
knn_baseline.fit(drop_trainset)
predictions = knn_baseline.test(drop_testset)
print(accuracy.rmse(predictions))

Estimating biases using als...
Computing the pearson similarity matrix...
Done computing similarity matrix.
RMSE: 0.5502
0.5502146003207348


In [173]:
# SVD++ with the same hyperparameter values as GS_SVD.
# SVDpp is not imported by name in this notebook; it is presumably provided by
# the star import from surprise.prediction_algorithms — TODO confirm.
SVDppmodel = SVDpp(n_factors = 15, n_epochs = 30, lr_all = 0.003, reg_all = 0.2)

SVDppmodel.fit(drop_trainset)
predictions = SVDppmodel.test(drop_testset)
accuracy.rmse(predictions)

RMSE: 0.5229


0.5228835730345681

In [174]:
# SlopeOne with default settings, evaluated on the same held-out split.
Slope_One = SlopeOne()

Slope_One.fit(drop_trainset)
predictions = Slope_One.test(drop_testset)
accuracy.rmse(predictions)

RMSE: 0.6163


0.6163454586664328

In [175]:
# CoClustering with default settings, evaluated on the same held-out split.
cocluster = CoClustering()

cocluster.fit(drop_trainset)
# This is the last assignment to `predictions` in the model-comparison section.
predictions = cocluster.test(drop_testset)
accuracy.rmse(predictions)

RMSE: 0.6120


0.6120333233275684

Idea: combine the models above with a voting-style ensemble here, then fit the resulting model on the full trainset.

In [176]:
# Final model: SVD with the grid search's best-RMSE hyperparameters (not yet fitted here).
best_model = SVD(n_factors = 15, n_epochs = 30, lr_all = 0.003, reg_all = 0.2)

Fit the chosen model on the full dataset (no train-test split):


In [177]:
from surprise.dataset import *

# Build a trainset from every rating in data_drop (nothing held out) and fit
# the final model on it. The method is called on the dataset object itself.
trainset = data_drop.build_full_trainset()

best_model.fit(trainset)



<surprise.prediction_algorithms.matrix_factorization.SVD at 0x2c709c1e9a0>

In [222]:
# Flatten the Prediction objects into a table of user, item, true rating and estimate.
# NOTE(review): `predictions` is whatever was assigned last. In notebook order
# that is the CoClustering test-set output, not best_model's, and this cell's
# execution count suggests it was run out of order — confirm which model
# these rows belong to.
predictions_df = pd.DataFrame({"user": [prediction.uid for prediction in predictions],
                   "item": [prediction.iid for prediction in predictions],
                   "actual": [prediction.r_ui for prediction in predictions],
                   "estimated" :[prediction.est for prediction in predictions]})

# Displays the raw list of Prediction objects (not predictions_df).
predictions

[Prediction(uid='HDStitches', iid=2450, r_ui=4.0, est=3.2067630497387944, details={'was_impossible': False}),
 Prediction(uid='kimiko1968', iid=291971, r_ui=4.0, est=3.622981392785996, details={'was_impossible': False}),
 Prediction(uid='EnzoPasquale', iid=75857, r_ui=3.0, est=3.7577137914538965, details={'was_impossible': False}),
 Prediction(uid='meldc', iid=217, r_ui=4.0, est=3.36870636957685, details={'was_impossible': False}),
 Prediction(uid='carylwaite', iid=143844, r_ui=4.0, est=3.956592323602013, details={'was_impossible': False}),
 Prediction(uid='RagnaRokkOn', iid=344809, r_ui=3.0, est=4.214240212854932, details={'was_impossible': False}),
 Prediction(uid='SandraD', iid=84080, r_ui=1.0, est=3.622981392785996, details={'was_impossible': False}),
 Prediction(uid='SandraD', iid=57426, r_ui=4.0, est=3.322552523423004, details={'was_impossible': False}),
 Prediction(uid='GeorgieShaw', iid=141477, r_ui=4.0, est=3.622981392785996, details={'was_impossible': False}),
 Prediction(uid

predictions

In [178]:
# Number of distinct pattern_id values in the full input data (input_df),
# i.e. before the 'Finished' filter and before any train/test split.

len(input_df['pattern_id'].unique())

60252

In [179]:
# Every distinct user name in the input data; top_rated_15 uses this to decide
# whether a user is already known.
users_list = list(input_df['user'].unique())

In [180]:
#predictions for new user

def get_user_projects_finished(user):
    """Fetch a user's knitting projects from the Ravelry API as a DataFrame.

    Requests the user's project list (sorted with ``sort=completed_``), keeps
    the projects whose craft is 'Knitting' and that are linked to a pattern,
    and looks up each pattern's average rating and rating count with one extra
    request per project. Despite the function name, projects are not filtered
    by status here; the status is returned in the 'status' column.

    Parameters
    ----------
    user : str
        Ravelry user name, inserted into the request URL.

    Returns
    -------
    pandas.DataFrame
        One row per kept project with columns 'user', 'completed', 'rating',
        'status', 'pattern_id', 'average_rating' and 'rating_count'. The frame
        is empty (but has these columns) when nothing could be collected.

    Notes
    -----
    A ValueError raised while reading the responses (for example a body that
    is not JSON) prints 'not a user'; rows collected before the error are
    still returned.
    """
    columns = ['user', 'completed', 'rating', 'status', 'pattern_id', 'average_rating', 'rating_count']
    auth = (creds['id'], creds['key'])

    url ='https://api.ravelry.com/projects/' + user + '/list.json?sort=completed_'
    response = requests.get(url, auth=auth)
    projects = []
    try:
        for project in response.json()['projects']:
            if project['craft_name'] != 'Knitting' or project['pattern_id'] is None:
                continue

            # The patterns endpoint keys its result by the id as a string.
            pattern_key = str(int(project['pattern_id']))
            pattern_url ='https://api.ravelry.com/patterns.json?ids=' + pattern_key
            # Parse the pattern response once and reuse it for both fields.
            pattern = requests.get(pattern_url, auth=auth).json()['patterns'][pattern_key]

            projects.append((user, project['completed'], project['rating'], project['status_name'],
                             project['pattern_id'],
                             pattern['rating_average'],
                             pattern['rating_count']))
    except ValueError:
        print('not a user')

    # Built once, after the loop, so a result always exists even when no row
    # was collected (previously this case raised UnboundLocalError).
    return pd.DataFrame(projects, columns = columns)


In [181]:
def get_user_projects_not_finished(user):
    """Return the pattern ids of a user's projects whose status is not 'Finished'.

    Parameters
    ----------
    user : str
        Ravelry user name, inserted into the request URL.

    Returns
    -------
    list
        Distinct, non-null pattern ids in no particular order; an empty list
        when the user has no projects.
    """
    response = requests.get('https://api.ravelry.com/projects/' + user + '/list.json',
                            auth=(creds['id'], creds['key']))

    projects_df = pd.DataFrame(response.json()['projects'])
    # An empty project list yields a frame without columns; return early
    # instead of failing on the 'status_name' lookup.
    if projects_df.empty:
        return []

    unfinished = projects_df[projects_df['status_name'] != 'Finished']
    return list(set(unfinished['pattern_id'].dropna()))

In [182]:
def get_user_queue(user):
    """Return the pattern ids found in a user's Ravelry queue.

    Only one request with ``page_size=100`` is made.
    NOTE(review): presumably longer queues are paginated and only the first
    page is read here — confirm against the API.

    Parameters
    ----------
    user : str
        Ravelry user name, inserted into the request URL.

    Returns
    -------
    list
        Distinct, non-null pattern ids in no particular order; an empty list
        when the queue is empty.
    """
    response = requests.get('https://api.ravelry.com/people/' + user + '/queue/list.json?page_size=100',
                            auth=(creds['id'], creds['key']))

    queue_df = pd.DataFrame(response.json()['queued_projects'])
    # An empty queue yields a frame without a 'pattern_id' column.
    if queue_df.empty:
        return []

    return list(set(queue_df['pattern_id'].dropna()))

In [183]:
def get_user_favorites(user):
    """Return the ids of the patterns a user has favourited on Ravelry.

    Only favourites whose 'type' is 'pattern' are kept; the id is read from
    the nested 'favorited' object. A single request with ``page_size=100`` is
    made.
    NOTE(review): presumably longer favourites lists are paginated and only
    the first page is read here — confirm against the API.

    Parameters
    ----------
    user : str
        Ravelry user name, inserted into the request URL.

    Returns
    -------
    list
        Pattern ids in response order; an empty list when the user has no
        favourites or none of them is a pattern.
    """
    response = requests.get('https://api.ravelry.com/people/' + user + '/favorites/list.json?page_size=100',
                            auth=(creds['id'], creds['key']))

    favorites = response.json()['favorites']
    # Reading the records directly avoids the KeyError the DataFrame route hit
    # when the list was empty or contained no pattern favourites.
    return [favorite['favorited']['id']
            for favorite in favorites
            if favorite.get('type') == 'pattern' and favorite.get('favorited')]

In [184]:
def _predict_top_unseen(user, ratings_df, model, n=15):
    """Score every pattern the user has not interacted with and keep the best n.

    A pattern counts as already seen when it appears in the user's rows of
    ratings_df, or in the user's favourites, queue or unfinished projects.
    """
    # NaN ids are dropped: they do not identify a pattern that could be recommended.
    candidate_patterns = ratings_df['pattern_id'].dropna().unique()

    # A set makes the per-candidate membership test constant-time.
    previously_interacted = set(ratings_df[ratings_df['user'] == user]['pattern_id'])
    previously_interacted.update(get_user_favorites(user))
    previously_interacted.update(get_user_queue(user))
    previously_interacted.update(get_user_projects_not_finished(user))

    predictions = [model.predict(user, pattern)
                   for pattern in candidate_patterns
                   if pattern not in previously_interacted]

    predictions_df = pd.DataFrame({"user": [prediction.uid for prediction in predictions],
                                   "item": [prediction.iid for prediction in predictions],
                                   "actual": [prediction.r_ui for prediction in predictions],
                                   "estimated" :[prediction.est for prediction in predictions]})

    return predictions_df.sort_values('estimated', ascending = False).head(n)


def top_rated_15(user):
    """Return the 15 patterns with the highest estimated rating for a user.

    For a user already present in users_list, the fitted best_model is used as
    is. For any other user, the user's projects are fetched from the API,
    appended to input_df, and best_model is refitted on the combined rated,
    finished projects before predicting. Note that this refit mutates the
    module-level best_model.

    Parameters
    ----------
    user : str
        Ravelry user name.

    Returns
    -------
    pandas.DataFrame or str
        A frame with columns 'user', 'item', 'actual' and 'estimated', sorted
        by 'estimated' descending and holding at most 15 rows. For an unknown
        user, the string 'User not in model' is returned when no projects can
        be fetched or when any step of the refit/prediction fails.
    """
    if user in users_list:
        return _predict_top_unseen(user, input_df, best_model)

    try:
        new_user_ratings = get_user_projects_finished(user)
        if new_user_ratings.empty:
            return 'User not in model'

        # DataFrame.append was removed in pandas 2.0; concat with
        # ignore_index=True produces the same fresh 0..n-1 index.
        new_user_input_df = pd.concat([input_df, new_user_ratings], ignore_index=True)

        finished_projects_new_user = new_user_input_df[new_user_input_df['status'] == 'Finished']
        df_drop_nans_new_user = finished_projects_new_user[['user', 'pattern_id', 'rating']].dropna(subset = ['rating'])

        data_drop_new_user = Dataset.load_from_df(df_drop_nans_new_user, Reader())
        best_model.fit(data_drop_new_user.build_full_trainset())

        return _predict_top_unseen(user, new_user_input_df, best_model)

    except Exception:
        # Best effort for unknown users: any failure while fetching, refitting
        # or predicting is reported as the user not being available.
        return 'User not in model'
        

In [231]:
def recommendation_links(ratings):
    """Turn the output of top_rated_15 into Ravelry pattern-library links.

    Parameters
    ----------
    ratings : pandas.DataFrame or str
        Result of top_rated_15. When it is the message string returned for
        users that cannot be scored, the message is printed and no links are
        produced.

    Returns
    -------
    list of str
        One 'ravelry.com/patterns/library/<permalink>' entry per row of
        ratings, in the same order.
    """
    if isinstance(ratings, str):
        print(ratings)
        return []

    links = []
    for pattern in list(ratings['item']):
        # One API request per recommended pattern to resolve its permalink.
        pattern_url ='https://api.ravelry.com/patterns.json?ids=' + str(pattern)
        pattern_response = requests.get(pattern_url, auth=(creds['id'], creds['key']))
        links.append('ravelry.com/patterns/library/' + str(pattern_response.json()['patterns'][str(pattern)]['permalink']))
    return links


ratings = top_rated_15('jacquieblackman')

recs = recommendation_links(ratings)

recs

['ravelry.com/patterns/library/octopus',
 'ravelry.com/patterns/library/olive--jack',
 'ravelry.com/patterns/library/31-baby-sweaters-hat--baby-blankets',
 'ravelry.com/patterns/library/still-light-tunic',
 'ravelry.com/patterns/library/katies-kep',
 'ravelry.com/patterns/library/mitered-crosses-blanket-for-mercy-corps',
 'ravelry.com/patterns/library/nr-7-marius-panneband',
 'ravelry.com/patterns/library/cladonia',
 'ravelry.com/patterns/library/circlet',
 'ravelry.com/patterns/library/sweet-dreams-17',
 'ravelry.com/patterns/library/baby-mitts-2',
 'ravelry.com/patterns/library/vogel-grundanleitung',
 'ravelry.com/patterns/library/vanilla-socks-on-magic-loop',
 'ravelry.com/patterns/library/wonder-woman-wrap-knit',
 'ravelry.com/patterns/library/tulips-a-colorful-cardigan-for-baby']

In [232]:
# Top-15 predictions for this user, then one API lookup per pattern id to
# turn each recommendation into a pattern-library link.
ratings = top_rated_15('scarahliz')

recs = []

for pattern in list(ratings['item']):

    # The patterns endpoint keys its result by the id as a string.
    pattern_url ='https://api.ravelry.com/patterns.json?ids=' + str(pattern)
    pattern_response = requests.get(pattern_url, auth=(creds['id'], creds['key']))
    recs.append('ravelry.com/patterns/library/' + str(pattern_response.json()['patterns'][str(pattern)]['permalink']))
    
recs

['ravelry.com/patterns/library/31-baby-sweaters-hat--baby-blankets',
 'ravelry.com/patterns/library/octopus',
 'ravelry.com/patterns/library/still-light-tunic',
 'ravelry.com/patterns/library/mitered-crosses-blanket-for-mercy-corps',
 'ravelry.com/patterns/library/olive--jack',
 'ravelry.com/patterns/library/katies-kep',
 'ravelry.com/patterns/library/baby-mitts-2',
 'ravelry.com/patterns/library/nr-7-marius-panneband',
 'ravelry.com/patterns/library/tulips-a-colorful-cardigan-for-baby',
 'ravelry.com/patterns/library/sweet-dreams-17',
 'ravelry.com/patterns/library/vanilla-socks-on-magic-loop',
 'ravelry.com/patterns/library/inishmore-cap',
 'ravelry.com/patterns/library/nkken-2',
 'ravelry.com/patterns/library/cladonia',
 'ravelry.com/patterns/library/be-loving']

In [233]:
# Show the prediction table from the most recent top_rated_15 call.
ratings

Unnamed: 0,user,item,actual,estimated
37262,scarahliz,386272,,3.860243
1160,scarahliz,14072,,3.851175
4411,scarahliz,180466,,3.829881
2801,scarahliz,235123,,3.829211
28902,scarahliz,717881,,3.819624
823,scarahliz,1017282,,3.799733
1557,scarahliz,181471,,3.790254
28282,scarahliz,114847,,3.788496
2985,scarahliz,5106,,3.784994
3425,scarahliz,318247,,3.782296


In [234]:
# Inspect the raw project rows fetched for this user (all statuses are included).
get_user_projects_finished('scarahliz')

Unnamed: 0,user,completed,rating,status,pattern_id,average_rating,rating_count
0,scarahliz,2021/02/06,3.0,Finished,1026858,4.857143,7.0
1,scarahliz,2020/12/20,4.0,Finished,686706,4.871324,272.0
2,scarahliz,2020/06/30,4.0,Finished,613512,4.777778,9.0
3,scarahliz,2019/12/23,3.0,Finished,572559,4.571429,21.0
4,scarahliz,2018/11/30,4.0,Finished,426231,4.735076,5662.0
...,...,...,...,...,...,...,...
83,scarahliz,,,Frogged,77059,4.176471,51.0
84,scarahliz,,,Finished,506008,4.625000,24.0
85,scarahliz,,,Frogged,88,4.225446,6618.0
86,scarahliz,,,,109176,3.925926,27.0


In [228]:
# Top-15 predictions for this user, then one API lookup per pattern id to
# turn each recommendation into a pattern-library link.
ratings = top_rated_15('clare240')

recs = []

for pattern in list(ratings['item']):

    # The patterns endpoint keys its result by the id as a string.
    pattern_url ='https://api.ravelry.com/patterns.json?ids=' + str(pattern)
    pattern_response = requests.get(pattern_url, auth=(creds['id'], creds['key']))
    recs.append('ravelry.com/patterns/library/' + str(pattern_response.json()['patterns'][str(pattern)]['permalink']))
    
recs

['ravelry.com/patterns/library/octopus',
 'ravelry.com/patterns/library/31-baby-sweaters-hat--baby-blankets',
 'ravelry.com/patterns/library/mitered-crosses-blanket-for-mercy-corps',
 'ravelry.com/patterns/library/olive--jack',
 'ravelry.com/patterns/library/katies-kep',
 'ravelry.com/patterns/library/still-light-tunic',
 'ravelry.com/patterns/library/cladonia',
 'ravelry.com/patterns/library/bousta-beanie',
 'ravelry.com/patterns/library/sweet-dreams-17',
 'ravelry.com/patterns/library/two-by-two-basic-beanie',
 'ravelry.com/patterns/library/baby-mitts-2',
 'ravelry.com/patterns/library/monkey-jacobus',
 'ravelry.com/patterns/library/tulips-a-colorful-cardigan-for-baby',
 'ravelry.com/patterns/library/vanilla-socks-on-magic-loop',
 'ravelry.com/patterns/library/wonder-woman-wrap-knit']

In [229]:
# Top-15 predictions for this user, then one API lookup per pattern id to
# turn each recommendation into a pattern-library link.
ratings = top_rated_15('scalesknits')

recs = []

for pattern in list(ratings['item']):

    # The patterns endpoint keys its result by the id as a string.
    pattern_url ='https://api.ravelry.com/patterns.json?ids=' + str(pattern)
    pattern_response = requests.get(pattern_url, auth=(creds['id'], creds['key']))
    recs.append('ravelry.com/patterns/library/' + str(pattern_response.json()['patterns'][str(pattern)]['permalink']))
    
recs

['ravelry.com/patterns/library/octopus',
 'ravelry.com/patterns/library/31-baby-sweaters-hat--baby-blankets',
 'ravelry.com/patterns/library/mitered-crosses-blanket-for-mercy-corps',
 'ravelry.com/patterns/library/still-light-tunic',
 'ravelry.com/patterns/library/olive--jack',
 'ravelry.com/patterns/library/katies-kep',
 'ravelry.com/patterns/library/nr-7-marius-panneband',
 'ravelry.com/patterns/library/baby-mitts-2',
 'ravelry.com/patterns/library/cladonia',
 'ravelry.com/patterns/library/sweet-dreams-17',
 'ravelry.com/patterns/library/circlet',
 'ravelry.com/patterns/library/tulips-a-colorful-cardigan-for-baby',
 'ravelry.com/patterns/library/owlet-6',
 'ravelry.com/patterns/library/bousta-beanie',
 'ravelry.com/patterns/library/magrathea']

In [230]:
# Top-15 predictions for this user, then one API lookup per pattern id to
# turn each recommendation into a pattern-library link.
ratings = top_rated_15('elfsmirk')

recs = []

for pattern in list(ratings['item']):

    # The patterns endpoint keys its result by the id as a string.
    pattern_url ='https://api.ravelry.com/patterns.json?ids=' + str(pattern)
    pattern_response = requests.get(pattern_url, auth=(creds['id'], creds['key']))
    recs.append('ravelry.com/patterns/library/' + str(pattern_response.json()['patterns'][str(pattern)]['permalink']))
    
recs

['ravelry.com/patterns/library/octopus',
 'ravelry.com/patterns/library/katies-kep',
 'ravelry.com/patterns/library/31-baby-sweaters-hat--baby-blankets',
 'ravelry.com/patterns/library/olive--jack',
 'ravelry.com/patterns/library/mitered-crosses-blanket-for-mercy-corps',
 'ravelry.com/patterns/library/still-light-tunic',
 'ravelry.com/patterns/library/sweet-dreams-17',
 'ravelry.com/patterns/library/tulips-a-colorful-cardigan-for-baby',
 'ravelry.com/patterns/library/nr-7-marius-panneband',
 'ravelry.com/patterns/library/space-girl',
 'ravelry.com/patterns/library/baby-mitts-2',
 'ravelry.com/patterns/library/camilla-blanket',
 'ravelry.com/patterns/library/cladonia',
 'ravelry.com/patterns/library/owlet-6',
 'ravelry.com/patterns/library/nkken-2']

In [None]:
import pickle

# Persist the final fitted recommender. The previous target name (lm_kBest) is
# not defined anywhere in this notebook, so best_model (the SVD fitted on the
# full trainset) is saved instead.
# The context manager closes the file even if dumping fails.
with open('pickled_files/model.pickle', 'wb') as pickle_out:
    pickle.dump(best_model, pickle_out)