In [None]:
# Enable IPython's autoreload extension in mode 2: imported modules are
# reloaded before each cell executes, so edits to the project's source files
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import os
import sys

# Put the directory three levels above the current working directory on the
# import path so the `src.*` imports in the following cell resolve.
# NOTE(review): presumably this is the repository root; it depends on the
# directory the notebook kernel is started from.
sys.path.append(os.path.join(os.getcwd(), '..', '..', '..'))

import pandas as pd
from tqdm import tqdm

In [None]:
from src.defaults import REPO_DS, TRAIN_DS, TEST_DS
from src.lightfm.recommender import LightFMModel

In [None]:
# Read the repository, test and train CSV files from the paths exported by
# `src.defaults`; the train frame is ordered by its `full_name` column.
# NOTE(review): the visible cells call fit(train=None) / evaluate(test=None),
# so these frames are not handed to the model here — confirm they are needed.
repo_features = pd.read_csv(REPO_DS)
test = pd.read_csv(TEST_DS)
train = pd.read_csv(TRAIN_DS)
train = train.sort_values(by='full_name')

In [None]:
# Column names of the count-style features (issues, branches, forks,
# milestones, pull requests, stars).
# NOTE(review): presumably columns of the repository feature table; the list
# is not referenced by the other visible cells.
NUM_FEATURES = [
    'n_all_issues',
    'n_branches',
    'n_closed_issues',
    'n_forks',
    'n_milestones_all',
    'n_milestones_closed',
    'n_milestones_open',
    'n_open_issues',
    'n_pr_all',
    'n_pr_closed',
    'n_pr_open',
    'n_stars',
]

In [None]:
# Crawler handle passed to LightFMModel and used later to crawl a repository.
# NOTE(review): it is left as None here, yet a later cell calls
# `github_crawler.crawl_extract_repository(repo_name)`, which raises
# AttributeError on None — assign a real crawler instance before running it.
github_crawler = None

In [None]:
# Build the project's LightFM wrapper around the crawler handle and run its
# preparation step before any fit/evaluate call.
# NOTE(review): what prepare() loads or builds is not visible from this
# notebook — see src/lightfm/recommender.py.
lightfm = LightFMModel(crawler=github_crawler)
lightfm.prepare()

In [None]:
# Grid search over the model's hyper-parameters. Every combination of loss,
# embedding size, optimizer and user-feature type is fitted and evaluated
# N_REPEATS times; one result record per run is collected in `data_results`
# so the repeats can be averaged afterwards.
N_REPEATS = 3

losses = ['bpr', 'warp']
no_components_list = [10, 25, 50]
optimizers = ['adagrad', 'adadelta']
user_features_types = [None, 'description', 'numerical']

data_results = []

# Total number of fit/evaluate runs, used only to size the progress bar.
total = (len(losses) * len(no_components_list) * len(optimizers)
         * len(user_features_types) * N_REPEATS)
with tqdm(total=total) as pbar:
    for loss in losses:
        for no_components in no_components_list:
            for optimizer in optimizers:
                for user_features_type in user_features_types:
                    for run in range(N_REPEATS):
                        # Pass the feature type being swept. It was previously
                        # hard-coded to None, so every row labelled
                        # 'description' or 'numerical' was really a
                        # no-features run.
                        lightfm.fit(train=None,
                                    user_features_type=user_features_type,
                                    no_components=no_components, loss=loss,
                                    learning_schedule=optimizer)
                        results = lightfm.evaluate(test=None)
                        # Tag the metrics with the configuration that produced them.
                        results['optimizer'] = optimizer
                        results['no_components'] = no_components
                        results['loss'] = loss
                        results['user_features'] = user_features_type
                        data_results.append(results)
                        pbar.update(1)

In [None]:
# Turn the per-run records into a table, average the repeated runs of each
# configuration and rank the configurations by recall@5 (best first).
data_results = pd.DataFrame(data_results)

# groupby() drops rows whose key is missing, so the runs without user
# features (key None) need a placeholder label. Only the grouping column is
# filled: a frame-wide fillna('none') would also replace any NaN metric with
# a string and break the numeric mean of that column.
(
    data_results
    .fillna({'user_features': 'none'})
    .groupby(['optimizer', 'loss', 'no_components', 'user_features'])
    .mean()
    .sort_values(by='recall@5', ascending=False)
    .round(3)
)

In [None]:
# Fit a single model with one fixed configuration and show its evaluation.
# NOTE(review): presumably the best configuration from the sweep above.
kwargs = dict(
    user_features_type=None,
    num_epochs=15,
    no_components=50,
    loss='warp',
    learning_schedule='adagrad',
)

lightfm.fit(train=None, **kwargs)
lightfm.evaluate(test=None)

In [None]:
# Repository identifier (in `owner/name` form) that the following cells
# crawl and request predictions for.
repo_name = 'lkiesow/python-feedgen'

In [None]:
# Fetch the data for `repo_name` through the crawler.
# `github_crawler` is assigned None earlier in this notebook; calling a method
# on None would only surface as an opaque AttributeError, so fail early with
# an actionable message instead.
if github_crawler is None:
    raise RuntimeError(
        'github_crawler is None; assign a crawler instance before crawling a repository.')
repository = github_crawler.crawl_extract_repository(repo_name)

In [None]:
# Convert the crawl result to a DataFrame once and derive two views from it:
#   * this_repo_features: every column except `repo_requirements`, de-duplicated
#   * this_test: only the `full_name` / `repo_requirements` pair
repository = pd.DataFrame(repository)
feature_columns_only = repository.drop(columns=['repo_requirements'])
this_repo_features = feature_columns_only.drop_duplicates()
this_test = repository[['full_name', 'repo_requirements']]

In [None]:
# Hand the crawled repository's requirement rows and feature rows to the
# model's refit step.
# NOTE(review): presumably this extends the fitted model with the new
# repository — confirm against LightFMModel.refit_model.
lightfm.refit_model(this_test, this_repo_features)

In [None]:
# Display the model's prediction output for the crawled repository.
lightfm.predict_repo(repo_name)

In [None]:
# Display the model's `num_users` attribute.
# NOTE(review): presumably the user count after the refit — confirm.
lightfm.num_users

In [None]:
# Display the model's `num_items` attribute.
# NOTE(review): presumably the item count after the refit — confirm.
lightfm.num_items