# Model training
Using a neural network for the recommender system

In [1]:
import pandas as pd

In [2]:
# Load the activities table from its CSV file and preview the first rows
activites = pd.read_csv('activities.csv')
activites.head()

Unnamed: 0,id,type,description,url,lattitude,longitude,score
0,0,sport,climbing,https://www.facebook.com/groups/escaladeromand...,46.512947,6.624772,1
1,1,sport,badminton,http://www.badmintonlausanne.ch/,46.52829,6.601945,4
2,2,sport,swimming pool,https://www.lausanne-tourisme.ch/fr/decouvrir/...,46.522474,6.605101,3
3,3,sport,climbing,https://totem.ch/?ec,46.516749,6.548327,4
4,4,meditation,yoga,https://totem.ch/yoga#studio?smooth,46.516749,6.548327,4


In [3]:
# Read the CSV of interactions and sort it by timestamp.
# A stable sort is requested so that rows sharing the same timestamp keep
# their file order: the chronological train/test split that follows is then
# reproducible from one run to the next.
data = pd.read_csv('interactions.csv')
data_sorted = data.sort_values('timestamp', kind='mergesort')
data_sorted.head()

Unnamed: 0,user_id,item_id,rating,timestamp
0,1,0,2,1616193271
2,1,2,0,1616193911
1,1,1,1,1616194035
3,1,3,0,1616194210
4,0,2,2,1616194210


In [4]:
# Hold out the end of the timestamp-sorted interactions for testing:
# the first 75% of the rows are used for training, the rest for the test.
CUTOFF = 0.75
cutoff_idx = int(CUTOFF * len(data_sorted))

# Split the sorted interactions at the cutoff position
data_train, data_test = data_sorted.iloc[:cutoff_idx], data_sorted.iloc[cutoff_idx:]

data_train.shape, data_test.shape

((6, 4), (3, 4))

In [6]:
def get_intersection_test_and_train(field):
    """
    Return the set of values of `field` that are present in both
    the test dataset (`data_test`) and the train dataset (`data_train`).
    """

    test_values = set(data_test[field].unique())
    train_values = set(data_train[field].unique())

    return test_values.intersection(train_values)


# Values of user_id and item_id that are shared by the test and train datasets
interactions_user = get_intersection_test_and_train('user_id')
intractions_item = get_intersection_test_and_train('item_id')

# Keep only the test rows whose user AND item also appear in the train
# dataset, so that the test dataset does not contain "solo" data
data_test_clean = data_test[
    data_test['user_id'].isin(interactions_user)
    & data_test['item_id'].isin(intractions_item)
]

data_test_clean.shape

(3, 4)

In [7]:
def is_test_data_all_in_train(field):
    """
    Tell whether every value of `field` found in `data_test_clean`
    is also present in the same column of `data_train`.
    """
    train_values = data_train[field]
    found_in_train = data_test_clean[field].isin(train_values)
    return found_in_train.all()

# Sanity check: the cleaned test dataset only holds users and items known from training
assert all(is_test_data_all_in_train(field) for field in ('user_id', 'item_id'))

In [18]:
def create_mapping_from_data_train(field):
    """
    Build a dictionary that maps each distinct value of `field` in
    `data_train` to its position (0, 1, 2, ...) in order of first appearance.
    """
    distinct_values = data_train[field].unique()
    return dict(zip(distinct_values, range(len(distinct_values))))

# Lookup tables from the raw user / item identifiers to their training index
user_to_id = create_mapping_from_data_train('user_id')
item_to_id = create_mapping_from_data_train('item_id')

def create_dataset_from_mapping(init_dataset, *fields):
    """
    Returns a clone of the dataset by applying the mappings
    created with `create_mapping_from_data_train` on each field
    listed in the parameters.

    Parameters:
        init_dataset: the DataFrame to convert; it is not modified.
        *fields: names of the columns whose values must be replaced
            by their mapped value.

    Returns:
        A copy of `init_dataset` in which every listed column holds the
        mapped values; the other columns are left as they are.

    Raises:
        KeyError: if a listed column holds a value that is absent from
            the mapping built for that column.
    """

    dataset = init_dataset.copy()

    for field in fields:
        mapping = create_mapping_from_data_train(field)
        # Convert the values of the dataset that was passed in. Reading them
        # from the global `dataset_train` would fail on a fresh kernel and
        # would write the train values into every other dataset.
        dataset[field] = init_dataset[field].apply(lambda raw: mapping[raw])

    return dataset

# Build the train and test datasets by passing each split through
# `create_dataset_from_mapping` for the user_id and item_id columns.
# Both calls are evaluated before either name is assigned.
dataset_train, dataset_test = create_dataset_from_mapping(data_train, 'user_id', 'item_id'), create_dataset_from_mapping(data_test_clean, 'user_id', 'item_id')
dataset_train.shape, dataset_test.shape

((6, 4), (3, 4))

In [None]:
def get_x_y(dataset, x_fields, y_field):
    """
    Split `dataset` into model inputs and target.

    Returns a tuple `(x, y)` where `x` holds the values of the columns
    listed in `x_fields` and `y` holds the values of the `y_field` column.
    """
    inputs = dataset[x_fields].values
    target = dataset[y_field].values
    return inputs, target

# Inputs are the (user_id, item_id) pairs and the target is the rating,
# extracted the same way for the train and the test datasets
X_train, y_train = get_x_y(dataset_train, x_fields=['user_id', 'item_id'], y_field='rating')
X_test, y_test = get_x_y(dataset_test, x_fields=['user_id', 'item_id'], y_field='rating')