In [None]:
import os 
import numpy as np 
import pandas as pd
from time import time
 
from train_test import get_train_test, convert_df_to_matrix, _read_df_in_format

# Surprise package for dataloading and evaluation
from surprise import Dataset, Reader
from surprise import accuracy as acc    
from surprise import KNNWithMeans, SVD, NMF, SlopeOne, BaselineOnly, NormalPredictor

# Baseline 1: Averaging
from averaging import UserAverage, ItemAverage, UserItemAverage

# Baseline 2: Iterative SVD 
from svdals import normalize, ALS

# Baseline 3: Neural Collaborative Filtering
import torch
from torch.utils.data import DataLoader
from torch.optim import Adam, SGD
from torch.nn import MSELoss

from ncf import CFDataset, GMF, MLP, NeuMF
from ncf import nn_train, nn_predict

# BFM + Adaptions 
from bfm import run_bfm, generate_clusters, run_bfm_augmented, get_RelationBlocks

import myfm

from sklearn.linear_model import ElasticNet

The code is structured as follows:
1. Baseline: Simple averaging
2. Baseline: Iterative SVD + ALS
3. Baseline: NCF
4. BFM + Simple Adaptations
5. RainFM

The data used are those provided within the Kaggle competition. To set the data folder, please refer to the **Data Loading** section and change the ```data_folder```.  
To produce the exact Kaggle outputs, please refer to the **RainFM** section and change the ```TRAIN_MODE``` to be False.

In [146]:
# Evaluation helper used by every experiment in this notebook
def print_scores(y_true, y_pred, name):
    """Print the RMSE and MAE of a set of predictions.

    Args:
    - y_true: Ground-truth ratings (array-like supporting element-wise subtraction).
    - y_pred: Predicted ratings, element-wise comparable with `y_true`.
    - name: Label printed in front of the scores.

    Returns:
    - None; the scores are written to stdout (RMSE with 5 decimals, MAE with 4).
    """
    residual = y_true - y_pred
    rmse = np.sqrt(np.mean(residual ** 2))
    mae = np.mean(np.abs(residual))
    print(f"Method: {name}, RMSE: {rmse:.5f}, MAE: {mae:.4f}")

# Data Loading

NOTE: These experiments are conducted on a custom train-test split for comparison. To generate the final results for submission, the data will just be the full training data set (train_df_full) instead. This can be adjusted via the ```TRAIN_MODE``` for our final method and submission.

In [None]:
# Folder containing data_train.csv and sampleSubmission.csv; the final submission CSV is also written here.
data_folder = '../data/' # SET THIS TO YOUR DATA FOLDER

In [3]:
# Custom train/test split used to compare methods (split_num presumably selects the split; see train_test.py).
train_df, test_df = get_train_test(os.path.join(data_folder, 'data_train.csv'), split_num=0)
train_matrix = convert_df_to_matrix(train_df)

# Ground-truth ratings of the held-out split, used by print_scores throughout.
y_true = test_df['Prediction'].to_numpy()

# Full training data and the submission template, used when TRAIN_MODE is False.
train_df_full = _read_df_in_format(os.path.join(data_folder, 'data_train.csv'))
train_matrix_full = convert_df_to_matrix(train_df_full)
ans_df = _read_df_in_format(os.path.join(data_folder, 'sampleSubmission.csv'))

In [4]:
# Surprise views of the data: the same frames wrapped as Surprise trainsets/testsets.
reader = Reader(rating_scale=(1, 5))
train_data = Dataset.load_from_df(train_df, reader)
test_data = Dataset.load_from_df(test_df, reader)

# trainset: fitted on by Surprise algorithms; testset: the held-out ratings in the form algo.test() expects.
trainset = train_data.build_full_trainset()
testset = test_data.build_full_trainset().build_testset()
# All (user, item) pairs that have no rating in trainset.
anti_trainset = trainset.build_anti_testset()

# Same objects built from the full training data (no hold-out).
train_data_full = Dataset.load_from_df(train_df_full, reader)
trainset_full = train_data_full.build_full_trainset()
anti_trainset_full = trainset_full.build_anti_testset()

# Submission entries as a Surprise testset, used for the final Kaggle predictions.
ans_data = Dataset.load_from_df(ans_df, reader)
ansset = ans_data.build_full_trainset().build_testset()

# Baseline 1: Averages

This is performed primarily as a sanity check for future methods; it represents the average across the various dimensions

In [10]:
# Fit and score each averaging baseline on the custom split.
methods = [UserAverage, ItemAverage, UserItemAverage]

for method in methods:
    start_time = time()
    algo = method()
    algo.fit(trainset)
    predictions = algo.test(testset)
    # Scores are computed before the timer is read, so the reported time includes evaluation.
    rmse = acc.rmse(predictions, False)
    mae = acc.mae(predictions, False)
    elapsed = time() - start_time
    print(f"Method: {method.__name__}, RMSE: {rmse:.4f}, MAE: {mae:.4f}, Time: {elapsed:.2f}")

Method: UserAverage, RMSE: 1.0949, MAE: 0.9009, Time: 3.20
Method: ItemAverage, RMSE: 1.0309, MAE: 0.8398, Time: 1.22
Method: UserItemAverage, RMSE: 1.0314, MAE: 0.8482, Time: 2.17


# Baseline 2: SVD + ALS

This method first applies SVD iteratively with shrinkage to initialize the U and V matrices. The decomposition is then truncated to rank $k$, and Alternating Least Squares (ALS) is run to refine U and V.

In [5]:
# Mark unobserved entries (stored as 0) as NaN so they are distinguishable from ratings during normalisation.
train_matrix_na = train_matrix.copy()
train_matrix_na[train_matrix_na == 0] = np.nan
A, mean, std = normalize(train_matrix_na)

# Back to a dense array with missing entries set to 0, plus a boolean mask for ALS.
A = A.to_numpy()
A[np.isnan(A)] = 0
# NOTE(review): the mask is derived from "value != 0" after normalisation, so an observed rating whose
# normalised value is exactly 0 is treated as missing. Consider taking the mask from the NaN positions instead.
mask_A = A != 0

In [None]:
# Run iterative SVD followed by ALS on the normalised matrix and report the wall-clock time.
als = ALS()
start = time()
U, V = als.ALS(A, mask_A, k=3, shrinkage=30, lambd=0.1, n_iter_svd=5, n_iter_als=20)
end = time(); print(f"Time: {end - start:.2f}")

Initializing IterSVD
IterSVD completelete
Iteration 1		Error after solving for U matrix: 0.8906752977216362		Error after solving for V matrix: 0.8708053729411698
Iteration 2		Error after solving for U matrix: 0.8694372505362978		Error after solving for V matrix: 0.8636924463845832
Iteration 3		Error after solving for U matrix: 0.8645094918264005		Error after solving for V matrix: 0.8616159457467563
Iteration 4		Error after solving for U matrix: 0.862471854297844		Error after solving for V matrix: 0.8606922252739754
Iteration 5		Error after solving for U matrix: 0.8614053721819654		Error after solving for V matrix: 0.8601915538561902
Iteration 6		Error after solving for U matrix: 0.8607683438029089		Error after solving for V matrix: 0.8598855986892148
Iteration 7		Error after solving for U matrix: 0.8603541840677057		Error after solving for V matrix: 0.8596830864651613
Iteration 8		Error after solving for U matrix: 0.8600684730715922		Error after solving for V matrix: 0.859541208642487


In [8]:
# Reconstruct the rating matrix from the factors (mean/std are passed so predict can undo the normalisation — presumably).
predictions = als.predict(U, V, mean, std)
# Shift the ids by one to index the prediction matrix (ids are presumably 1-based; confirm in train_test.py).
row_ids = test_df.row.to_numpy() - 1
col_ids = test_df.col.to_numpy() - 1
test_preds = predictions[row_ids, col_ids]

In [14]:
print_scores(y_true, test_preds, "Iterative SVD with ALS")

Method: Iterative SVD with ALS, RMSE: 0.9921, MAE: 0.7896


# Baseline 3: NCF

This uses Neural Collaborative Filtering (NCF). To improve training, a Generalized Matrix Factorization (GMF) model and a Multi-Layer Perceptron (MLP) are first trained separately, and their weights are then used as pre-trained weights for the final model.

In [None]:
# PyTorch datasets/loaders for the NCF models.
# These use NCF-specific names on purpose: `trainset` and `testset` are the Surprise objects built in the
# data-loading cell and are still needed by later cells (generate_clusters, get_predictions, *.fit(trainset)).
# Rebinding them here would break a top-to-bottom run of the notebook.
ncf_trainset = CFDataset(train_df.values)
train_loader = DataLoader(ncf_trainset, batch_size=256, shuffle=True)

# No shuffling on the test loader so predictions stay in test_df row order (they are compared against y_true).
ncf_testset = CFDataset(test_df.values)
test_loader = DataLoader(ncf_testset, batch_size=256, shuffle=False)

In [None]:
# Shared NCF hyper-parameters: embedding size, MLP hidden layer widths, and the matrix dimensions.
latent_dim = 32
hidden_dims = [64, 32]
# Users/items are taken from the shape of the training matrix (rows = users, columns = items).
num_users, num_items = train_matrix.shape

## GMF
Generalized Matrix Factorization

In [None]:
# Train the GMF model on its own; its weights are later used to initialise NeuMF.
gmf = GMF(latent_dim=latent_dim, num_users=num_users, num_items=num_items)
loss_function = MSELoss()
optimizer = Adam(gmf.parameters(), lr=0.001)

# torch.save does not create missing parent directories, so make sure the
# checkpoint folder exists before spending time on training.
os.makedirs('models', exist_ok=True)

model = nn_train(gmf, train_loader, loss_function, optimizer)
torch.save(model.state_dict(), 'models/gmf.pth')

Epoch: 9, Batch: 3600, Loss: 1.038

In [None]:
# Score GMF on the held-out split; predictions are clamped to the 1-5 rating range first.
y_pred_gmf = np.clip(nn_predict(gmf, test_loader), 1, 5)
print_scores(y_true, y_pred_gmf, "GMF Only")

Method: GMF Only, RMSE: 1.0822, MAE: 0.8795


## MLP
Multi-Layer Perceptron

In [None]:
# Train the MLP model on its own; its weights are later used to initialise NeuMF.
mlp = MLP(latent_dim=latent_dim, num_users=num_users, num_items=num_items, hidden_layers=hidden_dims)
loss_function = MSELoss()
optimizer = Adam(mlp.parameters(), lr=0.001)

# torch.save does not create missing parent directories, so make sure the
# checkpoint folder exists before spending time on training.
os.makedirs('models', exist_ok=True)

model = nn_train(mlp, train_loader, loss_function, optimizer)
torch.save(model.state_dict(), 'models/mlp.pth')

Epoch: 9, Batch: 3600, Loss: 1.024

In [None]:
# Score the MLP on the held-out split; predictions are clamped to the 1-5 rating range first.
y_pred_mlp = np.clip(nn_predict(mlp, test_loader), 1, 5)
print_scores(y_true, y_pred_mlp, "MLP Only")

Method: MLP Only, RMSE: 1.0029, MAE: 0.8105


## NeuMF with pre-training
This makes use of the previously learnt GMF and MLP as initializations for the NeuMF model. The models are weighted by an $\alpha$ value where $\alpha=0$ fully uses the MLP model, while $\alpha=1$ fully uses the GMF model.  
After tuning for various $\alpha$ values, an appropriate value was selected

In [None]:
# NeuMF with the same dimensions as the GMF/MLP models; alpha=0.05 weights the blend strongly towards the MLP.
neumf_pretrained = NeuMF(latent_dim=latent_dim, num_users=num_users, num_items=num_items, hidden_layers=hidden_dims, pretrained=True, alpha=0.05)
# Snapshot of the freshly initialised parameters; the next cell overwrites matching entries with pre-trained weights.
neumf_dict = neumf_pretrained.state_dict()

In [None]:
# Load the GMF and MLP weights saved by the two training cells above.
gmf_state_dict = torch.load('models/gmf.pth')
mlp_state_dict = torch.load('models/mlp.pth')

# Keep only the entries whose key also exists in the NeuMF state dict.
pretrained_dict_gmf = {k: v for k, v in gmf_state_dict.items() if k in neumf_dict}
pretrained_dict_mlp = {k: v for k, v in mlp_state_dict.items() if k in neumf_dict}

# MLP entries are applied last, so they win if a key appears in both dicts.
# NOTE(review): confirm that GMF and MLP use distinct parameter names inside NeuMF.
neumf_dict.update(pretrained_dict_gmf)
neumf_dict.update(pretrained_dict_mlp)
neumf_pretrained.load_state_dict(neumf_dict)

<All keys matched successfully>

In [None]:
# Fine-tune the pre-initialised NeuMF. Note that SGD is used here, whereas GMF and MLP were trained with Adam.
loss_function = MSELoss()
optimizer = SGD(neumf_pretrained.parameters(), lr=0.001)

model = nn_train(neumf_pretrained, train_loader, loss_function, optimizer)

Epoch: 9, Batch: 3600, Loss: 0.997

In [None]:
# Score the fine-tuned NeuMF on the held-out split; predictions are clamped to the 1-5 rating range first.
y_pred_neumf_pretrained = np.clip(nn_predict(neumf_pretrained, test_loader), 1, 5)
print_scores(y_true, y_pred_neumf_pretrained, "NeuMF Pretrained")

Method: NeuMF Pretrained, RMSE: 1.0041, MAE: 0.8092


# Bayesian Factorization Machines

## Baseline
This only uses the individual ratings, without utilising any other knowledge about user/item

In [43]:
# Plain BFM on the (row, col, rating) data only; returns the test predictions and the fitted model.
y_pred_bfm, fm = run_bfm(train_df, test_df, rank=10, fm_kind='classifier') 

w0 = 0.12, cutpoint = ['-1.983', '-1.237', '-0.263', '0.613'] : 100%|██████████| 200/200 [02:43<00:00,  1.22it/s]


In [44]:
print_scores(y_true, y_pred_bfm, "BFM Baseline")

Method: BFM Baseline, RMSE: 0.9777, MAE: 0.7809


## Fusing with KNN

When performing preliminary experiments with various methods, we noticed that K-Nearest-Neighbours worked relatively well despite its simplicity. Therefore, we wanted to supplement the BFM with KNN predictions.  
Using a KNN model fitted on the training data, we generated predictions for all datapoints within the rating matrix, along with their corresponding clusters.

In [27]:
# KNN predictions and cluster labels for the unrated (user, item) pairs of the custom split.
# Requires `trainset` / `anti_trainset` to be the Surprise objects from the data-loading cell.
antitrain_df = generate_clusters(trainset, anti_trainset, n_clusters=30)
# Optional cache of the result:
# antitrain_df.to_csv('models/knn_clusters.csv', index=False)

Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.


In [46]:
# BFM trained on the real ratings plus KNN-generated ratings; one run per seed in seed_lst
# (the result is read via its 'Prediction_avg' column in the next cell).
y_pred_ensemble = run_bfm_augmented(train_df, antitrain_df, test_df, n_samples_per_cluster=50000, rank=10, seed_lst=[1, 42, 66, 88, 420])

alpha = 2.49 w0 = 2.58 : 100%|██████████| 200/200 [04:47<00:00,  1.44s/it]
alpha = 2.49 w0 = 2.58 : 100%|██████████| 200/200 [05:52<00:00,  1.76s/it]
alpha = 2.49 w0 = 2.58 : 100%|██████████| 200/200 [05:14<00:00,  1.57s/it]
alpha = 2.49 w0 = 2.58 : 100%|██████████| 200/200 [05:17<00:00,  1.59s/it]
alpha = 2.49 w0 = 2.58 : 100%|██████████| 200/200 [07:21<00:00,  2.21s/it]


In [47]:
print_scores(y_true, y_pred_ensemble['Prediction_avg'], "BFM with KNN Preds")

Method: BFM with KNN Preds, RMSE: 0.9840, MAE: 0.7808


### Fusion method on the full training set (to combine in the final blended model)

In [56]:
# Same KNN cluster generation as above, but on the full training set (no hold-out).
antitrain_df_full = generate_clusters(trainset_full, anti_trainset_full, n_clusters=30)
# Optional cache of the result:
# antitrain_df_full.to_csv('models/knn_clusters_full.csv', index=False)

Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.


In [63]:
# Augmented BFM trained on the full training set and evaluated on the submission entries (ans_df), one run per seed.
y_pred_ensemble_full = run_bfm_augmented(train_df_full, antitrain_df_full, ans_df, n_samples_per_cluster=50000, rank=10, seed_lst=[1, 42, 66, 88, 420])

alpha = 2.20 w0 = 2.59 : 100%|██████████| 200/200 [06:26<00:00,  1.93s/it]
alpha = 2.20 w0 = 2.59 : 100%|██████████| 200/200 [07:52<00:00,  2.36s/it]
alpha = 2.20 w0 = 2.59 : 100%|██████████| 200/200 [07:15<00:00,  2.18s/it]
alpha = 2.20 w0 = 2.59 : 100%|██████████| 200/200 [08:16<00:00,  2.48s/it]
alpha = 2.20 w0 = 2.59 : 100%|██████████| 200/200 [08:46<00:00,  2.63s/it]


In [67]:
y_pred_ensemble_full.head()

Unnamed: 0,row,col,Prediction,Prediction_1,Prediction_42,Prediction_66,Prediction_88,Prediction_420,Prediction_avg
0,37,1,3,3.241306,3.274826,3.262135,3.280221,3.247981,3.261294
1,73,1,3,3.082694,3.070482,3.08499,3.073493,3.095778,3.081487
2,156,1,3,3.754485,3.742079,3.73802,3.753527,3.746164,3.746855
3,160,1,3,3.305074,3.26709,3.296088,3.286407,3.310042,3.29294
4,248,1,3,3.480581,3.446603,3.457871,3.476602,3.457095,3.46375


# RainFM

In the first section, we run various simple baseline models. Subsequently, we conducted Bayesian Factorization Machines augmented by various features. We then split the users into various clusters to allow us to have different models for each cluster. To produce the final model, we then blended models together for each cluster to produce ensembled predictions for submission.

The results for each of the baseline methods and the final blended method are reported at the end.

In [None]:
# True: evaluate on the custom train/test split. False: train on the full data and write the Kaggle submission.
TRAIN_MODE = True # SET THIS TO FALSE IF YOU WANT TO TRAIN FINAL MODEL FOR SUBMISSION

In [None]:
# In submission mode, swap the custom split for the full training data and the submission entries.
if not TRAIN_MODE:
    train_df = train_df_full
    test_df = ans_df
    trainset = trainset_full
    testset = ansset

# Per-row model outputs and features are collected in these frames.
# They must be copies: binding the same object would add every model/feature column
# to train_df / test_df as well, which are still passed to other functions afterwards
# (e.g. get_RelationBlocks and get_predictions).
train_models = train_df.copy()
test_models = test_df.copy()

In [5]:
def get_predictions(model):
    """
    Given a trained model, returns the predicted ratings for the training and test sets.

    The function reads the notebook-level variables `trainset`, `testset`, `train_df`
    and `test_df`; they must be the Surprise trainset/testset and the matching data frames.

    Args:
    - model: A trained model object that has a `test()` method returning predictions
      with `uid`, `iid` and `est` attributes.

    Returns:
    - train_pred: A list of predicted ratings, one per row of `train_df`, in row order.
    - test_pred: A list of predicted ratings, one per row of `test_df`, in row order.

    Raises:
    - KeyError: if a (row, col) pair of a data frame has no matching prediction.
    """
    def _align_to_frame(predictions, frame):
        # model.test() does not return predictions in data-frame order, so look
        # each (user, item) pair up explicitly.
        estimate_by_pair = {(p.uid, p.iid): p.est for p in predictions}
        # Iterating the two id columns directly avoids the per-row Series
        # construction of DataFrame.iterrows(), which is very slow on large frames.
        return [estimate_by_pair[pair] for pair in zip(frame["row"], frame["col"])]

    train_pred = _align_to_frame(model.test(trainset.build_testset()), train_df)
    test_pred = _align_to_frame(model.test(testset), test_df)

    return train_pred, test_pred

## Simple Baseline Models

In [26]:
# Item-based KNN (user_based=False) with pearson_baseline similarity.
knnM = KNNWithMeans(
    k=215,
    min_k=11,
    verbose=False,
    sim_options={"name": "pearson_baseline", "user_based": False},
)
knnM.fit(trainset)

# Store the in-sample and held-out predictions as a blend feature.
train_predictions, test_predictions = get_predictions(knnM)
train_models["KNNWithMeans"] = train_predictions
test_models["KNNWithMeans"] = test_predictions

In [28]:

# Bias-only baseline estimated with ALS.
baseline = BaselineOnly(
    bsl_options={
        'method': 'als',
        'n_epochs': 155,
        'reg_u': 12.389737311297987,
        'reg_i': 0.00010816431053605042,
    }
)
baseline.fit(trainset)

# Store the in-sample and held-out predictions as a blend feature.
train_predictions, test_predictions = get_predictions(baseline)
train_models["BaselineOnly"] = train_predictions
test_models["BaselineOnly"] = test_predictions

Estimating biases using als...


In [74]:
# Biased non-negative matrix factorisation.
nfm = NMF(
    n_factors=210,
    n_epochs=185,
    reg_pu=3.554509817040489,
    reg_qi=0.053153974679699414,
    biased=True,
)
nfm.fit(trainset)

# Store the in-sample and held-out predictions as a blend feature.
train_predictions, test_predictions = get_predictions(nfm)
train_models["NMF"] = train_predictions
test_models["NMF"] = test_predictions

In [29]:
# SlopeOne with default settings.
slopeOne = SlopeOne()
slopeOne.fit(trainset)

# Store the in-sample and held-out predictions as a blend feature.
train_predictions, test_predictions = get_predictions(slopeOne)
train_models["SlopeOne"] = train_predictions
test_models["SlopeOne"] = test_predictions


In [30]:
# NormalPredictor with default settings; its predictions are used as a feature in some of the blends below.
# NOTE(review): no random seed is set in this cell, so these predictions may differ between runs — confirm.
normalPredictor = NormalPredictor()
normalPredictor.fit(trainset)

# Store the in-sample and held-out predictions as a blend feature.
train_predictions, test_predictions = get_predictions(normalPredictor)
train_models["NormalPredictor"] = train_predictions
test_models["NormalPredictor"] = test_predictions

In [33]:
# Surprise SVD with a shared learning rate and regularisation for all parameters.
SvD = SVD(
    n_factors=380,
    n_epochs=115,
    lr_all=0.003308328065201225,
    reg_all=0.07818404435260741,
)
SvD.fit(trainset)

# Store the in-sample and held-out predictions as a blend feature.
train_predictions, test_predictions = get_predictions(SvD)
train_models["SVD"] = train_predictions
test_models["SVD"] = test_predictions

## Bayesian Factorization Machines

In [13]:
# Relational feature blocks and group sizes shared by all myfm models below (passed as X_rel / group_shapes).
train_blocks, test_blocks, feature_group_sizes = get_RelationBlocks(train_df, test_df)

In [15]:
# Ordered-probit BFM (rank 27) fitted on the ratings as-is. The class probabilities are weighted
# with 0..5 below, i.e. the model is treated as having six ordered classes.
bfm_OrderProbit_6 = myfm.MyFMOrderedProbit(rank=27)
bfm_OrderProbit_6.fit(
    None, train_df.Prediction.values, X_rel=train_blocks,
    group_shapes=feature_group_sizes,
    n_iter=896, n_kept_samples=895,
);

# Expected rating = sum over classes of (class value * predicted probability).
train_prediction = bfm_OrderProbit_6.predict_proba(None, train_blocks)
train_models["bfm_OrderProbit_6"] = train_prediction.dot(np.arange(6))

test_prediction = bfm_OrderProbit_6.predict_proba(None, test_blocks)
test_models["bfm_OrderProbit_6"] = test_prediction.dot(np.arange(6))


w0 = 0.10, cutpoint = ['-5.847', '-2.140', '-1.360', '-0.342', '0.577'] : 100%|██████████| 896/896 [3:50:20<00:00, 15.42s/it]    


In [16]:
# Ordered-probit BFM (rank 27) fitted on ratings shifted down by one, giving five classes 0..4.
bfm_OrderProbit_5 = myfm.MyFMOrderedProbit(rank=27)
bfm_OrderProbit_5.fit(
    None, train_df.Prediction - 1, X_rel=train_blocks,
    group_shapes=feature_group_sizes,
    n_iter=896, n_kept_samples=895,
);

# Expected class value from the probabilities; "+ 1" undoes the shift applied to the targets.
train_prediction = bfm_OrderProbit_5.predict_proba(None, train_blocks)
train_models["bfm_OrderProbit_5"] = train_prediction.dot(np.arange(5)) + 1

test_prediction = bfm_OrderProbit_5.predict_proba(None, test_blocks)
test_models["bfm_OrderProbit_5"] = test_prediction.dot(np.arange(5)) + 1


w0 = -0.00, cutpoint = ['-2.189', '-1.411', '-0.397', '0.523'] : 100%|██████████| 896/896 [2:17:03<00:00,  9.18s/it]    


In [17]:
# Smaller variant of the six-class ordered-probit BFM: rank 23, with all 900 samples kept.
bfm_OrderProbit_6_small = myfm.MyFMOrderedProbit(rank=23)
bfm_OrderProbit_6_small.fit(
    None, train_df.Prediction , X_rel=train_blocks,
    group_shapes=feature_group_sizes,
    n_iter=900, n_kept_samples=900,
);

# Expected rating = sum over classes of (class value * predicted probability).
train_prediction = bfm_OrderProbit_6_small.predict_proba(None, train_blocks)
train_models["bfm_OrderProbit_6_small"] = train_prediction.dot(np.arange(6))

test_prediction = bfm_OrderProbit_6_small.predict_proba(None, test_blocks)
test_models["bfm_OrderProbit_6_small"] = test_prediction.dot(np.arange(6))

w0 = 0.01, cutpoint = ['-5.793', '-2.170', '-1.400', '-0.393', '0.519'] : 100%|██████████| 900/900 [19:30<00:00,  1.30s/it] 


In [18]:
# Smaller variant of the five-class ordered-probit BFM: rank 23, targets shifted down by one.
bfm_OrderProbit_5_small = myfm.MyFMOrderedProbit(rank=23)
bfm_OrderProbit_5_small.fit(
    None, train_df.Prediction - 1, X_rel=train_blocks,
    group_shapes=feature_group_sizes,
    n_iter=900, n_kept_samples=900,
);

# Expected class value from the probabilities; "+ 1" undoes the shift applied to the targets.
train_prediction = bfm_OrderProbit_5_small.predict_proba(None, train_blocks)
train_models["bfm_OrderProbit_5_small"] = train_prediction.dot(np.arange(5)) + 1

test_prediction = bfm_OrderProbit_5_small.predict_proba(None, test_blocks)
test_models["bfm_OrderProbit_5_small"] = test_prediction.dot(np.arange(5)) + 1

w0 = -0.02, cutpoint = ['-2.194', '-1.424', '-0.417', '0.492'] : 100%|██████████| 900/900 [18:04<00:00,  1.21s/it]


In [23]:
# Variational FM regressor (rank 29) fitted directly on the ratings; predict() output is used without clipping.
bfm_variational = myfm.VariationalFMRegressor(rank=29)
bfm_variational .fit(
    None, train_df.Prediction, X_rel=train_blocks,
    group_shapes=feature_group_sizes,
    n_iter=701
);


# Store the in-sample and held-out predictions as a blend feature.
train_models["bfm_variational"] = bfm_variational.predict(None,train_blocks)
test_models["bfm_variational"] = bfm_variational.predict(None,test_blocks)


alpha = 1.06 w0 = 3.30 : 100%|██████████| 701/701 [26:12<00:00,  2.24s/it]


In [66]:
# Per-user and per-movie rating statistics, computed on the training ratings only and
# mapped onto both frames by id. Each statistic is computed once and reused for train and test.
ratings_by_user = train_df.groupby('row')['Prediction']
ratings_by_movie = train_df.groupby('col')['Prediction']

# (feature column, id column used for the lookup, statistic indexed by that id).
# The order matches the order in which the columns are added to the frames.
rating_features = [
    ('avg_user_rating', 'row', ratings_by_user.mean()),
    ('avg_movie_rating', 'col', ratings_by_movie.mean()),
    ('user_rating_count', 'row', ratings_by_user.count()),
    ('movie_rating_count', 'col', ratings_by_movie.count()),
    ('user_rating_std', 'row', ratings_by_user.std()),
    ('movie_rating_std', 'col', ratings_by_movie.std()),
]

for feature_name, id_column, stat_by_id in rating_features:
    # Ids that do not occur in the training data map to NaN.
    train_models[feature_name] = train_df[id_column].map(stat_by_id)
    test_models[feature_name] = test_df[id_column].map(stat_by_id)



In [69]:
# Number of training ratings per user
user_count_train = train_df.groupby('row')['Prediction'].count()

# Quartile boundaries of the per-user rating count
count_q25 = user_count_train.quantile(0.25)
count_q50 = user_count_train.quantile(0.5)
count_q75 = user_count_train.quantile(0.75)

# Four half-open bins [lower, upper) over the rating count; the last bin is open-ended
bin1 = user_count_train[user_count_train < count_q25].index
bin2 = user_count_train[(user_count_train >= count_q25) & (user_count_train < count_q50)].index
bin3 = user_count_train[(user_count_train >= count_q50) & (user_count_train < count_q75)].index
bin4 = user_count_train[user_count_train >= count_q75].index

# User ids of each bin, in bin order
bins_train = [bin1.values, bin2.values, bin3.values, bin4.values]

## Blending

In [None]:
# Global blend: one ElasticNet over a fixed set of model outputs and rating statistics.
full_blend_features = [
    'bfm_OrderProbit_5', 'BaselineOnly', 'avg_user_rating', 'avg_movie_rating',
    'NMF', 'bfm_variational', 'NormalPredictor',
]

model = ElasticNet(alpha=0.1125, l1_ratio=0.055600000000000004, random_state=0, max_iter=3869)
model.fit(train_models[full_blend_features], train_df.Prediction)

# Blended predictions are clamped to the 1-5 rating range.
train_models["full_blend"] = np.clip(model.predict(train_models[full_blend_features]), 1, 5)
test_models["full_blend"] = np.clip(model.predict(test_models[full_blend_features]), 1, 5)

In [103]:
# One ElasticNet configuration (hyper-parameters + feature list) per user bin, in bin order.
best_params = [
    {'alpha': 0.1335, 'l1_ratio': 0.0001, 'max_iter': 8592},
    {'alpha': 0.1043, 'l1_ratio': 0.0734, 'max_iter': 4203},
    {'alpha': 0.10070000000000001, 'l1_ratio': 0.0629, 'max_iter': 8133},
    {'alpha': 0.1019, 'l1_ratio': 0.0007, 'max_iter': 4191},
]
usedIndexes = [
    ['bfm_OrderProbit_5', 'bfm_variational', 'SlopeOne', 'avg_user_rating', 'user_rating_count', 'movie_rating_count', 'avg_movie_rating'],
    ['bfm_OrderProbit_5', 'bfm_variational', 'user_rating_count', 'movie_rating_count', 'NormalPredictor', 'BaselineOnly', 'avg_movie_rating'],
    ['bfm_OrderProbit_5', 'bfm_variational', 'user_rating_count', 'avg_movie_rating', 'movie_rating_count', 'BaselineOnly'],
    ['bfm_OrderProbit_5', 'bfm_variational', 'avg_movie_rating', 'NormalPredictor'],
]
for i, (bi, params, usedIndex) in enumerate(zip(bins_train, best_params, usedIndexes)):
    model = ElasticNet(**params)
    # NOTE(review): the model is fit on ALL training rows, not only the rows of the current bin;
    # only the prediction step is restricted to the bin. Confirm this is intended.
    model.fit(train_models[usedIndex], train_df.Prediction)

    # Rows belonging to users of this bin. Test users that are in no training bin keep NaN.
    train_in_bin = train_models.row.isin(bi)
    test_in_bin = test_models.row.isin(bi)
    train_models.loc[train_in_bin, "grouping_blend"] = np.clip(model.predict(train_models.loc[train_in_bin, usedIndex]), 1, 5)
    test_models.loc[test_in_bin, "grouping_blend"] = np.clip(model.predict(test_models.loc[test_in_bin, usedIndex]), 1, 5)

## Final Blend


In [121]:
# Second-level blend of the global and the per-bin blend; only test predictions are stored.
modelCombined = ElasticNet(alpha=0.15504731878406247, l1_ratio=0.15642, max_iter=10000)
modelCombined.fit(train_models[["full_blend", "grouping_blend"]], train_df.Prediction)
test_models["final_blend"] = np.clip(
    modelCombined.predict(test_models[["full_blend", "grouping_blend"]]),
    1,
    5,
)

## Results / Submission
If ```TRAIN_MODE``` is set to True, it will produce results based on our train-test split.  
If ```TRAIN_MODE``` is set to False, it will produce results for Kaggle submission

In [None]:
# Either score every collected column against the held-out ratings, or write the Kaggle submission.
if TRAIN_MODE:
    # columns[3:] skips the first three columns (presumably row, col, Prediction). Everything after them is
    # scored, including count/std feature columns that are not rating predictions.
    for col in test_models.columns[3:]:
        print_scores(y_true, test_models[col], col)
else:
    submission_format = pd.read_csv(os.path.join(data_folder, 'sampleSubmission.csv'))
    # NOTE(review): assigning a Series aligns on the index; this assumes test_models has the same index
    # (and row order) as sampleSubmission.csv — confirm against _read_df_in_format.
    submission_format['Prediction'] = test_models['final_blend']
    submission_format.to_csv(os.path.join(data_folder, 'final_submission.csv'), index=False)

Method: bfm_OrderProbit_6, RMSE: 0.97022, MAE: 0.7706
Method: bfm_OrderProbit_5, RMSE: 0.97013, MAE: 0.7704
Method: bfm_OrderProbit_6_small, RMSE: 0.97004, MAE: 0.7704
Method: bfm_OrderProbit_5_small, RMSE: 0.97018, MAE: 0.7707
Method: bfm_variational, RMSE: 0.97753, MAE: 0.7822
Method: KNNWithMeans, RMSE: 0.99032, MAE: 0.7925
Method: BaselineOnly, RMSE: 0.99900, MAE: 0.8047
Method: SlopeOne, RMSE: 0.99965, MAE: 0.8017
Method: NormalPredictor, RMSE: 1.48055, MAE: 1.1766
Method: SVD, RMSE: 0.98621, MAE: 0.7958
Method: avg_user_rating, RMSE: 1.09488, MAE: 0.9009
Method: avg_movie_rating, RMSE: 1.03095, MAE: 0.8398
Method: user_rating_count, RMSE: 136.13920, MAE: 120.8537
Method: movie_rating_count, RMSE: 1778.72522, MAE: 1553.7553
Method: user_rating_std, RMSE: 3.00811, MAE: 2.7948
Method: movie_rating_std, RMSE: 3.06818, MAE: 2.8477
Method: NMF, RMSE: 1.00298, MAE: 0.8123
Method: full_blend, RMSE: 0.96956, MAE: 0.7707
Method: grouping_blend, RMSE: 0.96951, MAE: 0.7690
Method: final_blen