## Merging the outputs of different methods to obtain a better prediction

In [1]:
import numpy as np
import scipy
import scipy.io
import scipy.sparse as sp
import datetime
import time
%load_ext autoreload
%autoreload 2
import sys
sys.path.insert(0, 'ALS_Approach')
from helpers import *

In [2]:
# Submission files produced by the two individual models.
SGD_prediction_file = "ALS_Approach/20161220_171018 submission.csv"
ALS_prediction_file = "ALS_Approach/20161220_175252 submission.csv"

# Load each submission through the helper and convert the result to a dense
# matrix so the two predictions can be combined element-wise.
# NOTE(review): assumes load_data returns a scipy sparse matrix -- confirm in helpers.
SGD_prediction = load_data(SGD_prediction_file).todense()
ALS_prediction = load_data(ALS_prediction_file).todense()

number of items: 10000, number of users: 1000
number of items: 10000, number of users: 1000


In [3]:
# Equal-weight blend: element-wise mean of the two dense prediction matrices.
average_prediction = (SGD_prediction + ALS_prediction) / 2

In [4]:
def create_csv_submission(prediction, submission_file_path = "submission.csv"):
    """
        Creates an output file in csv format for submission to kaggle.

        Only the non-zero entries of `prediction` are written, one row per
        entry, with a 1-based id of the form "r<row>_c<col>". The file name
        (not its directory) is prefixed with the current local time and the
        word "merged", e.g. "20161220_180000 merged submission.csv".

        Arguments:
            prediction: matrix W * Z^T; must support .nonzero() and [i, j] indexing
            submission_file_path: string name of .csv output file to be created;
                may include a directory part, which must already exist

        Raises:
            OSError: if the output file cannot be opened for writing
    """
    # Imported locally: `csv` is not among the notebook's visible imports, so
    # relying on it leaking in through a star-import would be fragile.
    import csv
    import os

    # Prefix only the file name with the timestamp; prefixing the whole path
    # string would corrupt any path that contains a directory component.
    directory, file_name = os.path.split(submission_file_path)
    stamped_name = time.strftime("%Y%m%d_%H%M%S merged") + " " + file_name
    submission_file_path = os.path.join(directory, stamped_name)

    # find the non-zero ratings indices
    nz_rows, nz_cols = prediction.nonzero()

    # newline='' is required by the csv module; without it, platforms that
    # translate newlines would emit an extra blank line after every row.
    with open(submission_file_path, 'w', newline='') as csvfile:
        fieldnames = ['Id', 'Prediction']
        writer = csv.DictWriter(csvfile, delimiter=",", fieldnames=fieldnames)
        writer.writeheader()
        for i, j in zip(nz_rows, nz_cols):
            # Kaggle ids are 1-based, the matrix indices are 0-based.
            writer.writerow({'Id' : 'r' + str(i + 1) + '_' + 'c' + str(j + 1),
                             'Prediction' : str(prediction[i, j])})

In [5]:
# Write the blended predictions to a timestamped CSV (default base name "submission.csv").
create_csv_submission(average_prediction)