# Average over items for each user

Idea: impute each user's missing ratings with the average of that user's available ratings, taken over all items.

In [8]:
import numpy as np

from data_handler import load_train_data, write_submission, get_prediction_ratings_from_matrix

## Load training data

In [9]:
# Load the training data into X and preview its top-left 10x10 corner.
X = load_train_data()
print(X[:10, :10])

[[0. 0. 0. 0. 0. 0. 0. 0. 0. 5.]
 [0. 0. 0. 3. 0. 5. 0. 4. 0. 0.]
 [0. 0. 0. 2. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 2. 0. 0. 0. 5. 0. 3. 0. 0.]
 [0. 0. 0. 0. 0. 5. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 5. 0. 0. 0. 3.]
 [0. 0. 0. 1. 0. 5. 0. 5. 0. 0.]]


## For each user, predict the missing entries (stored as 0) by the average of that user's available ratings

In [10]:
# A 0 entry in X marks a missing rating; every non-zero entry is an
# available rating. The whole cell relies on this convention.
no_rating = (X == 0)

# y is the vector containing the sum of each user's available ratings
# (missing entries are 0, so they do not contribute to the sum).
y = X.sum(axis=1)
rating_counts = np.count_nonzero(X, axis=1)

# A user without any rating would give 0/0 = NaN and poison the whole row.
# Fall back to the global mean of all available ratings for such users
# (0.0 when the matrix holds no ratings at all, i.e. entries stay "missing").
total_ratings = rating_counts.sum()
global_average = float(y.sum(dtype=np.float64) / total_ratings) if total_ratings else 0.0
has_ratings = rating_counts > 0
safe_counts = np.where(has_ratings, rating_counts, 1)  # never divide by zero
average_per_user = np.where(has_ratings, y / safe_counts, global_average)

# Fill every missing entry with the average of its row (user) in a single
# vectorized step. The averages are cast to X's dtype so that X_pred keeps
# the dtype of X, exactly as filling a copy of X in place would.
X_pred = np.where(no_rating, average_per_user[:, np.newaxis].astype(X.dtype), X)
print(X_pred[0:10, 0:10])

[[4.0434785 4.0434785 4.0434785 4.0434785 4.0434785 4.0434785 4.0434785
  4.0434785 4.0434785 5.       ]
 [3.771812  3.771812  3.771812  3.        3.771812  5.        3.771812
  4.        3.771812  3.771812 ]
 [3.5232558 3.5232558 3.5232558 2.        3.5232558 3.5232558 3.5232558
  3.5232558 3.5232558 3.5232558]
 [3.8       3.8       3.8       3.8       3.8       3.8       3.8
  3.8       3.8       3.8      ]
 [3.9850745 2.        3.9850745 3.9850745 3.9850745 5.        3.9850745
  3.        3.9850745 3.9850745]
 [4.3668637 4.3668637 4.3668637 4.3668637 4.3668637 5.        4.3668637
  4.3668637 4.3668637 4.3668637]
 [4.129032  4.129032  4.129032  4.129032  4.129032  4.129032  4.129032
  4.129032  4.129032  4.129032 ]
 [4.1153846 4.1153846 4.1153846 4.1153846 4.1153846 4.1153846 4.1153846
  4.1153846 4.1153846 4.1153846]
 [3.97191   3.97191   3.97191   3.97191   3.97191   5.        3.97191
  3.97191   3.97191   3.       ]
 [3.82243   3.82243   3.82243   1.        3.82243   5.        3.82243
  5.        3.82243   3.82243  ]]

## Output submission file 

In [None]:
# Turn the imputed matrix into prediction ratings via the project helper,
# then hand them to write_submission together with the target file name.
submission_file = 'submission_average_over_items_0.csv'
ratings = get_prediction_ratings_from_matrix(X_pred)
write_submission(ratings, submission_file)