In [15]:
import numpy as np
import pandas as pd

## Setting up and preprocessing data

In [16]:
n = np.NaN  # setting non scores to np.Nan for better integration with numpy
rows_users_ = ['u1', 'u2', 'u3', 'u4', 'u5', 'u6', 'u7']
columns_series = ['Vikings', 'Breaking Bad', 'The Sopranos', 'Westworld',
                  'Game of Thrones', 'The Witcher', 'Skam', 'Les bureau']
all_users = np.array([
    [5,5,n,3,4,n,n,n],
    [4,3,3,4,5,2,3,3],
    [1,3,3,2,2,3,4,1],
    [n,3,n,n,n,5,n,n],
    [3,4,n,4,5,5,1,n],
    [4,5,2,5,3,2,2,3],
    [1,1,n,n,2,1,n,n],
])
df = pd.DataFrame(all_users, columns=columns_series, index=rows_users_)
df

Unnamed: 0,Vikings,Breaking Bad,The Sopranos,Westworld,Game of Thrones,The Witcher,Skam,Les bureau
u1,5.0,5.0,,3.0,4.0,,,
u2,4.0,3.0,3.0,4.0,5.0,2.0,3.0,3.0
u3,1.0,3.0,3.0,2.0,2.0,3.0,4.0,1.0
u4,,3.0,,,,5.0,,
u5,3.0,4.0,,4.0,5.0,5.0,1.0,
u6,4.0,5.0,2.0,5.0,3.0,2.0,2.0,3.0
u7,1.0,1.0,,,2.0,1.0,,


In [32]:
# calculating means for all users
users_mean = np.nanmean(all_users, axis=1)
df = pd.DataFrame(users_mean, index=rows_users_, columns=['means'])
df

Unnamed: 0,means
u1,4.25
u2,3.375
u3,2.375
u4,4.0
u5,3.666667
u6,3.25
u7,1.25


In [34]:
# calculating variance for all users
all_users_variance = np.empty(all_users.shape)
for i, user_scores in enumerate(all_users):
    all_users_variance[i] = user_scores - users_mean[i]
df = pd.DataFrame(all_users_variance, columns=columns_series, index=rows_users_)
df

Unnamed: 0,Vikings,Breaking Bad,The Sopranos,Westworld,Game of Thrones,The Witcher,Skam,Les bureau
u1,0.75,0.75,,-1.25,-0.25,,,
u2,0.625,-0.375,-0.375,0.625,1.625,-1.375,-0.375,-0.375
u3,-1.375,0.625,0.625,-0.375,-0.375,0.625,1.625,-1.375
u4,,-1.0,,,,1.0,,
u5,-0.666667,0.333333,,0.333333,1.333333,1.333333,-2.666667,
u6,0.75,1.75,-1.25,1.75,-0.25,-1.25,-1.25,-0.25
u7,-0.25,-0.25,,,0.75,-0.25,,


## Initializing active user

In [35]:
active_user = np.array([3, 2, 3, 0, 4, 1, 0, 5])
active_user_mean = np.nanmean(active_user)

In [37]:
# calculating variance for active user
active_users_variance = np.empty((1, 8))
for i, score in enumerate(active_user):
    active_users_variance[0, i] = score - active_user_mean
df = pd.DataFrame(active_users_variance)
df

Unnamed: 0,0,1,2,3,4,5,6,7
0,0.75,-0.25,0.75,-2.25,1.75,-1.25,-2.25,2.75


# Estimating new scores for active user
- Estimate the score this person is likely to give Westworld and Skam
- Solve the problem using Python and the corrcoef-function from Numpy

In [38]:
# calculating correlations between active user and all users
correlations = []
for other_users_variance in all_users_variance:
    temp_active = []
    temp_other = []
    for i in range(len(active_users_variance[0])):
        if not np.isnan(active_users_variance[0, i]) and not np.isnan(other_users_variance[i]):
            temp_active.append(active_users_variance[0, i])
            temp_other.append(other_users_variance[i])
    correlations_matrix = np.corrcoef(np.array(temp_active), np.array(temp_other))
    correlation = correlations_matrix[0][1]
    correlations.append(correlation)
df = pd.DataFrame(correlations)
df

Unnamed: 0,0
0,0.560612
1,0.276596
2,-0.643204
3,-1.0
4,0.433861
5,-0.091245
6,0.774597


In [39]:
# function for estimating score of series for active user
def estimate_score(kappa,average_vote_for_a, product_index):
    sum_correlation_variance = 0
    j = product_index  # index of product we want to predict the vote for
    for i in range(len(all_users_variance)):
        if not np.isnan(all_users_variance[i][j]):
            sum_correlation_variance += all_users_variance[i][j] * correlations[i]
    p_a_j = average_vote_for_a + kappa * sum_correlation_variance
    return p_a_j

In [40]:
westworld = 3
est_west = estimate_score(1,active_user_mean,westworld)
print('Active users estimated score for westworld:', round(est_west,2))

Active users estimated score for westworld: 1.95


In [41]:
skam = 6
est_skam = estimate_score(1,active_user_mean,skam)
print('Active users estimated score for skam:', round(est_skam,2))

Active users estimated score for skam: 0.06
