In [1]:
from IPython.display import Markdown, display # for markdown text
import json # for json methods
import pprint # to print human readable dictionary
import pandas as pd # for visualizations
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer # for TF-IDF

In [2]:
def printmd(string):
    display(Markdown(string))

# FETCHING THE DATA

## All the topics in our database

In [3]:
topics = json.load(open('topics.txt'))
pprint.pprint(topics)

{'16': 'openhardware',
 '18': 'Data Science',
 '19': 'Big Data',
 '20': 'Artificial Intelligence',
 '21': 'Business Intelligence',
 '31': 'arduino',
 '32': 'raspberry pi',
 '33': '3d printer',
 '36': 'Deep Learning',
 '37': 'IoT',
 '38': '3d printing',
 '39': 'open hardware',
 '56': 'Wearable',
 '57': 'Sustainable finance',
 '59': 'Sustainable Finance',
 '60': 'Climate Finance',
 '61': 'Green Bonds',
 '62': 'Green Economy'}


## The parameters in our scenario
We are observing an audience, which is defined by two constraints: a topic and a location. Our example is the audience in Italy interested in the topic: Arduino.

In [4]:
TOPIC_ID=31 # topic = arduino
LOCATION = 'italy'
SIGNAL_STRENGTH = 0 # this value indicates the min number of influencers the retrieved audience members follow within the topic
LIMIT = 50 # number of audience members to consider
TESTING_SET_SIZE=10
HOW_MANY_TWEETS = 50 # amount of most recent tweets (including retweets) to be retrieved to consider in our recommendation engine
INCLUDE_RETWEETS = True

In [5]:
rated_audience_dict = json.load(open('rated_audience.txt'))
unrated_audience_dict = json.load(open('unrated_audience.txt'))

In [6]:
printmd("## An example Twitter profile with all the data fields at this point.")
printmd("### Topic: " + topics[str(TOPIC_ID)])
pprint.pprint(next (iter (rated_audience_dict.values())))

## An example Twitter profile with all the data fields at this point.

### Topic: arduino

{'description': 'Chief Innovation Officer at NTT DATA Italia , proud father of '
                'two beautiful girls, a professor, a technology evangelist, an '
                'holistic thinker and a gentleman.',
 'ground_truth_rating': 1.0,
 'hashtags': 'Milano CheTempoFa Milano CheTempoFa foi13 Cefriel fvw2013 '
             'fvw2013 fvw2013 storytelling Vajont Milano CheTempoFa '
             'StartupWeekend GrandC4Picasso makerfairerome MakerFaireRome '
             'GrandC4Picasso GrandC4Picasso',
 'influencers': '266400754 84094835 767285',
 'location': 'Milan, Italy',
 'screen_name': 'funkysurfer',
 'tweets': 'at Cascina Matiot Disorders of Con Edi Touch is easier than yours '
           'via Startup weekend 3 days at Milan Decoded Milan capital of the '
           'street October 2013 Rain Massima Minima The new science is an open '
           'narration thanks to This project was born via Milan handmade with '
           'Craft Camp of a Pisa ItCup A Roncade in Veneto via Oc

In [7]:
# Computes the TF-IDF values for the given corpus.
tf = TfidfVectorizer(analyzer='word', ngram_range=(1,1), min_df = 0, stop_words = 'english');
def get_tfidf(corpus):
    return tf.fit_transform(corpus.copy()).todense();

## Separate the data into different arrays

In [8]:
screen_names = [aud['screen_name'] for aud in rated_audience_dict.values()]
influencers_corpus = [aud['influencers'] for aud in rated_audience_dict.values()]
tweets_corpus = [aud['tweets'] for aud in rated_audience_dict.values()]
hashtags_corpus = [aud['hashtags'] for aud in rated_audience_dict.values()]
description_corpus = [aud['description'] for aud in rated_audience_dict.values()]
ground_truth_ratings = np.array([2*aud['ground_truth_rating'] for aud in rated_audience_dict.values()])
#print(tweets_corpus)

In [9]:
# INFLUENCER MATRICES
tfidf_influencer_matrix = np.array(get_tfidf(influencers_corpus)).T;
binary_influencer_matrix = tfidf_influencer_matrix.copy()
binary_influencer_matrix[binary_influencer_matrix>0]=1
# TWEET MATRICES
tfidf_tweets_matrix=np.array(get_tfidf(tweets_corpus)).T;
binary_tweets_matrix = tfidf_tweets_matrix.copy()
binary_tweets_matrix[binary_tweets_matrix>0]=1
# DESCRIPTION MATRICES
tfidf_description_matrix = np.array(get_tfidf(description_corpus)).T;
binary_description_matrix = tfidf_description_matrix.copy()
binary_description_matrix[binary_description_matrix>0]=1
# HASHTAGS MATRICES
tfidf_hashtags_matrix = np.array(get_tfidf(hashtags_corpus)).T;
binary_hashtags_matrix = tfidf_hashtags_matrix.copy()
binary_hashtags_matrix[binary_hashtags_matrix>0]=1
printmd("#### Matrix dimensions")
printmd("Influencer matrix: " + str(tfidf_influencer_matrix.shape))
printmd("Tweets matrix: " +str(tfidf_tweets_matrix.shape))
printmd("Description matrix: " +str(tfidf_description_matrix.shape))
printmd("Hashtags matrix: " +str(tfidf_hashtags_matrix.shape))

#### Matrix dimensions

Influencer matrix: (22, 50)

Tweets matrix: (4859, 50)

Description matrix: (308, 50)

Hashtags matrix: (1158, 50)

# METHODS FOR RECOMMENDATION

# Method A
## Logistic Regression using the similarities coming from user profiling (relaxed parameters)

In [10]:
from tfidf_vectorizer import TwitterAccountSimilarityFinder
import mord as md
from sklearn import datasets, linear_model

In [11]:
from scipy import sparse
def get_user_profile(tfidf_matrix, ratings):
    return np.multiply(tfidf_matrix.T,ratings[:, np.newaxis]).sum(axis=0).reshape(1,tfidf_matrix.shape[0])

In [12]:
def cos_sim(tfidf_matrix, user_profile):
    norm = np.linalg.norm(user_profile);
    return 1.0* tfidf_matrix.T.dot(user_profile.T)/ norm
    

In [15]:
SUBSAMPLING_COUNT = 20 # we will subsample this many times and take the average error for evaluation
rated_audience_dict_ids = list(rated_audience_dict.keys())

In [17]:
mean_square_errors = []
prints_enabled = False
for iteration in range(SUBSAMPLING_COUNT):
    predictions=[]
    TESTING_SET_IDS = np.random.choice(rated_audience_dict_ids, size=TESTING_SET_SIZE, replace=False)
    if (prints_enabled): print(TESTING_SET_IDS)
    ratings = np.array([0 if id in TESTING_SET_IDS else int(2*aud['ground_truth_rating']) for id, aud in rated_audience_dict.items()])
    if (prints_enabled): display(ratings)
        
    # FIND USER PROFILES
    influencer_user_profile = get_user_profile(tfidf_influencer_matrix, ratings)
    tweets_user_profile = get_user_profile(tfidf_tweets_matrix, ratings)
    description_user_profile = get_user_profile(tfidf_description_matrix, ratings)
    hashtags_user_profile = get_user_profile(tfidf_hashtags_matrix, ratings)

    # FIND THE COSINE SIMILARITIES
    # THESE WILL THEN BE USED AS FEATURES IN REGRESSION
    influencerSimilarities = cos_sim(tfidf_influencer_matrix, influencer_user_profile)
    tweetSimilarities = cos_sim(tfidf_tweets_matrix, tweets_user_profile)
    descriptionSimilarities = cos_sim(tfidf_description_matrix, description_user_profile)
    hashtagSimilarities = cos_sim(tfidf_hashtags_matrix, hashtags_user_profile)
    
    profile_count = len(rated_audience_dict)
    avgTweetSim = np.mean([e for e in tweetSimilarities if e!=0]) # if we cannot fetch tweets of a profile, we assign his tweets an average similarity score
    avgDescriptionSim = np.mean([e for e in descriptionSimilarities if e!=0]) # if the description of a profile is empty, we assign his descripion an average similarity score
    avgHashtagSim = np.mean([e for e in hashtagSimilarities if e!=0]) # if we cannot fetch tweets of a profile, we assign his hashtags an average similarity score
    
    profiles =[]
    training_indices=[]
    for i in range(profile_count):
        if tweetSimilarities[i]==0: tweetSimilarities[i]= avgTweetSim
        if descriptionSimilarities[i]==0: descriptionSimilarities[i]= avgDescriptionSim
        if hashtagSimilarities[i]==0: hashtagSimilarities[i] = avgHashtagSim
        if ratings[i]!=0: training_indices.append(i)
        profile = {
        'index': i,
        'screen_name':screen_names[i],
        'set': "testing" if ratings[i]==0 else "training",
        'ground_truth':ground_truth_ratings[i],
        'infSim': influencerSimilarities[i].item(0),
        'tweetSim': tweetSimilarities[i].item(0),
        'descSim': descriptionSimilarities[i].item(0),
        'hashtagSim': hashtagSimilarities[i].item(0),
        'score': 0
        }
        count=0
        for key in profile.keys():
            if 'Sim' in key: 
                profile['score']+=profile[key]
                count+=1
        profile['score']/=count*1.0
        profiles.append(profile)

    profiles=pd.DataFrame(profiles)
    if (prints_enabled):
        display(profiles[['screen_name','set','ground_truth','score','infSim','tweetSim','descSim','hashtagSim']].sort_values(by='score',ascending=False).round(2))
        printmd("### Naive approach\n Score is the average of similarities.")
        profiles.plot.scatter('score','ground_truth') # score is the average of the similarities
    
    # Combine the similarities and use them as features to feed to a logistic regressor.
    # uncomment to add the similarity into regression

    featureVectors = influencerSimilarities
    featureVectors = np.column_stack((featureVectors,tweetSimilarities))
    #featureVectors = np.column_stack((featureVectors, descriptionSimilarities))
    #featureVectors = np.column_stack((featureVectors, hashtagSimilarities))

    X = np.array([featureVectors[i] for i in training_indices])
    X_binary = X.copy()
    X_binary[X_binary>0]=1
    
    Y = np.array([ratings[i] for i in training_indices])
    Y_binary = [round(e/10) for e in Y]
    
    # Ordinal Regression
    #classifier = linear_model.LinearRegression()
    # Logistic Regression
    #classifier = md.LogisticIT() #Default parameters: alpha=1.0, verbose=0, maxiter=10000
    
    classifier = linear_model.LogisticRegression(C=1e5)
    #print(X)
    #print(Y)
    classifier.fit(X, Y)

    predictions = classifier.predict(featureVectors)
    profiles['predicted_rating']=predictions
    profiles["squared_error"]=(profiles["ground_truth"]-profiles["predicted_rating"])**2
    if (prints_enabled):
        display(profiles[['screen_name','set','ground_truth', 'predicted_rating','squared_error','score','infSim','tweetSim','descSim','hashtagSim']].sort_values(by='score',ascending=False).round(2))

    evaluation = pd.DataFrame()
    evaluation['mean_squared_error']=profiles.groupby(by='set')['squared_error'].mean()
    mean_square_error = {
        'training':evaluation.filter(like='training', axis=0)['mean_squared_error'].iloc[0],
        'testing':evaluation.filter(like='testing', axis=0)['mean_squared_error'].iloc[0],
        }
    mean_square_errors.append(mean_square_error)
    
Avg_MSE_training = np.mean([e['training'] for e in mean_square_errors])
Avg_MSE_testing = np.mean([e['testing'] for e in mean_square_errors])
print("Avg Training Mean Squared Error: " + str(Avg_MSE_training))
print("Avg Testing Mean Squared Error: " + str(Avg_MSE_testing))

Avg Training Mean Squared Error: 5.84125
Avg Testing Mean Squared Error: 13.575


# MATRIX METHODS

In [18]:
import math

In [19]:
# Matrix Factorization via multiplicative update rule
# Original Author: Ali Taylan Cemgil from Bogazici University
def nmf_kl_multiplicative(D, M, W, H, EPOCH=5000):
    MD = D.copy()
    MD[M==0] = 0
    for e in range(EPOCH):
        Xhat = W.dot(H)
        W=W*np.array(((MD/Xhat).dot(H.T)/np.dot(M, H.T)))
        Xhat = W.dot(H)
        H = H*np.array((W.T.dot(MD/Xhat)/np.dot(W.T, M)))
        #print(np.sum(np.abs(MD - M*Xhat))/np.sum(M))
    return W, H

In [20]:
# Regularized Matrix Factorization 
def matrix_factorization(R, P, Q, K, steps=5000, alpha=0.0002, beta=0.15):
    Q = Q.T
    for step in range(steps):
        for i in range(len(R)):
            for j in range(len(R[i])):
                if R[i][j] > 0:
                    eij = R[i][j] - np.dot(P[i,:],Q[:,j])
                    for k in range(K):
                        if(i==0):
                            P[i][k] = P[i][k] + alpha * (2 * eij * Q[k][j] - beta * P[i][k])
                        else:         
                            P[i][k] = P[i][k] + alpha * (2 * eij * Q[k][j] - beta * P[i][k])
                        if(k==0):
                            Q[k][j] = Q[k][j] + alpha * (2 * eij * P[i][k] - beta * Q[k][j])
                        else:
                            Q[k][j] = Q[k][j] + alpha * (2 * eij * P[i][k] - beta * Q[k][j])

#         eR = np.dot(P,Q)
#         e = 0
#         for i in range(len(R)):
#             for j in range(len(R[i])):
#                 if R[i][j] > 0:
#                     e = e + pow(R[i][j] - np.dot(P[i,:],Q[:,j]), 2)
#                     for k in range(K):
#                         e = e + (beta/2) * (pow(P[i][k],2) + pow(Q[k][j],2))
#         if e < 0.001:
#             break
    return P, Q.T

In [21]:
tfidf_data_matrix =[]
tfidf_data_matrix = tfidf_influencer_matrix # first add influencers
tfidf_data_matrix = np.append(tfidf_data_matrix,tfidf_tweets_matrix,axis=0) # then tweets
tfidf_data_matrix = np.append(tfidf_data_matrix,tfidf_description_matrix,axis=0) # then descriptions
tfidf_data_matrix = np.append(tfidf_data_matrix,tfidf_hashtags_matrix,axis=0) # then hashtags

binary_data_matrix =[]
binary_data_matrix = binary_influencer_matrix # first add influencers
binary_data_matrix = np.append(binary_data_matrix,binary_tweets_matrix,axis=0) # then tweets
binary_data_matrix = np.append(binary_data_matrix,binary_description_matrix,axis=0) # then descriptions
binary_data_matrix = np.append(binary_data_matrix,binary_hashtags_matrix,axis=0) # then hashtags

#display(pd.DataFrame(binary_data_matrix))

# Method B 
## Non-negative Matrix Factorization (NMF) without the ratings row = Document Clustering

In [22]:
mean_square_errors = []
prints_enabled = False
#Rank
R = 10
# Data
Nr = tfidf_data_matrix.shape[0]
Nc = tfidf_data_matrix.shape[1]

for iteration in range(SUBSAMPLING_COUNT):
    predictions=[]
    TESTING_SET_IDS = np.random.choice(rated_audience_dict_ids, size=TESTING_SET_SIZE, replace=False)
    #if (prints_enabled): print(TESTING_SET_IDS)
    ratings = np.array([0 if id in TESTING_SET_IDS else int(2*aud['ground_truth_rating']) for id, aud in rated_audience_dict.items()])
    if (prints_enabled): 
        printmd("Ratings:")
        display(ratings)

    # Initialize W and H with random numbers
    W = np.random.rand(Nr, R)*100
    H = np.random.rand(R, Nc)*100

    Mask = np.ones_like(tfidf_data_matrix)
    Mask[np.isnan(tfidf_data_matrix)] = 0

    W,H = nmf_kl_multiplicative(tfidf_data_matrix, Mask, W, H, EPOCH=1)

    # Cluster numbers holds which cluster each user is assigned to
    cluster_numbers = np.argmax(H,axis=0)
    # Ratings each cluster is assigned to (initialized to zeros)
    cluster_ratings = np.zeros(R)

    for i in range(len(cluster_numbers)):
        cluster_no = cluster_numbers[i]
        cluster_ratings[cluster_no]+=1.0*ratings[i] 

    for i in range(R):
        cluster_size = float(len(cluster_numbers[cluster_numbers==i]))
        if cluster_size!=0:
            cluster_ratings[i]/=cluster_size
   
    predictions = [np.around(cluster_ratings[cluster_no]) for cluster_no in cluster_numbers]    
    # Found the cluster ratings
    cluster_ratings = [{'cluster_no':i, 'rating':cluster_ratings[i]} for i in range(R)]
    cluster_ratings = pd.DataFrame(cluster_ratings).round(2)
    if (prints_enabled): 
        printmd("Cluster numbers:")
        display(cluster_numbers)
        printmd("Cluster ratings:")
        display(cluster_ratings)
    
    results = pd.DataFrame()
    results['set'] = ['training' if ratings[i]!=0 else 'testing' for i in range(len(ratings)) ]
    results['ground_truth']=ground_truth_ratings
    results['predicted_rating']=predictions
    results["squared_error"]=(results["ground_truth"]-results["predicted_rating"])**2
    
    if (prints_enabled):
        display(results[['set','ground_truth','predicted_rating','squared_error']].round(2))

    evaluation = pd.DataFrame()
    evaluation['mean_squared_error']=results.groupby(by='set')['squared_error'].mean()
    mean_square_error = {
        'training':evaluation.filter(like='training', axis=0)['mean_squared_error'].iloc[0],
        'testing':evaluation.filter(like='testing', axis=0)['mean_squared_error'].iloc[0],
        }
    mean_square_errors.append(mean_square_error)
    
Avg_MSE_training = np.mean([e['training'] for e in mean_square_errors])
Avg_MSE_testing = np.mean([e['testing'] for e in mean_square_errors])
print("Avg Training Mean Squared Error: " + str(Avg_MSE_training))
print("Avg Testing Mean Squared Error: " + str(Avg_MSE_testing))
    

Avg Training Mean Squared Error: 6.87875
Avg Testing Mean Squared Error: 9.5


# Method D 
## Non-negative Matrix Factorization (NMF) with the ratings row

In [23]:
mean_square_errors = []
prints_enabled = False
#Rank
R = 10
# Data
Nr = tfidf_data_matrix.shape[0]+1
Nc = tfidf_data_matrix.shape[1]

for iteration in range(SUBSAMPLING_COUNT):
    predictions=[]
    TESTING_SET_IDS = np.random.choice(rated_audience_dict_ids, size=TESTING_SET_SIZE, replace=False)
    #if (prints_enabled): print(TESTING_SET_IDS)
    ratings = np.array([0 if id in TESTING_SET_IDS else int(2*aud['ground_truth_rating']) for id, aud in rated_audience_dict.items()])
    if (prints_enabled): 
        printmd("Ratings:")
        display(ratings)

    tfidf_data_matrix_with_ratings = ratings.reshape(1,len(ratings))
    tfidf_data_matrix_with_ratings = np.append(tfidf_data_matrix_with_ratings,tfidf_data_matrix,axis=0)

    # Initialize W and H with random numbers
    W = np.random.rand(Nr, R)*100
    H = np.random.rand(R, Nc)*100

    Mask = np.ones_like(tfidf_data_matrix_with_ratings)
    Mask[np.isnan(tfidf_data_matrix_with_ratings)] = 0

    W,H = nmf_kl_multiplicative(tfidf_data_matrix_with_ratings, Mask, W, H, EPOCH=1)
    Xhat = W.dot(H)
    predictions = Xhat[0]
    predictions = np.around(10.0*predictions/max(predictions))
    
    results = pd.DataFrame()
    results['set'] = ['training' if ratings[i]!=0 else 'testing' for i in range(len(ratings)) ]
    results['ground_truth']=ground_truth_ratings
    results['predicted_rating']=predictions
    results["squared_error"]=(results["ground_truth"]-results["predicted_rating"])**2
    
    if (prints_enabled):
        display(results[['set','ground_truth','predicted_rating','squared_error']].round(2))

    evaluation = pd.DataFrame()
    evaluation['mean_squared_error']=results.groupby(by='set')['squared_error'].mean()
    mean_square_error = {
        'training':evaluation.filter(like='training', axis=0)['mean_squared_error'].iloc[0],
        'testing':evaluation.filter(like='testing', axis=0)['mean_squared_error'].iloc[0],
        }
    mean_square_errors.append(mean_square_error)
    
Avg_MSE_training = np.mean([e['training'] for e in mean_square_errors])
Avg_MSE_testing = np.mean([e['testing'] for e in mean_square_errors])
print("Avg Training Mean Squared Error: " + str(Avg_MSE_training))
print("Avg Testing Mean Squared Error: " + str(Avg_MSE_testing))

Avg Training Mean Squared Error: 7.8525
Avg Testing Mean Squared Error: 9.49


In [24]:
mean_square_errors = []
prints_enabled = False
#Rank
R = 10
# Data
Nr = tfidf_data_matrix.shape[0]+1
Nc = tfidf_data_matrix.shape[1]
for T in np.arange(1,10,0.1):
    for iteration in range(1):
        predictions=[]
        TESTING_SET_IDS = np.random.choice(rated_audience_dict_ids, size=TESTING_SET_SIZE, replace=False)
        #if (prints_enabled): print(TESTING_SET_IDS)
        ratings = np.array([0 if id in TESTING_SET_IDS else int(2*aud['ground_truth_rating']) for id, aud in rated_audience_dict.items()])
        if (prints_enabled): 
            printmd("Ratings:")
            display(ratings)

        tfidf_data_matrix_with_ratings = ratings.reshape(1,len(ratings))
        tfidf_data_matrix_with_ratings = np.append(tfidf_data_matrix_with_ratings,tfidf_data_matrix,axis=0)

        # Initialize W and H with random numbers
        W = np.random.rand(Nr, R)*100
        H = np.random.rand(R, Nc)*100

        Mask = np.ones_like(tfidf_data_matrix_with_ratings)
        Mask[np.isnan(tfidf_data_matrix_with_ratings)] = 0

        W,H = nmf_kl_multiplicative(tfidf_data_matrix_with_ratings, Mask, W, H, EPOCH=1)
        Xhat = W.dot(H)
        predictions = Xhat[0]
        predictions = np.around(10.0*predictions/max(predictions))


        results = pd.DataFrame()
        results['set'] = ['training' if ratings[i]!=0 else 'testing' for i in range(len(ratings)) ]
        results['ground_truth']=[0 if r<=T else 1 for r in ground_truth_ratings] # binary ground truth
        results['predicted_rating']=[0 if r<=T else 1 for r in predictions] # binary predictions
        results["squared_error"]=(results["ground_truth"]-results["predicted_rating"])**2

        if (prints_enabled):
            display(results[['set','ground_truth','predicted_rating','squared_error']].round(2))

        evaluation = pd.DataFrame()
        evaluation['mean_squared_error']=results.groupby(by='set')['squared_error'].mean()
        mean_square_error = {
            'training':evaluation.filter(like='training', axis=0)['mean_squared_error'].iloc[0],
            'testing':evaluation.filter(like='testing', axis=0)['mean_squared_error'].iloc[0],
            }
        mean_square_errors.append(mean_square_error)
        
    #Avg_MSE_training = np.mean([e['training'] for e in mean_square_errors])
    Avg_MSE_testing = np.mean([e['testing'] for e in mean_square_errors])
    #print("Avg Training Mean Squared Error: " + str(Avg_MSE_training))
    print("Avg Testing Mean Squared Error: " + str(Avg_MSE_testing.round(2)) + " Threshold: " + str(T))
    


Avg Testing Mean Squared Error: 0.2 Threshold: 1.0
Avg Testing Mean Squared Error: 0.15 Threshold: 1.1
Avg Testing Mean Squared Error: 0.1 Threshold: 1.2
Avg Testing Mean Squared Error: 0.1 Threshold: 1.3
Avg Testing Mean Squared Error: 0.08 Threshold: 1.4
Avg Testing Mean Squared Error: 0.07 Threshold: 1.5
Avg Testing Mean Squared Error: 0.07 Threshold: 1.6
Avg Testing Mean Squared Error: 0.06 Threshold: 1.7
Avg Testing Mean Squared Error: 0.06 Threshold: 1.8
Avg Testing Mean Squared Error: 0.07 Threshold: 1.9
Avg Testing Mean Squared Error: 0.09 Threshold: 2.0
Avg Testing Mean Squared Error: 0.1 Threshold: 2.1
Avg Testing Mean Squared Error: 0.12 Threshold: 2.2
Avg Testing Mean Squared Error: 0.13 Threshold: 2.3
Avg Testing Mean Squared Error: 0.15 Threshold: 2.4
Avg Testing Mean Squared Error: 0.16 Threshold: 2.5
Avg Testing Mean Squared Error: 0.15 Threshold: 2.6
Avg Testing Mean Squared Error: 0.17 Threshold: 2.7
Avg Testing Mean Squared Error: 0.17 Threshold: 2.8
Avg Testing Mean

In [25]:
mean_square_errors = []
prints_enabled = False
K = 16
for iteration in range(SUBSAMPLING_COUNT):
    predictions=[]
    TESTING_SET_IDS = np.random.choice(rated_audience_dict_ids, size=TESTING_SET_SIZE, replace=False)
    #if (prints_enabled): print(TESTING_SET_IDS)
    ratings = np.array([0 if id in TESTING_SET_IDS else int(2*aud['ground_truth_rating']) for id, aud in rated_audience_dict.items()])
    if (prints_enabled): 
        printmd("Ratings:")
        display(ratings)

    tfidf_data_matrix_with_ratings = ratings.reshape(1,len(ratings))
    tfidf_data_matrix_with_ratings = np.append(tfidf_data_matrix_with_ratings,tfidf_influencer_matrix,axis=0)

    # Initialize W and H with random numbers    
    R = tfidf_data_matrix_with_ratings
    N= len(R)
    M = len(R[0])
    steps = 2000

    P = np.random.rand(N,K)
    Q = np.random.rand(M,K)

    nP, nQ = matrix_factorization(R, P, Q, K,steps)
    nR = np.dot(nP, nQ.T)

    predictions =np.round(nR[0].reshape(1,len(nR[0])))[0]
    print(predictions)
    
    results = pd.DataFrame()
    results['set'] = ['training' if ratings[i]!=0 else 'testing' for i in range(len(ratings)) ]
    results['ground_truth']=ground_truth_ratings
    results['predicted_rating']=predictions
    results["squared_error"]=(results["ground_truth"]-results["predicted_rating"])**2
    
    if (prints_enabled):
        display(results[['set','ground_truth','predicted_rating','squared_error']].round(2))

    evaluation = pd.DataFrame()
    evaluation['mean_squared_error']=results.groupby(by='set')['squared_error'].mean()
    mean_square_error = {
        'training':evaluation.filter(like='training', axis=0)['mean_squared_error'].iloc[0],
        'testing':evaluation.filter(like='testing', axis=0)['mean_squared_error'].iloc[0],
        }
    mean_square_errors.append(mean_square_error)
    
Avg_MSE_training = np.mean([e['training'] for e in mean_square_errors])
Avg_MSE_testing = np.mean([e['testing'] for e in mean_square_errors])
print("Avg Training Mean Squared Error: " + str(Avg_MSE_training))
print("Avg Testing Mean Squared Error: " + str(Avg_MSE_testing))

[  2.   4.   6.   4.   5.  10.  10.   9.   3.   5.   6.   8.   7.   3.   2.
   2.   6.   6.   2.   5.   8.   2.   1.   4.   8.   9.   5.   6.   4.   5.
   2.   7.   5.   6.   2.   4.   5.   4.   3.   8.   4.   2.   3.   3.   2.
   6.   5.  10.   8.   5.]
[  2.   5.   6.   4.   5.  10.  10.   9.   3.   5.   4.   8.   7.   3.   2.
   2.   5.   2.   4.   5.   8.   2.   1.   4.   7.   3.   5.   4.   4.   4.
   2.   6.   4.   6.   2.   7.   1.   4.   2.   8.   5.   2.   4.   3.   2.
   6.   5.   4.   8.   5.]
[  2.   1.   6.   4.   5.   2.  10.   9.   3.   5.   6.   3.   7.   3.   2.
   4.   5.   2.   2.   5.   8.   2.   3.   6.   7.   9.   5.   9.   4.   3.
   2.   7.   5.   6.   4.   7.   4.   4.   2.   8.   4.   2.   3.   2.   2.
   6.   5.  10.   8.   5.]
[  2.   1.   5.   4.   5.  10.  10.   9.   3.   5.   6.   3.   5.   3.   2.
   2.   5.   2.   4.   5.   8.   2.   1.   4.   8.   9.   5.   5.   4.   3.
   2.   7.   5.   6.   2.   7.   3.   4.   2.   5.   4.   2.   4.   3.   2.
   6.  

In [26]:
mean_square_errors = []
prints_enabled = False
K = 16
for iteration in range(SUBSAMPLING_COUNT):
    print(str(iteration))
    predictions=[]
    TESTING_SET_IDS = np.random.choice(rated_audience_dict_ids, size=TESTING_SET_SIZE, replace=False)
    #if (prints_enabled): print(TESTING_SET_IDS)
    ratings = np.array([0 if id in TESTING_SET_IDS else int(2*aud['ground_truth_rating']) for id, aud in rated_audience_dict.items()])
    if (prints_enabled): 
        printmd("Ratings:")
        display(ratings)
        
    # FIND USER PROFILES
    influencer_user_profile = get_user_profile(tfidf_influencer_matrix, ratings)
    tweets_user_profile = get_user_profile(tfidf_tweets_matrix, ratings)
    description_user_profile = get_user_profile(tfidf_description_matrix, ratings)
    hashtags_user_profile = get_user_profile(tfidf_hashtags_matrix, ratings)

    # FIND THE COSINE SIMILARITIES
    # THESE WILL THEN BE USED AS FEATURES IN REGRESSION
    influencerSimilarities = cos_sim(tfidf_influencer_matrix, influencer_user_profile)
    tweetSimilarities = cos_sim(tfidf_tweets_matrix, tweets_user_profile)
    descriptionSimilarities = cos_sim(tfidf_description_matrix, description_user_profile)
    hashtagSimilarities = cos_sim(tfidf_hashtags_matrix, hashtags_user_profile)
    
    avgTweetSim = np.mean([e for e in tweetSimilarities if e!=0]) # if we cannot fetch tweets of a profile, we assign his tweets an average similarity score
    avgDescriptionSim = np.mean([e for e in descriptionSimilarities if e!=0]) # if the description of a profile is empty, we assign his descripion an average similarity score
    avgHashtagSim = np.mean([e for e in hashtagSimilarities if e!=0]) # if we cannot fetch tweets of a profile, we assign his hashtags an average similarity score
    tweetSimilarities[tweetSimilarities==0]=avgTweetSim
    descriptionSimilarities[descriptionSimilarities==0]=avgDescriptionSim
    hashtagSimilarities[hashtagSimilarities==0]=avgHashtagSim

    
    tfidf_data_matrix_with_ratings = ratings.reshape(1,len(ratings))
    #tfidf_data_matrix_with_ratings = np.append(tfidf_data_matrix_with_ratings,influencerSimilarities.T,axis=0)
    tfidf_data_matrix_with_ratings = np.append(tfidf_data_matrix_with_ratings,tweetSimilarities.T,axis=0)
    tfidf_data_matrix_with_ratings = np.append(tfidf_data_matrix_with_ratings,descriptionSimilarities.T,axis=0)
    tfidf_data_matrix_with_ratings = np.append(tfidf_data_matrix_with_ratings,hashtagSimilarities.T,axis=0)
    display(pd.DataFrame(tfidf_data_matrix_with_ratings))

    # Initialize W and H with random numbers    
    R = tfidf_data_matrix_with_ratings
    N= len(R)
    M = len(R[0])
    steps = 2000

    P = np.random.rand(N,K)
    Q = np.random.rand(M,K)

    nP, nQ = matrix_factorization(R, P, Q, K,steps)
    nR = np.dot(nP, nQ.T)

    predictions =np.round(nR[0].reshape(1,len(nR[0])))[0]
    #print(predictions)
    
    results = pd.DataFrame()
    results['set'] = ['training' if ratings[i]!=0 else 'testing' for i in range(len(ratings)) ]
    results['ground_truth']=ground_truth_ratings
    results['predicted_rating']=predictions
    results["squared_error"]=(results["ground_truth"]-results["predicted_rating"])**2
    
    #if (prints_enabled):
    #display(results[['set','ground_truth','predicted_rating','squared_error']].round(2))

    evaluation = pd.DataFrame()
    evaluation['mean_squared_error']=results.groupby(by='set')['squared_error'].mean()
    mean_square_error = {
        'training':evaluation.filter(like='training', axis=0)['mean_squared_error'].iloc[0],
        'testing':evaluation.filter(like='testing', axis=0)['mean_squared_error'].iloc[0],
        }
    mean_square_errors.append(mean_square_error)
    
Avg_MSE_training = np.mean([e['training'] for e in mean_square_errors])
Avg_MSE_testing = np.mean([e['testing'] for e in mean_square_errors])
print("Avg Training Mean Squared Error: " + str(Avg_MSE_training))
print("Avg Testing Mean Squared Error: " + str(Avg_MSE_testing))

0


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49
0,2.0,1.0,6.0,0.0,0.0,10.0,10.0,10.0,0.0,5.0,...,4.0,2.0,0.0,3.0,2.0,6.0,5.0,10.0,8.0,0.0
1,0.202226,0.229539,0.374722,0.09493,0.138711,0.271734,0.335465,0.431838,0.147519,0.238388,...,0.210672,0.314559,0.125145,0.154266,0.284111,0.213084,0.241173,0.387186,0.302048,0.088992
2,0.120826,0.165675,0.201757,0.116149,0.101942,0.361705,0.284369,0.30086,0.168906,0.217022,...,0.173389,0.055337,0.02128,0.168531,0.055337,0.296727,0.219106,0.30607,0.290601,0.085894
3,0.060263,0.031697,0.180316,0.006864,0.005171,0.286247,0.287788,0.303995,0.016504,0.232666,...,0.112442,0.056221,0.058934,0.084331,0.067481,0.171794,0.140552,0.281105,0.237704,0.031782


1


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49
0,2.0,1.0,6.0,4.0,0.0,10.0,10.0,10.0,3.0,0.0,...,4.0,2.0,4.0,3.0,2.0,6.0,5.0,10.0,8.0,0.0
1,0.195031,0.230967,0.3789,0.173357,0.138385,0.26442,0.334211,0.442817,0.203834,0.143525,...,0.208781,0.321067,0.197413,0.156327,0.28859,0.219996,0.223308,0.380697,0.287009,0.095684
2,0.121916,0.17779,0.181131,0.226093,0.07536,0.342794,0.282109,0.27612,0.252589,0.106894,...,0.118502,0.051345,0.1267,0.198215,0.051345,0.26055,0.214043,0.351355,0.254474,0.085638
3,0.05786,0.034755,0.193159,0.107957,0.015761,0.278738,0.275109,0.307752,0.096185,0.082065,...,0.110459,0.053978,0.216695,0.080968,0.056183,0.164941,0.134946,0.271917,0.222788,0.020314


2


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49
0,0.0,0.0,6.0,4.0,5.0,10.0,10.0,0.0,3.0,5.0,...,4.0,2.0,4.0,3.0,0.0,6.0,5.0,0.0,8.0,5.0
1,0.160986,0.205209,0.339881,0.188362,0.241088,0.280406,0.358178,0.236787,0.217102,0.246956,...,0.219828,0.250778,0.214964,0.160957,0.208511,0.242687,0.243482,0.181496,0.320656,0.205752
2,0.081862,0.173815,0.183451,0.196014,0.237817,0.350479,0.279783,0.04168,0.231298,0.211396,...,0.17785,0.050172,0.154885,0.168902,0.004462,0.287272,0.244868,0.109506,0.27646,0.22674
3,0.004097,0.005823,0.2001,0.120906,0.157858,0.294211,0.292781,0.032693,0.106283,0.24572,...,0.117986,0.056975,0.189383,0.085462,0.012057,0.174097,0.142437,0.002138,0.24119,0.175782


3


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49
0,2.0,1.0,6.0,4.0,0.0,10.0,10.0,10.0,3.0,5.0,...,4.0,2.0,4.0,3.0,2.0,6.0,5.0,10.0,8.0,5.0
1,0.203548,0.229987,0.366042,0.175732,0.146978,0.26448,0.331039,0.416036,0.20528,0.240266,...,0.218672,0.29768,0.207987,0.156429,0.264641,0.228455,0.23657,0.375901,0.298101,0.192841
2,0.124586,0.177887,0.190718,0.204183,0.082176,0.320438,0.287406,0.279691,0.232866,0.228996,...,0.163393,0.05231,0.131439,0.186437,0.05231,0.26712,0.228325,0.333876,0.257035,0.191295
3,0.05734,0.034443,0.191424,0.113518,0.016294,0.271341,0.276458,0.304166,0.096879,0.220364,...,0.110776,0.053493,0.217273,0.08024,0.055941,0.16048,0.133734,0.269475,0.225207,0.157412


4


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49
0,2.0,0.0,6.0,4.0,5.0,10.0,10.0,10.0,3.0,5.0,...,4.0,2.0,4.0,0.0,0.0,6.0,5.0,10.0,0.0,5.0
1,0.198054,0.198157,0.375577,0.175277,0.236529,0.272648,0.345655,0.441671,0.213072,0.245876,...,0.21868,0.30977,0.20814,0.090231,0.255978,0.22578,0.225739,0.390743,0.143231,0.200101
2,0.119949,0.175007,0.193429,0.215391,0.200084,0.327853,0.295962,0.285816,0.284278,0.234593,...,0.143166,0.049988,0.151812,0.135153,0.004445,0.263858,0.22016,0.362526,0.051444,0.208783
3,0.060322,0.006851,0.197375,0.119422,0.156529,0.285453,0.289871,0.314551,0.099345,0.205927,...,0.116538,0.056276,0.232797,0.130433,0.009611,0.171961,0.140689,0.28349,0.011869,0.16011


5


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49
0,2.0,1.0,6.0,4.0,5.0,10.0,10.0,10.0,3.0,0.0,...,4.0,2.0,4.0,0.0,2.0,6.0,5.0,10.0,8.0,5.0
1,0.202365,0.227591,0.370457,0.171589,0.235578,0.26152,0.335165,0.427206,0.208212,0.148713,...,0.215651,0.306908,0.200982,0.092383,0.273616,0.22388,0.225689,0.37619,0.294331,0.194861
2,0.130978,0.174137,0.18015,0.214497,0.222422,0.327382,0.281085,0.282015,0.27694,0.105612,...,0.127431,0.052441,0.147257,0.121871,0.052441,0.255042,0.218247,0.336311,0.259906,0.22074
3,0.057985,0.034831,0.189988,0.108191,0.14863,0.279343,0.274902,0.306786,0.09825,0.094132,...,0.112023,0.054096,0.199216,0.13345,0.063682,0.165299,0.135239,0.272508,0.225116,0.154796


6


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49
0,2.0,1.0,0.0,4.0,5.0,10.0,10.0,10.0,3.0,0.0,...,4.0,2.0,4.0,3.0,2.0,6.0,5.0,10.0,0.0,5.0
1,0.202112,0.212606,0.278367,0.189703,0.241263,0.280337,0.350292,0.435842,0.212107,0.148283,...,0.217446,0.305015,0.207825,0.154446,0.267701,0.233411,0.21377,0.38864,0.142021,0.201792
2,0.130902,0.173029,0.037489,0.243885,0.227528,0.341299,0.310076,0.293739,0.30458,0.093855,...,0.122296,0.056436,0.158624,0.217865,0.056436,0.236347,0.229033,0.386187,0.072395,0.227715
3,0.058175,0.037457,0.032217,0.116351,0.160996,0.300411,0.297338,0.322169,0.107708,0.09379,...,0.120472,0.058175,0.197144,0.087263,0.068485,0.177766,0.145439,0.29306,0.009074,0.165592


7


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49
0,2.0,1.0,6.0,4.0,5.0,10.0,10.0,0.0,0.0,5.0,...,4.0,2.0,4.0,3.0,2.0,6.0,0.0,10.0,8.0,5.0
1,0.204712,0.229158,0.336948,0.186811,0.245832,0.267719,0.345438,0.245991,0.155276,0.254011,...,0.224041,0.258521,0.215269,0.163407,0.237832,0.235456,0.14417,0.375838,0.307652,0.20234
2,0.121554,0.175462,0.171877,0.21933,0.241228,0.317717,0.289196,0.040156,0.172259,0.203248,...,0.167319,0.052635,0.151373,0.168932,0.052635,0.283256,0.097639,0.32026,0.268693,0.219179
3,0.058336,0.032773,0.195667,0.11549,0.151375,0.281032,0.280178,0.037317,0.01721,0.239356,...,0.112701,0.054423,0.221878,0.081634,0.059136,0.166299,0.131101,0.274156,0.229495,0.166662


8


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49
0,2.0,0.0,6.0,4.0,0.0,10.0,10.0,10.0,3.0,0.0,...,0.0,2.0,4.0,3.0,2.0,6.0,5.0,0.0,8.0,5.0
1,0.198606,0.215829,0.383883,0.1733,0.131903,0.277722,0.34777,0.440366,0.215763,0.136408,...,0.135837,0.319854,0.195874,0.154734,0.288594,0.232948,0.231925,0.186449,0.307747,0.190005
2,0.138368,0.171244,0.195435,0.213656,0.12892,0.369865,0.28371,0.289897,0.211984,0.088969,...,0.045637,0.0554,0.143248,0.171273,0.0554,0.284151,0.244914,0.111042,0.283646,0.222627
3,0.061377,0.006971,0.200134,0.114519,0.015498,0.295681,0.29098,0.320053,0.105394,0.113643,...,0.004056,0.05726,0.187877,0.085889,0.067407,0.174967,0.143149,0.002149,0.23978,0.163577


9


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49
0,0.0,1.0,6.0,4.0,5.0,0.0,10.0,10.0,3.0,0.0,...,4.0,2.0,4.0,3.0,2.0,0.0,5.0,10.0,8.0,5.0
1,0.163719,0.243366,0.391058,0.179974,0.245098,0.073437,0.339106,0.448436,0.201578,0.148652,...,0.218906,0.330379,0.205556,0.159523,0.295851,0.102909,0.238784,0.397312,0.300329,0.193508
2,0.038233,0.169719,0.193072,0.234549,0.253964,0.070353,0.315467,0.300977,0.251603,0.089148,...,0.1505,0.059518,0.165992,0.177258,0.059518,0.108481,0.206918,0.360744,0.248993,0.235145
3,0.0042,0.035178,0.208916,0.116831,0.160118,0.00423,0.298566,0.3256,0.104064,0.094753,...,0.118261,0.058416,0.228172,0.087624,0.066382,0.003253,0.146039,0.29427,0.23927,0.155271


10


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49
0,2.0,1.0,6.0,4.0,0.0,10.0,10.0,0.0,3.0,5.0,...,4.0,2.0,4.0,0.0,2.0,6.0,5.0,10.0,8.0,5.0
1,0.195928,0.220344,0.33538,0.173933,0.146404,0.263086,0.342587,0.246897,0.216253,0.252421,...,0.220526,0.25545,0.210986,0.103553,0.234397,0.229925,0.23953,0.368164,0.3062,0.194925
2,0.125826,0.183194,0.174826,0.213265,0.134039,0.331251,0.283084,0.039721,0.256463,0.204583,...,0.169489,0.052065,0.142102,0.129272,0.052065,0.284789,0.215951,0.346752,0.265781,0.214935
3,0.057722,0.032428,0.18796,0.114275,0.016402,0.278076,0.276602,0.029542,0.094831,0.224136,...,0.111515,0.05385,0.214347,0.134409,0.065246,0.164549,0.134626,0.271272,0.226157,0.163393


11


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49
0,2.0,1.0,0.0,0.0,5.0,10.0,10.0,10.0,3.0,5.0,...,4.0,2.0,4.0,3.0,2.0,6.0,5.0,0.0,8.0,5.0
1,0.20975,0.218714,0.240942,0.108465,0.239079,0.273064,0.347716,0.371637,0.217871,0.246043,...,0.224213,0.248663,0.210553,0.153637,0.221586,0.236343,0.248535,0.182917,0.315137,0.207626
2,0.129646,0.178338,0.04785,0.101338,0.261566,0.341382,0.264231,0.282709,0.245599,0.215698,...,0.174833,0.053706,0.159971,0.159177,0.053706,0.274835,0.240713,0.096992,0.282035,0.236532
3,0.055646,0.035829,0.028893,0.006794,0.153211,0.283318,0.287581,0.306124,0.101645,0.246127,...,0.115233,0.055646,0.18634,0.083469,0.067422,0.170036,0.139114,0.002088,0.235483,0.17352


12


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49
0,2.0,1.0,6.0,4.0,5.0,10.0,10.0,10.0,3.0,0.0,...,4.0,2.0,4.0,3.0,2.0,6.0,0.0,0.0,8.0,0.0
1,0.205945,0.229586,0.373091,0.179016,0.235476,0.275069,0.342028,0.425702,0.210912,0.144042,...,0.215802,0.304266,0.200702,0.15007,0.271657,0.236854,0.145563,0.190964,0.308041,0.102594
2,0.131458,0.17876,0.195307,0.199615,0.256043,0.335317,0.287796,0.289707,0.21328,0.11526,...,0.1363,0.055364,0.150417,0.169311,0.055364,0.279104,0.11503,0.124641,0.29074,0.118021
3,0.056583,0.03518,0.198452,0.113166,0.157978,0.292187,0.288228,0.314844,0.102174,0.090297,...,0.115789,0.056583,0.226491,0.084874,0.065968,0.172899,0.131802,0.002123,0.23328,0.019577


13


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49
0,2.0,1.0,6.0,4.0,5.0,10.0,10.0,0.0,0.0,5.0,...,4.0,2.0,4.0,3.0,2.0,6.0,5.0,10.0,8.0,5.0
1,0.199401,0.225359,0.348546,0.185787,0.240413,0.268326,0.348111,0.259607,0.152536,0.256302,...,0.227369,0.273136,0.212093,0.156045,0.250886,0.234821,0.239544,0.373845,0.308932,0.200178
2,0.121593,0.178134,0.171091,0.20969,0.227094,0.333609,0.28929,0.032539,0.220582,0.192313,...,0.171402,0.052652,0.142581,0.194719,0.052652,0.281771,0.218552,0.3446,0.268781,0.224962
3,0.055501,0.032194,0.197238,0.117778,0.154717,0.286599,0.285727,0.035192,0.019505,0.238857,...,0.114933,0.055501,0.214069,0.083251,0.060307,0.169593,0.138752,0.279586,0.233195,0.171283


14


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49
0,0.0,1.0,0.0,4.0,0.0,0.0,0.0,10.0,3.0,5.0,...,4.0,0.0,4.0,3.0,2.0,6.0,5.0,10.0,8.0,5.0
1,0.160496,0.231838,0.254582,0.18189,0.145954,0.070784,0.150298,0.410213,0.211377,0.244285,...,0.230369,0.257754,0.208592,0.167762,0.248514,0.239341,0.251478,0.384607,0.312932,0.200149
2,0.064141,0.173852,0.049297,0.213918,0.109068,0.099212,0.049945,0.288982,0.241688,0.231046,...,0.172828,0.004519,0.140521,0.182092,0.050812,0.246949,0.208329,0.362244,0.248051,0.213992
3,0.004067,0.036421,0.032721,0.120038,0.016084,0.004096,0.009507,0.323926,0.09869,0.22922,...,0.117138,0.131456,0.213386,0.084848,0.059154,0.172847,0.141414,0.284951,0.238694,0.166452


15


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49
0,2.0,0.0,6.0,0.0,5.0,10.0,10.0,10.0,3.0,5.0,...,4.0,2.0,4.0,3.0,0.0,0.0,5.0,10.0,8.0,5.0
1,0.208996,0.20933,0.354701,0.099312,0.232282,0.25678,0.333822,0.40831,0.207864,0.242123,...,0.216147,0.281497,0.20497,0.148011,0.228174,0.114493,0.236938,0.374268,0.295165,0.190209
2,0.12328,0.174917,0.174253,0.11884,0.243991,0.312675,0.262064,0.27516,0.27605,0.212182,...,0.166248,0.046898,0.14478,0.184207,0.004171,0.127867,0.223475,0.329362,0.260376,0.211649
3,0.05601,0.007518,0.183516,0.00638,0.143566,0.269827,0.268516,0.29869,0.096199,0.225525,...,0.108207,0.052253,0.194608,0.078379,0.011058,0.132068,0.130632,0.263225,0.221409,0.161461


16


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49
0,2.0,1.0,0.0,4.0,5.0,10.0,10.0,10.0,3.0,5.0,...,0.0,2.0,4.0,3.0,2.0,6.0,5.0,10.0,8.0,5.0
1,0.210353,0.217774,0.252576,0.187102,0.240769,0.270849,0.348136,0.391559,0.214662,0.253806,...,0.144051,0.263212,0.209027,0.159494,0.235749,0.236654,0.231183,0.376326,0.303697,0.205667
2,0.136082,0.175136,0.041041,0.218772,0.217987,0.326695,0.282385,0.275618,0.284784,0.189749,...,0.061069,0.051396,0.151703,0.198409,0.051396,0.249323,0.216682,0.3517,0.247966,0.211321
3,0.058547,0.035168,0.028272,0.115908,0.149086,0.282049,0.280624,0.311132,0.100535,0.237189,...,0.003869,0.05462,0.20795,0.081929,0.05935,0.1669,0.136549,0.275148,0.232867,0.167953


17


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49
0,0.0,1.0,6.0,4.0,5.0,10.0,10.0,10.0,0.0,5.0,...,4.0,2.0,0.0,3.0,2.0,6.0,5.0,10.0,8.0,5.0
1,0.158246,0.224986,0.372427,0.181922,0.235791,0.268142,0.335512,0.422106,0.150505,0.23748,...,0.215286,0.308169,0.1296,0.155724,0.275852,0.223609,0.225806,0.377136,0.298252,0.191952
2,0.072335,0.177645,0.189593,0.208042,0.215086,0.326318,0.276004,0.278443,0.19817,0.206207,...,0.169281,0.052001,0.05404,0.182311,0.052001,0.273431,0.215848,0.324806,0.26314,0.199663
3,0.003931,0.032919,0.181833,0.116003,0.147709,0.278321,0.280633,0.291965,0.016745,0.242072,...,0.113201,0.054664,0.078934,0.081997,0.066233,0.167037,0.136661,0.273322,0.231297,0.167843


18


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49
0,2.0,1.0,0.0,4.0,5.0,10.0,10.0,10.0,3.0,5.0,...,4.0,2.0,0.0,3.0,2.0,6.0,0.0,10.0,8.0,5.0
1,0.209083,0.223751,0.256652,0.184435,0.24306,0.275279,0.341389,0.396991,0.205257,0.246506,...,0.221237,0.270866,0.130679,0.163123,0.246536,0.236173,0.141925,0.378014,0.2985,0.204902
2,0.127129,0.17224,0.047557,0.217312,0.217118,0.328041,0.293272,0.27878,0.231179,0.226095,...,0.162643,0.053377,0.050831,0.179973,0.053377,0.26582,0.094334,0.34069,0.262281,0.205208
3,0.059372,0.031228,0.017987,0.117541,0.14851,0.280957,0.28378,0.316775,0.095782,0.227218,...,0.114702,0.055389,0.099319,0.083084,0.057924,0.169252,0.128355,0.276946,0.234983,0.162819


19


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49
0,2.0,1.0,0.0,4.0,5.0,10.0,10.0,10.0,3.0,5.0,...,4.0,2.0,4.0,3.0,2.0,6.0,5.0,10.0,8.0,5.0
1,0.209421,0.227521,0.274866,0.182511,0.245339,0.275254,0.339957,0.424855,0.197327,0.246546,...,0.217262,0.304199,0.209925,0.156705,0.268943,0.227345,0.228526,0.390019,0.307411,0.190914
2,0.125969,0.169464,0.036803,0.217053,0.208195,0.333658,0.303831,0.292932,0.271252,0.220504,...,0.175191,0.056087,0.146856,0.195992,0.056087,0.254204,0.234577,0.351915,0.275595,0.223676
3,0.06133,0.03684,0.028214,0.121418,0.152407,0.290223,0.29402,0.314562,0.101927,0.229206,...,0.115832,0.057216,0.175579,0.085824,0.066988,0.174834,0.14304,0.288227,0.239775,0.165896


Avg Training Mean Squared Error: 0.0
Avg Testing Mean Squared Error: 7.34
