# Basic latent factor model

In [1]:
from surprise import SVD
from surprise import evaluate, print_perf, Reader, Dataset, accuracy
import pickle
import numpy as np
from random import shuffle
from bs4 import BeautifulSoup
import requests
import math
import datetime
import time

In [2]:
f = open('../Results/Pickles/member_names.pickle', 'rb')
member_names = pickle.load(f)
f.close()

In [3]:
f = open('../Results/Pickles/edge_details.pickle', 'rb')
edge_details = pickle.load(f)
f.close()

In [4]:
f = open('../Data/latent1.csv', 'w')

n = len(member_names)

for i in range(n):
    for j in range(n):
        if i == j:
            continue
        if (i, j) in edge_details or (j, i) in edge_details:
            f.write(str(i) + ',' + str(j) + ',1\n')
        else:
            f.write(str(i) + ',' + str(j) + ',0\n')
            
f.close()   

In [5]:
file_path = '../Data/latent1.csv'

reader = Reader(line_format='user item rating', sep=',')

data = Dataset.load_from_file(file_path, reader=reader)
data.split(n_folds=5)

In [6]:
algo = SVD()

for trainset, testset in data.folds():

    # train and test algorithm.
    algo.train(trainset)
    predictions = algo.test(testset)

    # Compute and print Root Mean Squared Error
    rmse = accuracy.rmse(predictions, verbose=True)

RMSE: 0.7359
RMSE: 0.7333
RMSE: 0.7329
RMSE: 0.7332
RMSE: 0.7333


# Latent Factor Model with timestamp

In [10]:
f = open('../Data/latent2.csv', 'w')

n = len(member_names)

all_timestamps = set([])
yearwise_timestamps = {}

for rec in edge_details:
    for debate in edge_details[rec]:
        for timestamp in edge_details[rec][debate]:
            all_timestamps.add(timestamp)
            
for ts in all_timestamps:
    if ts.year in yearwise_timestamps:
        continue
    yearwise_timestamps[ts.year] = ts
    
print(yearwise_timestamps)

{2012: datetime.datetime(2012, 9, 7, 0, 0), 2010: datetime.datetime(2010, 12, 13, 0, 0), 2013: datetime.datetime(2013, 4, 26, 0, 0), 2014: datetime.datetime(2014, 2, 5, 0, 0), 2011: datetime.datetime(2011, 2, 25, 0, 0), 2009: datetime.datetime(2009, 12, 1, 0, 0)}


In [11]:
all_years = yearwise_timestamps.keys()

for i in range(n):
    for j in range(n):
        if i == j:
            continue
        current_years = set([])
        if (i, j) in edge_details or (j, i) in edge_details:
            minind = min(i, j)
            maxind = max(i, j)
            for debate in edge_details[(minind, maxind)]:
                for timestamp in edge_details[(minind, maxind)][debate]:
                    f.write(str(i) + ',' + str(j) + ',1,' + str(timestamp) + '\n')
                    current_years.add(timestamp.year)
            for year in all_years:
                if year not in current_years:
                    f.write(str(i) + ',' + str(j) + ',0,' + str(timestamp) + '\n')
        else:
            for year in all_years:
                f.write(str(i) + ',' + str(j) + ',0,' + str(timestamp) + '\n')
            
f.close()   

In [12]:
file_path = '../Data/latent2.csv'

reader = Reader(line_format='user item rating timestamp', sep=',')

data = Dataset.load_from_file(file_path, reader=reader)
data.split(n_folds=5)

In [13]:
algo = SVD()

for trainset, testset in data.folds():

    # train and test algorithm.
    algo.train(trainset)
    predictions = algo.test(testset)

    # Compute and print Root Mean Squared Error
    rmse = accuracy.rmse(predictions, verbose=True)

RMSE: 0.8861
RMSE: 0.8859
RMSE: 0.8860
RMSE: 0.8859
RMSE: 0.8863
