<h1 align="center" style="background-color:#616161;color:white">Linear Regression with SVM</h1>

Adapted from: https://github.com/aymericdamien/TensorFlow-Examples/blob/master/examples/3_NeuralNetworks/recurrent_network.py

<h3 style="background-color:#616161;color:white">0. Setup</h3>

<div style="background-color:white; color:#008000; font-family: 'Courier New', Monospace; font-weight: bold">Input Parameters</div>

In [1]:
# --- Time-series resolution ---
# Length of one period (presumably minutes, since 60 is divided by it below), e.g. 15, 30, 60
PeriodGranularity = 30

# --- Train / Test split ---
# Number of randomly selected users to separate out of eval 2
newUsers = 10
# Number of random periods to select from each user
rndPeriods = 3
# How long each random test period should cover:
# periods per hour * 24 hours * 7 days * 4 weeks
_periodsPerHour = int(60/PeriodGranularity)
rndPeriodsLength = _periodsPerHour * 24 * 7 * 4

# --- Root path ---
#root = "C:/DS/Github/MusicRecommendation"  # BA, Windows
root = "/home/badrul/git/EventPrediction" # BA, Linux

<div style="background-color:white; color:#008000; font-family: 'Courier New', Monospace; font-weight: bold">Common Libraries</div>

In [2]:
# Core
import numpy as np
import pandas as pd
from IPython.core.debugger import Tracer    # Used for debugging
import logging

# File and database management
import csv
import os
import sys
import json
import sqlite3
from pathlib import Path

# Date/Time
import datetime
import time
#from datetime import timedelta # Deprecated

# Visualization
import matplotlib.pyplot as plt             # Quick
%matplotlib inline

# Misc
import random

#-------------- Custom Libs -----------------#
# Make the project root the working directory
os.chdir(root)

# Put the project's code module folder on the import path (added only once,
# so re-running this cell does not create duplicate entries)
fPath = root + "/1_codemodule"
if fPath not in sys.path:
    sys.path.append(fPath)

# Custom Libs
import coreCode as cc
import lastfmCode as fm

<div style="background-color:white; color:#008000; font-family: 'Courier New', Monospace; font-weight: bold">Page Specific Libraries</div>

In [3]:
# Data science (comment out if not needed)
#from sklearn.manifold import TSNE
# NOTE(review): tensorflow.contrib is, as far as known, only shipped with TensorFlow 1.x -
# confirm the TensorFlow version this notebook is pinned to.
import tensorflow as tf
from tensorflow.contrib import rnn
from tensorflow.python.framework import ops
# Reset TensorFlow's global default graph so that re-running this cell starts from a clean graph
ops.reset_default_graph()
from sklearn import metrics
from sklearn import preprocessing

<div style="background-color:white; color:#008000; font-family: 'Courier New', Monospace; font-weight: bold">Declare Functions</div>

<div style="background-color:white; color:#008000; font-family: 'Courier New', Monospace; font-weight: bold">Load settings</div>

In [4]:
# Load the project settings, then prefix every configured database location with `root`
settingsDict =  cc.loadSettings()

def _underRoot(settingKey):
    """Return `root` concatenated with the settings entry stored under `settingKey`."""
    return root + settingsDict[settingKey]

dbPath = _underRoot('mainDbPath')
fmSimilarDbPath = _underRoot('fmSimilarDbPath')
fmTagsDbPath = _underRoot('fmTagsDbPath')
trackMetaDbPath = _underRoot('trackmetadata')

<h3 style="background-color:#616161;color:white">1. Load data</h3>

In [5]:
def getTrainAndTestData():
    """Split every training user's time series into a training and a test DataFrame.

    Reads the notebook-level globals `dbPath` (SQLite database file) and
    `PeriodGranularity` (used to derive the number of periods in four weeks).

    For each user with `TestUser = 0`, two randomly positioned slices of up to
    `periodsInAMonth` rows are cut out of the user's rows and placed in the test
    set; the user's remaining rows go to the training set. A slice is shorter
    than `periodsInAMonth` (possibly empty) when its random start lies close to
    the end of the user's data.

    Prints 'Ok' when the train and test row counts add up to the number of rows
    read, otherwise prints a message with the mismatching counts.

    Returns:
        (trainDf, testDf): two DataFrames with the columns named in `fieldList`.

    Raises:
        ValueError: from `random.randint` when a user has fewer rows than
            `periodsInAMonth` at either cut-off.
    """
    fieldList="t, UserID, HrsFrom6pm, isSun,isMon,isTue,isWed,isThu,isFri,isSat,t1,t2,t3,t4,t5,t10,t12hrs,t24hrs,t1wk,t2wks,t3wks,t4wks"
    # One entry per field; used for the empty-result frames so they carry real
    # column names rather than a single column named after the whole string.
    columnNames = [name.strip() for name in fieldList.split(",")]
    periodsInAMonth=int(60/PeriodGranularity)*24*7*4

    def _combine(pieces):
        """Concatenate the non-empty pieces once; fall back to an empty frame with the expected columns."""
        nonEmpty = [piece for piece in pieces if len(piece) > 0]
        if nonEmpty:
            return pd.concat(nonEmpty)
        return pd.DataFrame(columns=columnNames)

    # Collect the per-user pieces in lists and concatenate once at the end,
    # instead of growing a DataFrame inside the loop (quadratic copying, and
    # DataFrame.append no longer exists in current pandas).
    trainPieces = []
    testPieces = []
    totalRows=0

    con = sqlite3.connect(dbPath)
    try:
        # Get list of UserIDs
        trainUsers = pd.read_sql_query("Select UserID from tblUsers Where tblUsers.TestUser = 0",con)

        for user in trainUsers.itertuples():
            # Get training dataset
            # NOTE(review): `user.userID` relies on the column label SQLite reports
            # for the users query - confirm it matches the table's declared column name.
            SqlStr="SELECT {} from tblTimeSeriesData where UserID = {}".format(fieldList,user.userID)
            df = pd.read_sql_query(SqlStr, con)
            totalRows += len(df)

            # Cut-off 1: random start somewhere after the first month of data
            k = random.randint(periodsInAMonth, len(df))
            testPieces.append(df.iloc[k:k+periodsInAMonth])
            tmp = df.drop(df.index[k:k+periodsInAMonth])

            # Cut-off 2: same again on what is left after the first cut
            k = random.randint(periodsInAMonth, len(tmp))
            testPieces.append(tmp.iloc[k:k+periodsInAMonth])
            trainPieces.append(tmp.drop(tmp.index[k:k+periodsInAMonth]))
    finally:
        # Always release the database handle, including when a query or the sampling fails
        con.close()

    trainDf = _combine(trainPieces)
    testDf = _combine(testPieces)

    # Sanity check: every row read must have ended up in exactly one of the two sets
    if len(trainDf)+len(testDf) == totalRows:
        print('Ok')
    else:
        print("Incorrect. Total Rows = {}. TestDf+TrainDf rows = {}+{}={}".format(totalRows,len(testDf),len(trainDf),len(testDf)+len(trainDf)))

    return trainDf, testDf

trainDf,testDf = getTrainAndTestData()

#trainDf = trainDf.iloc[0:2000]
#testDf = testDf.iloc[0:2000]

def _toSignedLabels(labelColumn):
    """Return the labels as an (n, 1) integer array: +1 where the label equals 1, -1 otherwise."""
    asInt = np.asarray(labelColumn).astype(int)
    return np.where(asInt == 1, 1, -1).reshape(len(asInt), 1)

# Assign the replaced column back instead of calling replace(..., inplace=True) on a
# column selection, which is not guaranteed to write through to the DataFrame.
trainDf['t'] = trainDf['t'].replace(to_replace='0', value='-1')
testDf['t'] = testDf['t'].replace(to_replace='0', value='-1')

# Training data: every column except the label 't' and the user identifier is a feature.
# Labels are mapped to +1 / -1 with the same rule for train and test, so the training
# labels no longer depend on the string replacement above having matched.
realDataX = trainDf.drop(['t','UserID'], axis=1).values
realDataY = _toSignedLabels(trainDf['t'])

# Test data
x_vals_test = testDf.drop(['t','UserID'], axis=1).values
y_vals_test = _toSignedLabels(testDf['t'])


Ok


<div style="background-color:white; color:#008000; font-family: 'Courier New', Monospace; font-weight: bold">Confirm dimensions</div>

In [6]:
# Number of feature columns in the training matrix; the cell then displays both array shapes
numOfFeatures = realDataX.shape[1]
realDataX.shape, realDataY.shape

((1791121, 20), (1791121, 1))

In [None]:
# Shapes of the held-out features and labels.
# The former third entry referenced `y_vals_test_onehot`, which no visible cell
# of this notebook defines, so the cell raised NameError.
np.shape(x_vals_test), np.shape(y_vals_test)

<h3 style="background-color:#616161;color:white">3. Gaussian Process Regression Model (GPflow)</h3>

In [None]:
from __future__ import print_function
import GPflow
import tensorflow as tf
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

option = 5

if option ==1:  # Tutorial, dummy data, 1 dimension - definitely works
    N = 2000
    Xtrain = np.random.rand(N,1)
    Ytrain = np.sin(12*Xtrain) + 0.66*np.cos(25*Xtrain) + np.random.randn(N,1)*0.1 + 3
    batch_size = 300
    display_step = 2
elif option ==2: # Use dummy data, 20 dimensions
    N = 50000
    Xtrain = np.round(np.random.random((N,20)),0)  # This works. Use this line if you wish to test.
    Ytrain = np.round(np.random.random((N,1)),0)  # This works. Use this line if you wish to test.
elif option ==3: # Use subset of real data
    N=20000
    Xtrain = realDataX[0:N] 
    Ytrain = realDataY[0:N] 
elif option == 4: #Do nothing / Use full data
    N= np.shape(realData)[0]
    Xtrain = realDataX[0:N] 
    Ytrain = realDataY[0:N] 
elif option == 5: #Sometimes works, sometimes get either of the two errors
    N= np.shape(realDataX)[0]
    Xtrain = realDataX
    Ytrain = realDataY
    
batch_size = int(N/5)
training_iters = np.shape(Xtrain)[0]-batch_size
display_step = 500
    

# Network Parameters
n_input = np.shape(Xtrain)[1]
n_steps = 1 # timesteps
n_hidden = 128 # hidden layer num of features
n_classes = 2

step=1


# Keep training until reach max iterations
while step * batch_size < training_iters:
    if step%display_step == 0: print("Step: {}".format(step))
    X = Xtrain[step*batch_size:(step*batch_size)+batch_size]
    Y = Ytrain[step*batch_size:(step*batch_size)+batch_size]  
        
    k = GPflow.kernels.Matern52(1, lengthscales=0.3)
    #m = GPflow.gpr.GPR(X, Y, kern=k)
    m = GPflow.gpr.GPR(np.array(X, dtype=float), np.array(Y, dtype=float), kern=k)
    m.likelihood.variance = 0.01

    
    step += 1

mean, var = m.predict_y(Xtrain[2:3])  # One row of input

print(mean,var)

In [11]:
# Show the dtype of a single feature value (row 100, first column)
realDataX[100, 0].dtype

dtype('float64')