# Logistic Regression - (Time point, ROI) Features

In [1]:
import numpy as np
import pickle
import scipy as scp
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

In [2]:
# Turn dictionary into 2D Array
def createData(movieDict):
    # movieList = list(movieDict.keys())
    # vals = list(movieDict.values())
    
    # Reduce to 2 dimensions
    X = np.empty((176*18, 65*300+2), dtype="object")

    
    for key, row in movieDict.items():
        print(row.shape)
        # Testretest
        if len(row.shape) == 4:
            for i in range(row.shape[0]):
                for j in range(row.shape[-3]):
                    X[j][-2] = 'testretest'
                    X[j][-1] = j
                    for k in range(65):
                        for l in range(row.shape[-1]):
                            X[j][k*row.shape[-1] + l] = row[i][j][k][l]
                            
        # Otherwise
        else:
            for j in range(row.shape[-3]):
                X[j][-2] = key
                X[j][-1] = j
                for k in range(65):
                    for l in range(row.shape[-1]):
                            X[j][k*row.shape[-1] + l] = row[j][k][l]
                         
    # Randomly split participants
    X_train = []
    X_test = []
    y_train = []
    y_test = []
    testIndex = np.random.randint(0, 176, 76)
    np.random.shuffle(X)
    for row in X:
        print(row)
        if row[-1] in testIndex:
            X_test.append(row[:-2])
            y_test.append(row[-2])
        else:
            X_train.append(row[:-2])
            y_train.append(row[-2])

    X_train = np.array(X_train).astype(float)
    X_test = np.array(X_test).astype(float)
    y_train = np.array(y_train)
    y_test = np.array(y_test)
    return X_train, X_test, y_train, y_test
    

In [3]:
with open('HCP_movie_watching.pkl','rb') as f:
    TS = pickle.load(f)

X_train, X_test, y_train, y_test = createData(TS)

(4, 176, 84, 300)


In [61]:
# model = LogisticRegression(multi_class='multinomial', solver='sag')
# model.fit(X_train, y_train)

In [62]:
# acc = model.score(X_test, y_test)
# print("Accuracy: ", acc)

In [63]:
# Cost Function
def cost(X, Y, W):
    h = 1 / (1 + np.exp(-np.dot(X, W))) # hypothesis representation
    cost = np.dot(Y, -np.log(h)) + np.dot((1-Y), np.log(1-h)) # cost function
    J = -1 / (len(X)) * np.sum(cost) # mean cost
    return J


def gradient(X, Y, W):
    h = 1 / (1 + np.exp(-np.dot(X, W)))
    diff = h - Y
    grad = 1 / (len(X)) * np.dot(diff, X)
    return grad

    
def descent(X_train, Y_train, lr = 0.01):
    weights = [0]*(len(X_train[0]))
    loss = []
    loss.append(cost(X_train, Y_train, weights))
    count = 0
    while count < 1000:
        grad = gradient(X_train, Y_train, weights)
        weights = weights - lr*grad
        loss.append(cost(X_train, Y_train, weights))
        count += 1

    return weights

In [64]:
def createYMask(movie, Y):
    yMasked = np.zeros(Y.shape)
    mask = Y == movie
    yMasked[mask] = 1
    return yMasked

In [65]:
movieList = list(TS.keys())
modelWeights = []
for movie in movieList:
    yMasked = createYMask(movie, y_train)
    W = descent(X_train, yMasked)
    modelWeights.append(W)

[ 4.31609323e-03  6.14887169e-03  4.53452522e-03 ...  4.72152925e-05
 -9.83087297e-03 -6.55919310e-03]
[ 0.00646204  0.00415734  0.00890824 ...  0.00403777  0.00344235
 -0.00106615]
[ 0.00498677  0.00558748  0.00751926 ...  0.00075747  0.00293276
 -0.0006267 ]
[ 0.00265687  0.00501067  0.00621069 ...  0.00060875  0.00336237
 -0.00251431]
[ 0.0068925   0.00766283  0.01091374 ... -0.00388291 -0.00383299
 -0.00625019]
[7.77406593e-03 4.59182186e-03 6.29960223e-03 ... 3.75088706e-05
 1.11880762e-02 1.91867297e-03]
[ 0.00896773  0.00642047  0.01210803 ...  0.00137281 -0.00044694
 -0.00337771]
[0.00463044 0.0060374  0.00595867 ... 0.00164268 0.00177911 0.0047781 ]
[ 0.00879431  0.00402151  0.00923612 ... -0.00132012 -0.00480074
 -0.00550729]
[ 0.00812091  0.00352184  0.00545422 ...  0.00077628  0.00114374
 -0.00576554]
[ 0.00512857  0.00494274  0.00169858 ...  0.00159787 -0.00048169
 -0.0042462 ]
[ 0.00383971  0.00125059  0.00385125 ...  0.00394294 -0.00079893
  0.00387691]
[ 0.00531169  0.0

In [69]:
def sigmoid(X, W):
    return 1 / (1 + np.exp(-np.dot(X, W)))

predY = []
for x in X_test:
    probList = [sigmoid(x, W) for W in modelWeights]
    predY.append(movieList[probList.index(max(probList))])

pMask = y_test == predY # create mask for values where predicted is correct
acc = sum(pMask) / len(pMask)
print(acc)

0.9722222222222222
