In [None]:
import os

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sb
from scipy import stats

from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso, MultiTaskLasso
from sklearn.linear_model import BayesianRidge
from sklearn.linear_model import SGDRegressor # when attributes are big
from sklearn.kernel_ridge import KernelRidge # identical to support vector regression (SVR)
from sklearn.svm import SVR # use support vector regression (SVR)
from sklearn.tree import DecisionTreeRegressor

from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from sklearn.metrics import roc_curve, auc

from sklearn.dummy import DummyRegressor

# Load prepared data

In [None]:
# Directory holding the input text files (also used for exported predictions).
# Later cells build file names by plain string concatenation, so the value must
# end with a path separator. Set the TDCS_DATA_DIR environment variable to run
# the notebook on another machine; otherwise the original local path is used.
save_path = os.path.join(
    os.environ.get(
        "TDCS_DATA_DIR",
        "/Users/chikakoolsen/opt/python/thesis/code/tdcs_thesis/data/raw/",
    ),
    "",  # joining with "" appends a trailing separator only when one is missing
)

## fmap mean all experiments

In [None]:
# Read the numeric table from fmap_mean.txt; `columns` names its fields in file order.
file_name = f"{save_path}fmap_mean.txt"
columns = [
    'exp', 'mini_exp',
    'i', 'j', 'k',
    'mean0', 'mean1', 'mean2', 'mean3', 'mean4',
    'theory',
]
data = np.loadtxt(file_name)

In [None]:
# Wrap the loaded array in a labelled frame and cast the identifier / index
# columns to int in the same expression.
df = pd.DataFrame(data, columns=columns).astype(
    dict.fromkeys(["exp", "mini_exp", "i", "j", "k"], int)
)

In [None]:
# Display the frame (pandas truncates long frames when rendering).
df

In [None]:
# Column dtypes, non-null counts and memory usage of df.
df.info()

## 55 fmap means

In [None]:
# Names for the 55 extra columns: one per (prefix, j) pair, formatted as
# "<prefix>_<j+1>m<j>" for j = 1..11, e.g. "4_2m1" ... "12_12m11".
arr = [4, 6, 8, 10, 12]
col = [f"{prefix}_{step + 1}m{step}" for prefix in arr for step in range(1, 12)]

In [None]:
# Read fmap_mean_55.txt; the id/index columns come first, then the generated
# names in `col`, with 'theory' as the last column.
file_name = f"{save_path}fmap_mean_55.txt"
columns = ['exp', 'mini_exp', 'i', 'j', 'k', *col, 'theory']
data = np.loadtxt(file_name)

In [None]:
# Labelled frame for the 55-column data, with id / index columns cast to int.
df_55 = pd.DataFrame(data, columns=columns).astype(
    dict.fromkeys(["exp", "mini_exp", "i", "j", "k"], int)
)

In [None]:
# Display the frame (pandas truncates long frames when rendering).
df_55

In [None]:
# Column dtypes, non-null counts and memory usage of df_55.
df_55.info()

## Only +ON and -ON 

In [None]:
# Read fmap_on.txt; unlike the other tables it carries a 'run' column and
# 'off' / 'on' value columns.
file_name = f"{save_path}fmap_on.txt"
columns = [
    'exp', 'mini_exp', 'run',
    'i', 'j', 'k',
    'off', 'on',
    'theory',
]
data = np.loadtxt(file_name)

In [None]:
# Labelled frame for the on/off data, with id / index / run columns cast to int.
df_on = pd.DataFrame(data, columns=columns).astype(
    dict.fromkeys(["exp", "mini_exp", "run", "i", "j", "k"], int)
)

In [None]:
# Spot-check: every row at index position i = j = k = 31.
df_on.query("i == 31 and j == 31 and k == 31")

In [None]:
# Display the frame (pandas truncates long frames when rendering).
df_on

In [None]:
# Column dtypes, non-null counts and memory usage of df_on.
df_on.info()

## New attributes for Linear Models

In [None]:
# Read fmap_mean_new.txt: the fmap_mean layout followed by the derived columns
# new0..new4, slope and intercept.
file_name = f"{save_path}fmap_mean_new.txt"
columns = [
    'exp', 'mini_exp',
    'i', 'j', 'k',
    'mean0', 'mean1', 'mean2', 'mean3', 'mean4',
    'theory',
    'new0', 'new1', 'new2', 'new3', 'new4',
    'slope', 'intercept',
]
data = np.loadtxt(file_name)

In [None]:
# Labelled frame for the new-attribute data, with id / index columns cast to int.
df_new = pd.DataFrame(data, columns=columns).astype(
    dict.fromkeys(["exp", "mini_exp", "i", "j", "k"], int)
)

In [None]:
# Per column: does it contain at least one missing value?
df_new.isna().any()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for each column.
df_new.describe()

In [None]:
# Column dtypes, non-null counts and memory usage of df_new.
df_new.info()

# Split data

## Data 1: fmap mean all experiments

In [None]:
# Preview the columns used as features below: everything after 'mini_exp' and
# before 'theory', i.e. i, j, k and mean0..mean4.
df.iloc[:, 2:-1]

In [None]:
# Data1: features are i, j, k and mean0..mean4; the target is 'theory'.
# 70/30 random split with a fixed seed so the split is reproducible.
y = df['theory'].to_numpy()
X = df.iloc[:, 2:-1].to_numpy()

X1_train, X1_test, y1_train, y1_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

In [None]:
# Shapes of the full Data1 arrays and of its train / test split.
# The generic X_train / y_train / X_test / y_test names are only assigned in the
# Linear Regression section, so on a fresh kernel they raised NameError here;
# report the X1_* / y1_* arrays created by the split in the previous cell.
print(X.shape)
print(y.shape)
print(X1_train.shape)
print(y1_train.shape)
print(X1_test.shape)
print(y1_test.shape)

## Data2: One mini experiment

In [25]:
# Data2: within experiment 38, hold out mini experiment 6 as the test set and
# train on all the other mini experiments.
df_train = df.query("exp == 38 and mini_exp != 6")
df_test = df.query("exp == 38 and mini_exp == 6")

In [8]:
# Preview the index and mean columns (i, j, k, mean0..mean4) of the training rows.
df_train.iloc[:, 2:-1]

Unnamed: 0,i,j,k,mean0,mean1,mean2,mean3,mean4
0,0,0,0,0.0,0.0,0.0,0.0,0.0
1,0,0,1,0.0,0.0,0.0,0.0,0.0
2,0,0,2,0.0,0.0,0.0,0.0,0.0
3,0,0,3,0.0,0.0,0.0,0.0,0.0
4,0,0,4,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...
901115,43,63,59,0.0,0.0,0.0,0.0,0.0
901116,43,63,60,0.0,0.0,0.0,0.0,0.0
901117,43,63,61,0.0,0.0,0.0,0.0,0.0
901118,43,63,62,0.0,0.0,0.0,0.0,0.0


In [27]:
# Data2 features are the columns from position 5 up to (not including) the last
# one, i.e. mean0..mean4 without i, j, k; the target is 'theory'.
X2_train, y2_train = df_train.iloc[:, 5:-1].to_numpy(), df_train['theory'].to_numpy()
X2_test, y2_test = df_test.iloc[:, 5:-1].to_numpy(), df_test['theory'].to_numpy()

In [28]:
# Shapes of the Data2 train / test arrays, one per line.
print(X2_train.shape, y2_train.shape, X2_test.shape, y2_test.shape, sep="\n")

(901120, 5)
(901120,)
(180224, 5)
(180224,)


## Data3: 55 means

In [None]:
# Preview the columns used as features below: everything after 'mini_exp' and
# before 'theory' (i, j, k and the 55 generated columns).
df_55.iloc[:, 2:-1]

In [None]:
# Data3: features are i, j, k and the 55 generated columns; the target is 'theory'.
# 70/30 random split with a fixed seed so the split is reproducible.
y = df_55['theory'].to_numpy()
X = df_55.iloc[:, 2:-1].to_numpy()

X3_train, X3_test, y3_train, y3_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

In [None]:
# Shapes of the full Data3 arrays and of its train / test split, one per line.
print(
    X.shape,
    y.shape,
    X3_train.shape,
    y3_train.shape,
    X3_test.shape,
    y3_test.shape,
    sep="\n",
)

## Data4: +ON and -ON

In [None]:
# Preview the columns used as features below: everything after 'mini_exp' and
# before 'theory', i.e. run, i, j, k, off and on.
df_on.iloc[:, 2:-1]

In [None]:
# Data4: features are run, i, j, k, off and on; the target is 'theory'.
# 70/30 random split with a fixed seed so the split is reproducible.
y = df_on['theory'].to_numpy()
X = df_on.iloc[:, 2:-1].to_numpy()

X4_train, X4_test, y4_train, y4_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

In [None]:
# Shapes of the full Data4 arrays and of its train / test split, one per line.
print(
    X.shape,
    y.shape,
    X4_train.shape,
    y4_train.shape,
    X4_test.shape,
    y4_test.shape,
    sep="\n",
)

## Data5: fmap mean without zero 
Note: this filtering was not very effective.

In [None]:
# Copy of df without the rows whose five mean columns are ALL exactly zero,
# re-indexed from 0. `idx` keeps the labels of the removed rows.
all_zero_rows = df[['mean0', 'mean1', 'mean2', 'mean3', 'mean4']].eq(0.0).all(axis=1)
idx = df.loc[all_zero_rows].index
df_copy = df.drop(index=idx).reset_index(drop=True)
df_copy.shape

In [None]:
# Keep only rows in which EVERY mean column is non-zero.
# NOTE(review): this is stricter than df_copy, which only drops rows where all
# five means are zero; rows with a mix of zero and non-zero means are excluded
# here but kept there. Confirm which criterion is intended.
mean_columns = ['mean0', 'mean1', 'mean2', 'mean3', 'mean4']
df_nonzero = df.loc[df[mean_columns].ne(0.0).all(axis=1)]

In [None]:
# Display the filtered frame (pandas truncates long frames when rendering).
df_nonzero

In [None]:
# Data5: features are mean0..mean4 of the non-zero rows; the target is 'theory'.
# 70/30 random split with a fixed seed so the split is reproducible.
y = df_nonzero['theory'].to_numpy()
X = df_nonzero.iloc[:, 5:-1].to_numpy()

X5_train, X5_test, y5_train, y5_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

In [None]:
# Shapes of the full Data5 arrays and of its train / test split, one per line.
print(
    X.shape,
    y.shape,
    X5_train.shape,
    y5_train.shape,
    X5_test.shape,
    y5_test.shape,
    sep="\n",
)

## Data6: One experiment (new attributes)

In [None]:
# Data6: same hold-out as Data2 (experiment 38, mini experiment 6 as the test
# set), taken from the frame that carries the new attributes.
df_train_new = df_new.query("exp == 38 and mini_exp != 6")
df_test_new = df_new.query("exp == 38 and mini_exp == 6")

In [None]:
# Preview the columns used as features below: the five columns before 'slope'
# and 'intercept', i.e. new0..new4.
df_new.iloc[:, -7:-2]

In [None]:
# Data6 features are new0..new4 (positions -7 to -3); the target is 'theory'.
X_train_new, y_train_new = (
    df_train_new.iloc[:, -7:-2].to_numpy(),
    df_train_new['theory'].to_numpy(),
)
X_test_new, y_test_new = (
    df_test_new.iloc[:, -7:-2].to_numpy(),
    df_test_new['theory'].to_numpy(),
)

In [None]:
# Shapes of the Data6 train / test arrays, one per line.
print(
    X_train_new.shape,
    y_train_new.shape,
    X_test_new.shape,
    y_test_new.shape,
    sep="\n",
)

# Linear Regression

## Data1: fmap mean all experiments

In [None]:
# Point the generic train / test names used by the cells below at the Data1 split.
X_train, y_train = X1_train, y1_train
X_test, y_test = X1_test, y1_test

In [None]:
# Fit ordinary least squares on the training split (fit() returns the estimator),
# then predict on both splits for the metrics below.
lr = LinearRegression().fit(X_train, y_train)
y_train_pred, y_test_pred = lr.predict(X_train), lr.predict(X_test)

In [None]:
# Fitted intercept of the linear model, to three decimals.
print('Intercept: %.3f' % lr.intercept_)

In [None]:
# Fitted coefficients, one per feature column of the training matrix.
lr.coef_

In [None]:
# Mean squared error and R^2 on the training and test splits.
mse_train = mean_squared_error(y_train, y_train_pred)
mse_test = mean_squared_error(y_test, y_test_pred)
print('MSE train: %.3f, test: %.3f' % (mse_train, mse_test))

r2_train = r2_score(y_train, y_train_pred)
r2_test = r2_score(y_test, y_test_pred)
print('R^2 train: %.3f, test: %.3f' % (r2_train, r2_test))

In [None]:
# NOTE: for a regressor, score() returns the coefficient of determination (R^2),
# not a classification accuracy; these values should match the R^2 figures from
# r2_score in the previous cell.
print('Train Accuracy: %.3f' % lr.score(X_train, y_train))
print('Test Accuracy: %.3f' % lr.score(X_test, y_test))

In [None]:
# Spot-check: every row at index position i = j = k = 31.
df.query("i == 31 and j == 31 and k == 31")

In [None]:
# Predict for one hand-entered sample. The eight values follow the Data1 feature
# order (i, j, k, mean0..mean4) - presumably copied from a row shown by the
# previous cell; verify.
lr.predict( [[31, 31, 31, 702.532715, 736.654358, 702.625122, 668.777039, 697.872681]])

In [None]:
# Predicted vs. actual target on the test split.
# Joining the (actual, predicted) pairs with plt.plot in data order only draws a
# zig-zag through the scattered points, so draw the y = x reference line
# instead: a perfect prediction would fall exactly on it.
plt.scatter(y_test, y_test_pred, color='black')
diag_lo = min(np.min(y_test), np.min(y_test_pred))
diag_hi = max(np.max(y_test), np.max(y_test_pred))
plt.plot([diag_lo, diag_hi], [diag_lo, diag_hi], color='green', linewidth=2)
plt.xlabel('theory (actual)')
plt.ylabel('theory (predicted)')
plt.title('Linear regression, Data1: test split');

## Data2: One mini experiment

In [29]:
# Point the generic train / test names used by the cells below at the Data2 split.
X_train, y_train = X2_train, y2_train
X_test, y_test = X2_test, y2_test

In [30]:
# Fit ordinary least squares on the training split (fit() returns the estimator),
# then predict on both splits for the metrics below.
lr = LinearRegression().fit(X_train, y_train)
y_train_pred, y_test_pred = lr.predict(X_train), lr.predict(X_test)

In [31]:
# Fitted intercept of the linear model, to three decimals.
print('Intercept: %.3f' % lr.intercept_)

Intercept: 2.169


In [32]:
# Fitted coefficients, one per feature column (five for Data2: mean0..mean4).
lr.coef_

array([ 0.00348759,  0.00659004,  0.00170523, -0.00598573,  0.00075654])

In [33]:
# Mean squared error and R^2 on the training and test splits.
mse_train = mean_squared_error(y_train, y_train_pred)
mse_test = mean_squared_error(y_test, y_test_pred)
print('MSE train: %.3f, test: %.3f' % (mse_train, mse_test))

r2_train = r2_score(y_train, y_train_pred)
r2_test = r2_score(y_test, y_test_pred)
print('R^2 train: %.3f, test: %.3f' % (r2_train, r2_test))

MSE train: 94.163, test: 94.070
R^2 train: 0.057, test: 0.060


In [34]:
# NOTE: for a regressor, score() returns the coefficient of determination (R^2),
# not a classification accuracy; the printed values equal the R^2 figures from
# r2_score in the previous cell.
print('Train Accuracy: %.3f' % lr.score(X_train, y_train))
print('Test Accuracy: %.3f' % lr.score(X_test, y_test))

Train Accuracy: 0.057
Test Accuracy: 0.060


In [None]:
# Predict for one hand-entered sample with the Data2 model.
# This model was fitted on the five mean columns only (no i, j, k), so the
# sample must have exactly five values; passing the eight-value Data1 sample
# raised a feature-count ValueError.
lr.predict([[702.532715, 736.654358, 702.625122, 668.777039, 697.872681]])

## Data3: 55 fmap means

In [None]:
# Point the generic train / test names used by the cells below at the Data3 split.
X_train, y_train = X3_train, y3_train
X_test, y_test = X3_test, y3_test

In [None]:
# Fit ordinary least squares on the training split (fit() returns the estimator),
# then predict on both splits for the metrics below.
lr = LinearRegression().fit(X_train, y_train)
y_train_pred, y_test_pred = lr.predict(X_train), lr.predict(X_test)

In [None]:
# Fitted intercept of the linear model, to three decimals.
print('Intercept: %.3f' % lr.intercept_)

In [None]:
# Fitted coefficients, one per feature column of the training matrix.
lr.coef_

In [None]:
# Mean squared error and R^2 on the training and test splits.
mse_train = mean_squared_error(y_train, y_train_pred)
mse_test = mean_squared_error(y_test, y_test_pred)
print('MSE train: %.3f, test: %.3f' % (mse_train, mse_test))

r2_train = r2_score(y_train, y_train_pred)
r2_test = r2_score(y_test, y_test_pred)
print('R^2 train: %.3f, test: %.3f' % (r2_train, r2_test))

In [None]:
# NOTE: for a regressor, score() returns the coefficient of determination (R^2),
# not a classification accuracy; these values should match the R^2 figures from
# r2_score in the previous cell.
print('Train Accuracy: %.3f' % lr.score(X_train, y_train))
print('Test Accuracy: %.3f' % lr.score(X_test, y_test))

In [None]:
# Predicted vs. actual target on the test split.
# Joining the (actual, predicted) pairs with plt.plot in data order only draws a
# zig-zag through the scattered points, so draw the y = x reference line
# instead: a perfect prediction would fall exactly on it.
plt.scatter(y_test, y_test_pred, color='black')
diag_lo = min(np.min(y_test), np.min(y_test_pred))
diag_hi = max(np.max(y_test), np.max(y_test_pred))
plt.plot([diag_lo, diag_hi], [diag_lo, diag_hi], color='green', linewidth=2)
plt.xlabel('theory (actual)')
plt.ylabel('theory (predicted)')
plt.title('Linear regression, Data3: test split');

## Data4: +ON and -ON

In [None]:
# Point the generic train / test names used by the cells below at the Data4 split.
X_train, y_train = X4_train, y4_train
X_test, y_test = X4_test, y4_test

In [None]:
# Fit ordinary least squares on the training split (fit() returns the estimator),
# then predict on both splits for the metrics below.
lr = LinearRegression().fit(X_train, y_train)
y_train_pred, y_test_pred = lr.predict(X_train), lr.predict(X_test)

In [None]:
# Fitted intercept of the linear model, to three decimals.
print('Intercept: %.3f' % lr.intercept_)

In [None]:
# Fitted coefficients, one per feature column of the training matrix.
lr.coef_

In [None]:
# Mean squared error and R^2 on the training and test splits.
mse_train = mean_squared_error(y_train, y_train_pred)
mse_test = mean_squared_error(y_test, y_test_pred)
print('MSE train: %.3f, test: %.3f' % (mse_train, mse_test))

r2_train = r2_score(y_train, y_train_pred)
r2_test = r2_score(y_test, y_test_pred)
print('R^2 train: %.3f, test: %.3f' % (r2_train, r2_test))

In [None]:
# NOTE: for a regressor, score() returns the coefficient of determination (R^2),
# not a classification accuracy; these values should match the R^2 figures from
# r2_score in the previous cell.
print('Train Accuracy: %.3f' % lr.score(X_train, y_train))
print('Test Accuracy: %.3f' % lr.score(X_test, y_test))

In [None]:
# Predicted vs. actual target on the test split.
# Joining the (actual, predicted) pairs with plt.plot in data order only draws a
# zig-zag through the scattered points, so draw the y = x reference line
# instead: a perfect prediction would fall exactly on it.
plt.scatter(y_test, y_test_pred, color='black')
diag_lo = min(np.min(y_test), np.min(y_test_pred))
diag_hi = max(np.max(y_test), np.max(y_test_pred))
plt.plot([diag_lo, diag_hi], [diag_lo, diag_hi], color='green', linewidth=2)
plt.xlabel('theory (actual)')
plt.ylabel('theory (predicted)')
plt.title('Linear regression, Data4: test split');

## Data6: new attributes

In [None]:
# Fit ordinary least squares on the Data6 training arrays (fit() returns the
# estimator), then predict on both the training and the held-out arrays.
lr = LinearRegression().fit(X_train_new, y_train_new)
y_train_pred, y_test_pred = lr.predict(X_train_new), lr.predict(X_test_new)

In [None]:
# Fitted intercept of the linear model, to three decimals.
print('Intercept: %.3f' % lr.intercept_)

In [None]:
# Fitted coefficients, one per feature column of the training matrix.
lr.coef_

In [None]:
# NOTE: for a regressor, score() returns the coefficient of determination (R^2),
# not a classification accuracy.
print('Train Accuracy: %.3f' % lr.score(X_train_new, y_train_new))
print('Test Accuracy: %.3f' % lr.score(X_test_new, y_test_new))

In [None]:
# Predict for one hand-entered sample; the five values match the five Data6
# feature columns (new0..new4). TODO confirm which row they were taken from.
lr.predict( [[786.106262, 848.382385, 782.896973, 716.524658, 781.549927]])

In [None]:
# Predicted vs. actual target on the Data6 held-out rows.
# Joining the (actual, predicted) pairs with plt.plot in data order only draws a
# zig-zag through the scattered points, so draw the y = x reference line
# instead: a perfect prediction would fall exactly on it.
plt.scatter(y_test_new, y_test_pred, color='black')
diag_lo = min(np.min(y_test_new), np.min(y_test_pred))
diag_hi = max(np.max(y_test_new), np.max(y_test_pred))
plt.plot([diag_lo, diag_hi], [diag_lo, diag_hi], color='green', linewidth=2)
plt.xlabel('theory (actual)')
plt.ylabel('theory (predicted)')
plt.title('Linear regression, Data6: test split');

In [None]:
# Attach the Data6 test predictions as a 'predict' column.
# df_test_new is a filtered selection of df_new, so setting a column on it in
# place can trigger pandas' SettingWithCopyWarning; assign() builds a new frame
# with the extra column and gives the same result when the cell is re-run.
df_test_new = df_test_new.assign(predict=y_test_pred)

In [None]:
# Display the held-out rows together with the 'predict' column added above.
df_test_new

# Output predicted data

In [None]:
#df_output = df[df['']]

In [None]:
#np.savetxt(save_path+"lr_predict1.txt", df[['i', 'j', 'k', 'predict']], fmt="%i %i %i %s")

In [None]:
# Export the Data2 test predictions.
# y_test_pred2 was never defined, and by this point `lr` / `y_test_pred` belong
# to the Data6 model, so refit on the Data2 split to get matching predictions.
lr_data2 = LinearRegression().fit(X2_train, y2_train)
y_test_pred2 = lr_data2.predict(X2_test)
# assign() avoids setting a column in place on the filtered df_test selection
# (SettingWithCopyWarning).
df_test = df_test.assign(predict=y_test_pred2)
# One line per test row: i, j, k as integers followed by the prediction.
np.savetxt(save_path+"lr_predict2.txt", df_test[['i', 'j', 'k', 'predict']], fmt="%i %i %i %s")

In [None]:
#np.savetxt(save_path+"lr_predict3.txt", df_55[['i', 'j', 'k', 'predict']], fmt="%i %i %i %s")

In [None]:
#np.savetxt(save_path+"lr_predict4.txt", df_on[['i', 'j', 'k', 'predict']], fmt="%i %i %i %s")

In [None]:
#np.savetxt(save_path+"lr_predict_new.txt", df_test_new[['i', 'j', 'k', 'predict']], fmt="%i %i %i %s")