In [1]:
import pandas as pd
import numpy as np
import glob
from gplearn.genetic import SymbolicRegressor
from sklearn.metrics import mean_squared_error as MSE
from sklearn.model_selection import train_test_split
import torch
import matplotlib.pyplot as plt
from scipy.signal import savgol_filter
from sklearn.metrics import r2_score, mean_squared_error

In [2]:
mens = []
womens = []
labels = pd.read_csv('./gender_labels.csv')
for s in glob.glob('/neuro/notebooks/all_data_confounds_remove/*.csv'):
    person = int(s.split('/')[-1].split('_')[0])
    data = pd.read_csv(s)
    data = data.rolling(window=10).mean().dropna()
    if labels[labels['person']==person]['gender'].values[0]=='M':
        mens.append(data)
    else:
        womens.append(data)
mens = pd.concat(mens)
womens = pd.concat(womens)  

In [3]:
len(mens)

147684

In [4]:
# data = pd.read_csv('../notebooks/filter_with_confounds_dataset.csv')
# data = mens
# region = 'x1'
# X = data.drop([region], axis=1).values
# y = data[region].values
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.3, random_state=0)

In [7]:
loses_by_epoch = {}
data = mens
print(data.shape)
for i in range(34, 41):
    region = 'x'+str(i)
    print(region)
    loses_by_epoch[region] = list()
    X = data.drop([region], axis=1).values
    y = data[region].values
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.3, random_state=0)

    device='cuda:1'
    X_train_torch = torch.FloatTensor(X_train).to(device)
    y_train_torch = torch.FloatTensor(y_train).to(device)

    X_test_torch = torch.FloatTensor(X_test).to(device)
    y_test_torch = torch.FloatTensor(y_test).to(device)

    model = torch.nn.Sequential(
        torch.nn.Linear(47, 100),
        torch.nn.Softplus(),
        torch.nn.Linear(100, 50),
        torch.nn.Softplus(),
        torch.nn.Linear(50, 25),
        torch.nn.Tanh(),
        torch.nn.Linear(25, 1),
    ).to(device)
    loss_fn = torch.nn.MSELoss(reduction='mean')

    learning_rate = 1e-2
    batch_size = 8096
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    for t in range(10000):
        for batch in range(0, X_train.shape[0], batch_size):

            y_pred_train = model(X_train_torch[batch:batch+batch_size])


            loss_train = loss_fn(y_pred_train, y_train_torch.reshape(-1, 1)[batch:batch+batch_size])

            optimizer.zero_grad()
            loss_train.backward()
            optimizer.step()
            
        train_predict = model(X_train_torch).detach().cpu().numpy()
        test_predict = model(X_test_torch).detach().cpu().numpy()
        r2_train = r2_score(np.ravel(train_predict), np.ravel(y_train))
        r2_test = r2_score(np.ravel(test_predict), np.ravel(y_test))
        mse_train = mean_squared_error(np.ravel(train_predict), np.ravel(y_train))
        mse_test = mean_squared_error(np.ravel(test_predict), np.ravel(y_test))
        loses_by_epoch[region].append([r2_train, r2_test, mse_train, mse_test])
        if t%1000==0:
            print('Iteration:',t)
            print('train:',r2_score(np.ravel(train_predict), np.ravel(y_train)))
            print('test:',r2_score(np.ravel(test_predict), np.ravel(y_test)))
            print('train:',mean_squared_error(np.ravel(train_predict), np.ravel(y_train)))
            print('test:',mean_squared_error(np.ravel(test_predict), np.ravel(y_test)))
    torch.save(model.state_dict(), './models/pytorch_model_{0}_mens.pt'.format(region))

(147684, 48)
x34
Iteration: 0
train: -477.0328144104164
test: -478.84249911189795
train: 0.22226559049493302
test: 0.22477303787910452
Iteration: 1000
train: 0.677063859347943
test: 0.4500079486015237
train: 0.04709342702340032
test: 0.07959051970703594
Iteration: 2000
train: 0.686387049780744
test: 0.4184319022095211
train: 0.04532878407023519
test: 0.08347626172304999
Iteration: 3000
train: 0.6918307499017923
test: 0.4005312364471554
train: 0.04516921049882837
test: 0.08729954453206014
Iteration: 4000
train: 0.684349267471236
test: 0.3854981311320129
train: 0.04533310232215577
test: 0.0878788789113663
Iteration: 5000
train: 0.6580932678215812
test: 0.3363455593246918
train: 0.04566660172395683
test: 0.08802558716969354
Iteration: 6000
train: 0.6916765407815615
test: 0.3827279052984418
train: 0.044855138451940446
test: 0.0891251358106201
Iteration: 7000
train: 0.6999059637113312
test: 0.3845728647402309
train: 0.043948724946570886
test: 0.08972106019069073
Iteration: 8000
train: 0.708

Iteration: 8000
train: 0.8759470939573573
test: 0.7600743914161987
train: 0.044522060104898974
test: 0.08605699281855704
Iteration: 9000
train: 0.8793246061029493
test: 0.7622569006178777
train: 0.043632790733763326
test: 0.08592901752754432
