In [1]:
import torch
import os, requests
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import mean_squared_error, accuracy_score
from sklearn.utils import shuffle
import random
from scipy import stats
import torch.optim as optim
from sklearn.preprocessing import StandardScaler

In [5]:
def pytorch_mlp(X, y): 
    D_in, H, D_out = X.shape[1], 100, 1
    kf = KFold(n_splits=5)
    kf.get_n_splits(X)
    test_accs = []
    for train_index, test_index in kf.split(X):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
        X_train, X_test = torch.from_numpy(X_train), torch.from_numpy(X_test)
        y_train = torch.from_numpy(y_train)
        model = torch.nn.Sequential(
            torch.nn.Linear(D_in, H),
            torch.nn.Dropout(0.5),
            torch.nn.ReLU(),
            torch.nn.Linear(H, D_out),
            torch.nn.Dropout(0.2)
        )
        learning_rate = 1e-2
        loss_fn = torch.nn.BCEWithLogitsLoss()
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)
        for t in range(500):
            y_pred = model(X_train.float())
            loss = loss_fn(y_pred.squeeze(), y_train.float())
            model.zero_grad()
            loss.backward()
            optimizer.step()
            with torch.no_grad():
                for param in model.parameters():
                    param -= learning_rate * param.grad
        y_test_pred = model(X_test.float())
        y_test_pred = torch.sigmoid(y_test_pred)
        y_pred_tag = torch.round(y_test_pred)
        test_acc = accuracy_score(y_pred_tag.detach().numpy(), y_test)
        print(test_acc)
        test_accs.append(test_acc)
    return np.asarray(test_accs).mean(), stats.sem(np.asarray(test_accs))

In [6]:
data11 = np.load('data_11_pca.npz')
X = data11['data']
y = data11['labels']
X, y = shuffle(X, y)

acc_11_pca, sem_11_pca = pytorch_mlp(X,y)
print(acc_11_pca, sem_11_pca)

random.shuffle(y)
acc_11_pca_random, sem_11_pca_random = pytorch_mlp(X,y)
print(acc_11_pca_random, sem_11_pca_random)

0.8947368421052632
0.631578947368421
0.6842105263157895
0.42105263157894735
0.7368421052631579
0.6736842105263158 0.07699336230698839
0.42105263157894735
0.5789473684210527
0.631578947368421
0.5789473684210527
0.2631578947368421
0.4947368421052631 0.06781104593013224


In [7]:
data_all = np.load('data_all_pca.npz')
X = data_all['data']
y = data_all['labels']
X, y = shuffle(X, y)

acc_all_pca, sem_all_pca = pytorch_mlp(X,y)
print(acc_all_pca, sem_all_pca)

random.shuffle(y)
acc_all_pca_random, sem_all_pca_random = pytorch_mlp(X,y)
print(acc_all_pca_random, sem_all_pca_random)

0.4672897196261682
0.5046728971962616
0.48598130841121495
0.45794392523364486
0.4672897196261682
0.47663551401869153 0.008359132626167429
0.4485981308411215
0.5887850467289719
0.5887850467289719
0.4485981308411215
0.4392523364485981
0.502803738317757 0.035143169489650766
