In [1]:
import os
import sys
# Make the parent directory importable and use it as the working directory, so
# that relative paths used later in the notebook resolve from there.
# NOTE(review): the chdir below is relative, so re-running this cell in the same
# kernel moves one more level up each time — run it exactly once per kernel.
module_path = os.path.abspath('..')
already_on_path = module_path in sys.path
if not already_on_path:
    sys.path.append(module_path)
os.chdir('..')

In [25]:
import pickle
import re
import numpy as np
import pandas as pd
import random
import torch
import torch.optim as optim
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader
import tqdm
import joblib
import os
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt
import seaborn as sns
import json
from uncertainties import ufloat
from modules.function import PTR, special_formatting, check_cuda, PTR_modified, image_modified, get_metrics
from modules.representation_schemes import get_PTR_features
from modules.encoder import Encoder, Identity

In [3]:
# Shell command (IPython `!` escape) that enables the ipywidgets notebook extension.
# Presumably needed so the tqdm.notebook progress bars used during training
# render as widgets — TODO confirm for the Jupyter version in use.
!jupyter nbextension enable --py widgetsnbextension

Enabling notebook extension jupyter-js-widgets/extension...
      - Validating: [32mOK[0m


In [4]:
# Seed every random number generator this notebook draws from so that a
# Restart & Run All reproduces the same splits and weights:
#   - `random`    : Python stdlib RNG
#   - `np.random` : used by np.random.choice for the train/test hold-out split
#   - `torch`     : model initialisation and DataLoader shuffling
random.seed(0)
np.random.seed(0)
# Kept as the last expression so the cell still displays the torch Generator.
torch.manual_seed(0)

<torch._C.Generator at 0x7fe4e0438c70>

In [5]:
# Load the pickled GFA dataset shipped with the GTDL paper files.
gfa_dataset_file = 'gfa_dataset.txt'
common_path = "Files_from_GTDL_paper/{}"
# SECURITY NOTE: pickle.load executes arbitrary code embedded in the file;
# only load this file from a trusted source.
# The context manager guarantees the file handle is closed after loading.
with open(common_path.format(gfa_dataset_file), 'rb') as dataset_fh:
    gfa_dataset = pickle.load(dataset_fh)

In [6]:
# Bucket every dataset entry by the first bracketed tag found in it:
#   gfa_a -> entries whose first tag is '[a]'
#   gfa_b -> entries whose first tag is '[b]'
#   gfa_c -> everything else (including '[c]' and the empty tag '[]')
# gfa_i collects all tags found, in dataset order.
gfa_i = []
gfa_a = []
gfa_b = []
gfa_c = []
# Raw string: '\[' in a normal string literal is an invalid escape sequence.
tag_pattern = re.compile(r'\[[a-c]?\]')  # original note: [B, Fe, P,No]
# enumerate() supplies the position directly instead of an O(n) list.index()
# lookup per entry. For duplicated entries list.index() returned the first
# occurrence; the entry text fetched later is the same either way.
for idx, entry in enumerate(gfa_dataset):
    tx_gfa = tag_pattern.findall(entry)
    gfa_i.extend(tx_gfa)
    # An entry without any tag raises IndexError here, as before.
    first_tag = tx_gfa[0]
    if first_tag == '[a]':
        gfa_a.append(idx)
    elif first_tag == '[b]':
        gfa_b.append(idx)
    else:
        gfa_c.append(idx)

gfa_data_form = []
gfa_data_form_b = []
#------------------------------------------------------------------------------
# map raw data to 2-D image using PTR
# Processing order is deliberately a -> c -> b, matching the original layout of
# X_all / y_all. extend() appends in place instead of rebuilding the list on
# every iteration.
# NOTE(review): assumes PTR_modified returns two lists (features, labels) — confirm.
for idx in gfa_a + gfa_c + gfa_b:
    x, y = PTR_modified(gfa_dataset[idx])
    gfa_data_form.extend(x)
    gfa_data_form_b.extend(y)

# Features are reshaped to (N, 1, 9, 18) and labels to (N, 1), both float32.
X_all = np.array(gfa_data_form).reshape(-1, 1, 9, 18).astype('float32')
y_all = np.array(gfa_data_form_b).reshape(-1, 1).astype('float32')
assert len(X_all) == len(y_all), "feature/label count mismatch after PTR mapping"

In [7]:
# Directory that receives one saved encoder per training run.
saveloc = 'saved_models/Encoders/PTR'
# exist_ok=True replaces the separate exists() check, which was racy and
# silently accepted a regular file sitting at this path.
os.makedirs(saveloc, exist_ok=True)

In [None]:
# Train and evaluate the PTR encoder on 10 independent random 80/20 hold-out
# splits. Although labelled "Fold", each run draws its own random test subset,
# so test sets of different runs may overlap (this is not k-fold CV).
# Per run: train with Adam + BCE loss, save the model, then score the held-out
# 20% and store the metrics in metrics_list[k].

# A bare `import tqdm` does not load the `tqdm.notebook` submodule; import it
# explicitly so the progress bar does not depend on some other module having
# imported it first.
import tqdm.notebook

cuda = check_cuda()
metrics_list = {}

n_samples = len(y_all)
n_test = int(n_samples * 0.20)
batch = 64
num_iterations = 2000
log_interval = int(5e2)
# The loss module is stateless, so build it once instead of once per batch.
bce_loss = torch.nn.BCELoss()

for k in range(10):
    print('Fold {}'.format(k))
    #--------------------------------------------------------------------------
    # Random hold-out split: sample test indices without replacement, and take
    # the remaining indices (in ascending order) as the training set. The
    # boolean mask replaces repeated list.remove() calls, which were O(n^2).
    i_te = np.random.choice(np.arange(n_samples), n_test, replace=False)
    train_mask = np.ones(n_samples, dtype=bool)
    train_mask[i_te] = False
    i_tr = np.flatnonzero(train_mask)
    X_train, X_test = X_all[i_tr], X_all[i_te]
    y_train, y_test = y_all[i_tr], y_all[i_te]

    Xy = list(zip(X_train, y_train))
    train_loader = DataLoader(Xy, batch_size=batch, shuffle=True)

    gfa_Encoder = Encoder(1, 1)
    # Move the model to the GPU *before* building the optimizer, as the
    # torch.optim documentation recommends.
    if cuda:
        gfa_Encoder = gfa_Encoder.cuda()
    e_optimizer = optim.Adam(gfa_Encoder.parameters(), lr=2e-4)

    # `epoch` instead of `iter`, which shadowed the builtin for the rest of the
    # notebook.
    for epoch in tqdm.notebook.tqdm(range(num_iterations)):
        train_loss = 0.0  # sum (not mean) of the batch losses in this epoch
        for X, y in train_loader:
            if cuda:
                X = X.cuda()
                y = y.cuda()
            e_optimizer.zero_grad()
            # The output already lives on the model's device; no extra .cuda().
            target = gfa_Encoder(X)
            e_error = bce_loss(target, y)
            # NOTE(review): retain_graph=True is kept from the original; it is
            # normally unnecessary when the graph is rebuilt every batch —
            # confirm against Encoder and drop it to save memory.
            e_error.backward(retain_graph=True)
            e_optimizer.step()
            train_loss += e_error.item()
        if epoch == 0 or (epoch + 1) % log_interval == 0:
            print('Epoch : {}, Loss : {}'.format(epoch + 1, train_loss))

    # Saved before switching to eval mode, so the pickled model keeps the same
    # training flag as before this change.
    joblib.dump(gfa_Encoder, os.path.join(saveloc, 'Encoder2D_ptr_fold{}.pt'.format(k)))

    # X_test is always a NumPy array here. The former `dtype != torch.float32`
    # check compared a NumPy dtype with a torch dtype and was therefore always
    # true, so the conversion is made unconditional.
    X_test = torch.from_numpy(X_test)
    if cuda:
        X_test = X_test.cuda()
    # Evaluate in inference mode: eval() disables train-only layer behaviour
    # (dropout / batch-norm updates, if Encoder has any) and no_grad() avoids
    # building an autograd graph over the whole test set.
    gfa_Encoder.eval()
    with torch.no_grad():
        y_predict = gfa_Encoder(X_test).to('cpu').numpy()
    # Outputs are rounded to the nearest integer before scoring.
    metrics = get_metrics(y_test, np.round(y_predict))
    metrics_list[k] = metrics
    print('accuracy : {},precision : {},recall : {},F1 : {}'.format(metrics[0],metrics[1],metrics[2],metrics[3]))

Fold 0


  0%|          | 0/2000 [00:00<?, ?it/s]

Epoch : 1, Loss : 90.42927724123001
Epoch : 500, Loss : 25.389043539762497
Epoch : 1000, Loss : 20.406454630196095
Epoch : 1500, Loss : 17.808362305164337
Epoch : 2000, Loss : 15.661701884120703
accuracy : 0.9148,precision : 0.9152,recall : 0.9148,F1 : 0.9146
Fold 1


  0%|          | 0/2000 [00:00<?, ?it/s]

Epoch : 1, Loss : 90.49303877353668
Epoch : 500, Loss : 25.25445021688938
Epoch : 1000, Loss : 19.934607729315758
Epoch : 1500, Loss : 17.780681535601616
Epoch : 2000, Loss : 15.741295136511326
accuracy : 0.9296,precision : 0.93,recall : 0.9296,F1 : 0.9294
Fold 2


  0%|          | 0/2000 [00:00<?, ?it/s]

Epoch : 1, Loss : 90.43790435791016
Epoch : 500, Loss : 27.48120227456093
Epoch : 1000, Loss : 21.43263839185238
Epoch : 1500, Loss : 18.89907817170024
Epoch : 2000, Loss : 16.433014675974846
accuracy : 0.9224,precision : 0.9228,recall : 0.9224,F1 : 0.9225
Fold 3


  0%|          | 0/2000 [00:00<?, ?it/s]

Epoch : 1, Loss : 90.31334483623505
Epoch : 500, Loss : 25.99514551460743
Epoch : 1000, Loss : 20.33621884509921
Epoch : 1500, Loss : 17.645549934357405
Epoch : 2000, Loss : 16.00643054768443
accuracy : 0.9085,precision : 0.91,recall : 0.9085,F1 : 0.9086
Fold 4


  0%|          | 0/2000 [00:00<?, ?it/s]

Epoch : 1, Loss : 90.57363539934158
Epoch : 500, Loss : 26.39319644868374
Epoch : 1000, Loss : 21.803099505603313
Epoch : 1500, Loss : 18.982519332319498
Epoch : 2000, Loss : 16.82677185535431
accuracy : 0.9219,precision : 0.9219,recall : 0.9219,F1 : 0.9219
Fold 5


  0%|          | 0/2000 [00:00<?, ?it/s]

Epoch : 1, Loss : 90.32925391197205
Epoch : 500, Loss : 27.08493160456419
Epoch : 1000, Loss : 22.785734441131353
Epoch : 1500, Loss : 19.005338810384274
Epoch : 2000, Loss : 17.448392372578382
accuracy : 0.9114,precision : 0.9115,recall : 0.9114,F1 : 0.9113
Fold 6


  0%|          | 0/2000 [00:00<?, ?it/s]

Epoch : 1, Loss : 90.49720102548599
Epoch : 500, Loss : 26.124778173863888
Epoch : 1000, Loss : 20.94458582997322
Epoch : 1500, Loss : 17.707814309746027
Epoch : 2000, Loss : 16.623765226453543
accuracy : 0.9219,precision : 0.9229,recall : 0.9219,F1 : 0.9216
Fold 7


  0%|          | 0/2000 [00:00<?, ?it/s]

Epoch : 1, Loss : 90.39806795120239
Epoch : 500, Loss : 25.458346016705036
Epoch : 1000, Loss : 20.132190257310867
Epoch : 1500, Loss : 17.869455505162477
Epoch : 2000, Loss : 16.31752660870552
accuracy : 0.9258,precision : 0.9261,recall : 0.9258,F1 : 0.9257
Fold 8


  0%|          | 0/2000 [00:00<?, ?it/s]

Epoch : 1, Loss : 90.46083587408066
Epoch : 500, Loss : 27.208476692438126
Epoch : 1000, Loss : 20.64193169400096
Epoch : 1500, Loss : 17.129823226481676
Epoch : 2000, Loss : 15.35202245786786
accuracy : 0.9272,precision : 0.9273,recall : 0.9272,F1 : 0.9272
Fold 9


  0%|          | 0/2000 [00:00<?, ?it/s]

Epoch : 1, Loss : 90.46387475728989
Epoch : 500, Loss : 24.868167266249657
Epoch : 1000, Loss : 19.698240876197815
Epoch : 1500, Loss : 17.281819328665733
Epoch : 2000, Loss : 15.179746977984905
accuracy : 0.9272,precision : 0.9273,recall : 0.9272,F1 : 0.9271


In [10]:
# Persist the per-run metrics under the 'ptr' key.
# Note: json.dump writes the integer run indices as string keys.
sup_dict = {'ptr': metrics_list}
# Create the output directory first; open(..., 'w') fails with
# FileNotFoundError when 'results/' does not exist yet.
os.makedirs('results', exist_ok=True)
# NOTE(review): mode 'w' replaces the whole file; if other notebooks write
# their own keys into this file, they are overwritten — confirm intended.
with open('results/representation_stats.json', 'w') as f:
    json.dump(sup_dict, f)

In [29]:
# Summarise the metrics over all runs as mean +/- standard deviation and
# record which run achieved the best F1 score.
col_names = ['Accuracy','Precision','Recall','F1 Score']
stat_df = pd.DataFrame.from_dict(metrics_list, orient='index', columns=col_names)
# .std() on the NumPy array is the population standard deviation (ddof=0).
mean_stats, std_stats = stat_df.values.mean(axis=0), stat_df.values.std(axis=0)
# Row position of the best F1 score. argmax returns the first maximum, whereas
# argwhere(...).item() raised ValueError whenever two runs tied.
best_ind = int(np.argmax(stat_df['F1 Score'].values))
vals = [ufloat(np.round(mean, 3), np.round(std, 3)) for mean, std in zip(mean_stats, std_stats)]
print('Accuracy : {}, \nPrecision : {}, \nRecall : {}, \nF1 Score : {}'.format(vals[0],vals[1],vals[2],vals[3]))

Accuracy : 0.921+/-0.007, 
Precision : 0.922+/-0.007, 
Recall : 0.921+/-0.007, 
F1 Score : 0.921+/-0.007


In [36]:
# Copy the encoder from the run with the best F1 score to the best-models
# directory under a fixed name.
import shutil
best_model_loc = 'saved_models/best_models'
os.makedirs(best_model_loc, exist_ok=True)
# Build the file name from the run label rather than indexing into a sorted
# directory listing: the listing breaks if the directory holds any other file
# or if there are 11+ runs ('fold10' sorts before 'fold2').
best_fold = stat_df.index[best_ind]
best_model_name = 'Encoder2D_ptr_fold{}.pt'.format(best_fold)
shutil.copy(os.path.join(saveloc, best_model_name), os.path.join(best_model_loc, '2DEncoder_PTR.pt'))

'saved_models/best_models/2DEncoder_PTR.pt'