In [1]:
import os
import sys

# Resolve the parent directory to an absolute path *before* changing the
# working directory, so the sys.path entry stays valid afterwards.
module_path = os.path.abspath('..')
already_on_path = module_path in sys.path
if not already_on_path:
    sys.path.append(module_path)

# Make the parent directory the working directory; the relative data/model
# paths used by later cells are resolved against it.
# NOTE: re-running this cell moves one more level up each time.
os.chdir('..')

In [2]:
import pickle
import re
import numpy as np
import pandas as pd
import random
import torch
import torch.optim as optim
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader
import tqdm
import joblib
import os
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt
import seaborn as sns
import json
from uncertainties import ufloat
from modules.function import PTR, special_formatting, check_cuda, PTR_modified, image_modified, get_metrics
from modules.representation_schemes import get_PTR_features
from modules.encoder import Encoder, Identity, Encoder1D, EncoderDNN, count_parameters

In [3]:
# Shell command: enable the ipywidgets notebook extension in the classic Jupyter
# front end (presumably needed so the tqdm.notebook progress bars render as widgets).
!jupyter nbextension enable --py widgetsnbextension

Enabling notebook extension jupyter-js-widgets/extension...
      - Validating: [32mOK[0m


In [4]:
# Seed every global random number generator the notebook can draw from so that
# a fresh "Restart & Run All" is repeatable. NumPy is seeded as well because
# numpy is imported and used by this notebook (and possibly by the project
# helpers), and its global RNG is independent of Python's and torch's.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)
# Kept as the last expression so the cell still displays the torch Generator.
torch.manual_seed(SEED)

<torch._C.Generator at 0x7f073670cc30>

In [5]:
# Load the pickled raw dataset. The path is relative to the working directory
# set at the top of the notebook.
gfa_dataset_file = 'gfa_dataset.txt'
common_path = "Files_from_GTDL_paper/{}"
# SECURITY NOTE: pickle.load can execute arbitrary code; only load this file
# from a trusted source.
# The context manager closes the file handle deterministically (the previous
# bare open() left it to the garbage collector).
with open(common_path.format(gfa_dataset_file), 'rb') as dataset_file:
    gfa_dataset = pickle.load(dataset_file)

In [6]:
# Entries whose formula mentions any of these element symbols are skipped.
to_discard = ['Rf','Db','Sg','Bh','Hs']
discard_set = set(to_discard)

gfa_i = []  # every bracketed label found in the kept entries
gfa_a = []  # dataset positions whose first label is '[a]'
gfa_b = []  # dataset positions whose first label is '[b]'
gfa_c = []  # dataset positions with any other first label ('[c]' or '[]')

# enumerate() gives the position directly; the previous gfa_dataset.index(i)
# rescanned the whole list for every entry (O(n^2)).
for idx, entry in enumerate(gfa_dataset):
    # Raw string: '\[' in a normal string literal is an invalid escape
    # sequence (warning today, error in future Python versions).
    tx_gfa = re.findall(r'\[[a-c]?\]', entry)
    tx1_element = re.findall('[A-Z][a-z]?', entry)  # e.g. [B, Fe, P, No]
    if discard_set.intersection(tx1_element):
        continue
    gfa_i.extend(tx_gfa)
    # An entry without any bracketed label raises IndexError here, exactly as
    # before; the dataset is expected to label every entry.
    if tx_gfa[0] == '[a]':
        gfa_a.append(idx)
    elif tx_gfa[0] == '[b]':
        gfa_b.append(idx)
    else:
        gfa_c.append(idx)

gfa_data_form = []
gfa_data_form_p = []
gfa_data_form_b = []

#------------------------------------------------------------------------------
#map raw data to 2-D image using PTR
# The a -> c -> b ordering is deliberate and must not change: the training cell
# indexes X_all / y_all / p_all with precomputed fold indices, which presumably
# were generated against this exact row order.
for idx in gfa_a + gfa_c + gfa_b:
    # PTR is a project helper; it is assumed to return three lists per entry
    # (images, the auxiliary input later passed to the encoder as `p`, targets).
    x, p, y = PTR(gfa_dataset[idx])
    # extend() appends in place instead of rebuilding the list every iteration.
    gfa_data_form.extend(x)
    gfa_data_form_p.extend(p)
    gfa_data_form_b.extend(y)

# Final arrays: one single-channel 9x18 image per sample, plus one column each
# for the target and the auxiliary input, all as float32.
X_all = np.array(gfa_data_form).reshape(-1, 1, 9, 18).astype('float32')
y_all = np.array(gfa_data_form_b).reshape(-1, 1).astype('float32')
p_all = np.array(gfa_data_form_p).reshape(-1, 1).astype('float32')

In [7]:
# Directory that receives one scripted encoder per fold.
saveloc = 'saved_models/Encoders/PTR'
saveloc_missing = not os.path.exists(saveloc)
if saveloc_missing:
    os.makedirs(saveloc)

# Precomputed cross-validation split, loaded from a pickle file.
with open('misc/kfold.pkl', 'rb') as fold_file:
    fold_dict = pickle.load(fold_file)

# Number of folds = number of entries in the split dictionary.
folds = len(fold_dict)

In [10]:
# Train one 2-D encoder per cross-validation fold, save each trained model as a
# TorchScript file and collect the held-out metrics in metrics_list[fold].
cuda = check_cuda()
metrics_list = {}

# Hyper-parameters shared by every fold (hoisted out of the loop).
batch = 64
num_iterations = 2000      # number of passes over the training set
log_interval = int(5e2)    # print the loss every this many passes
bce_loss = torch.nn.BCELoss()  # stateless, so one instance serves all batches

for k in range(folds):
    print('Fold {}'.format(k))
    #--------------------------------------------------------------------------
    # Split the arrays with the precomputed indices of this fold.
    i_tr = fold_dict[k]['train_inds']
    i_te = fold_dict[k]['test_inds']
    X_train, X_test = X_all[i_tr], X_all[i_te]
    y_train, y_test = y_all[i_tr], y_all[i_te]
    p_train, p_test = p_all[i_tr], p_all[i_te]

    Xy = [(X_train[i], y_train[i], p_train[i]) for i in range(len(y_train))]
    train_loader = DataLoader(Xy, batch_size=batch, shuffle=True)

    # Fresh model and optimizer for every fold.
    gfa_Encoder = Encoder(1, 1)
    if cuda:
        gfa_Encoder = gfa_Encoder.cuda()
    e_optimizer = optim.Adam(gfa_Encoder.parameters(), lr=2e-4)

    # `epoch` instead of `iter`, which shadowed the builtin of the same name.
    for epoch in tqdm.notebook.tqdm(range(num_iterations)):
        # Sum of the per-batch losses over this pass (not an average).
        train_loss = 0.0
        for X, y, p in train_loader:
            if cuda:
                X, y, p = X.cuda(), y.cuda(), p.cuda()
            e_optimizer.zero_grad()
            prediction = gfa_Encoder(X, p)
            e_error = bce_loss(prediction, y)
            # A new graph is built for every batch, so retain_graph=True only
            # kept buffers alive for no benefit.
            e_error.backward()
            e_optimizer.step()
            train_loss += e_error.item()
        if epoch == 0 or (epoch + 1) % log_interval == 0:
            print('Epoch : {}, Loss : {}'.format(epoch+1,train_loss))

    # Module.cpu() moves the parameters in place, so from here on the model
    # lives on the CPU regardless of where it was trained.
    gfa_Encoder = gfa_Encoder.cpu()
    # NOTE(review): the model is still in training mode when it is scripted and
    # scored. If Encoder contains dropout or batch-norm layers, gfa_Encoder.eval()
    # should be called first - confirm against modules.encoder.
    model_scripted = torch.jit.script(gfa_Encoder)
    model_scripted.save(os.path.join(saveloc,'Encoder2D_ptr_fold{}.pt'.format(k)))

    # Score the held-out fold on the CPU, where the model now is. Previously
    # the test tensors were pushed to the GPU after the model had been moved to
    # the CPU, which fails with a device mismatch whenever CUDA is in use.
    # X_test / p_test are always numpy arrays at this point, so they are
    # converted unconditionally (the old numpy-vs-torch dtype comparison was
    # only true by accident).
    X_test = torch.from_numpy(X_test)
    p_test = torch.from_numpy(p_test)
    with torch.no_grad():
        y_predict = gfa_Encoder(X_test, p_test).numpy()
    metrics = get_metrics(y_test, np.round(y_predict))
    metrics_list[k] = metrics
    print('accuracy : {},precision : {},recall : {},F1 : {}'.format(metrics[0],metrics[1],metrics[2],metrics[3]))

Fold 0


  0%|          | 0/2000 [00:00<?, ?it/s]

Epoch : 1, Loss : 183.17983329296112
Epoch : 500, Loss : 39.43397355824709
Epoch : 1000, Loss : 31.46480217948556
Epoch : 1500, Loss : 27.505871287547052
Epoch : 2000, Loss : 25.26799281127751
accuracy : 0.942,precision : 0.9417,recall : 0.942,F1 : 0.9416
Fold 1


  0%|          | 0/2000 [00:00<?, ?it/s]

Epoch : 1, Loss : 182.32558631896973
Epoch : 500, Loss : 42.68794519454241
Epoch : 1000, Loss : 34.4045373685658
Epoch : 1500, Loss : 30.894036013633013
Epoch : 2000, Loss : 27.66494202706963
accuracy : 0.9554,precision : 0.9554,recall : 0.9554,F1 : 0.9554
Fold 2


  0%|          | 0/2000 [00:00<?, ?it/s]

Epoch : 1, Loss : 186.03058871626854
Epoch : 500, Loss : 40.57861889153719
Epoch : 1000, Loss : 32.9983250759542
Epoch : 1500, Loss : 28.567957000806928
Epoch : 2000, Loss : 25.73964098468423
accuracy : 0.955,precision : 0.9548,recall : 0.955,F1 : 0.9548
Fold 3


  0%|          | 0/2000 [00:00<?, ?it/s]

Epoch : 1, Loss : 185.46952664852142
Epoch : 500, Loss : 41.90808242559433
Epoch : 1000, Loss : 33.9031830355525
Epoch : 1500, Loss : 29.80672311037779
Epoch : 2000, Loss : 27.47973731905222
accuracy : 0.9459,precision : 0.9478,recall : 0.9459,F1 : 0.9463
Fold 4


  0%|          | 0/2000 [00:00<?, ?it/s]

Epoch : 1, Loss : 184.0010987520218
Epoch : 500, Loss : 37.452142123132944
Epoch : 1000, Loss : 28.9424535240978
Epoch : 1500, Loss : 24.981969507411122
Epoch : 2000, Loss : 23.030844592489302
accuracy : 0.9435,precision : 0.9438,recall : 0.9435,F1 : 0.9436
Fold 5


  0%|          | 0/2000 [00:00<?, ?it/s]

Epoch : 1, Loss : 184.2721265554428
Epoch : 500, Loss : 38.498045548796654
Epoch : 1000, Loss : 31.151646917685866
Epoch : 1500, Loss : 26.44242448732257
Epoch : 2000, Loss : 23.520072928629816
accuracy : 0.954,precision : 0.9539,recall : 0.954,F1 : 0.954
Fold 6


  0%|          | 0/2000 [00:00<?, ?it/s]

Epoch : 1, Loss : 181.22463032603264
Epoch : 500, Loss : 41.60358419269323
Epoch : 1000, Loss : 33.3487838935107
Epoch : 1500, Loss : 30.080545926466584
Epoch : 2000, Loss : 26.341428384184837
accuracy : 0.954,precision : 0.9565,recall : 0.954,F1 : 0.9545
Fold 7


  0%|          | 0/2000 [00:00<?, ?it/s]

Epoch : 1, Loss : 183.76460376381874
Epoch : 500, Loss : 41.98644769191742
Epoch : 1000, Loss : 33.68509687297046
Epoch : 1500, Loss : 29.527751888148487
Epoch : 2000, Loss : 26.40369381196797
accuracy : 0.9511,precision : 0.9509,recall : 0.9511,F1 : 0.9508
Fold 8


  0%|          | 0/2000 [00:00<?, ?it/s]

Epoch : 1, Loss : 185.4059585928917
Epoch : 500, Loss : 41.37420576438308
Epoch : 1000, Loss : 33.64906053803861
Epoch : 1500, Loss : 29.98067594319582
Epoch : 2000, Loss : 27.401899557560682
accuracy : 0.9444,precision : 0.9443,recall : 0.9444,F1 : 0.9439
Fold 9


  0%|          | 0/2000 [00:00<?, ?it/s]

Epoch : 1, Loss : 186.35210025310516
Epoch : 500, Loss : 41.667573511600494
Epoch : 1000, Loss : 35.08071529492736
Epoch : 1500, Loss : 30.448315019719303
Epoch : 2000, Loss : 27.569192219525576
accuracy : 0.9478,precision : 0.9475,recall : 0.9478,F1 : 0.9474


In [4]:
# Persist the per-fold metrics under the 'ptr' key.
# Requires metrics_list from the training cell, so run it after training.
# NOTE(review): the file is opened with 'w', so any statistics already stored
# in it (e.g. under other keys) are overwritten - confirm this is intended.
import json
import os
sup_dict = {}
sup_dict['ptr'] = metrics_list
# Create the output directory first, like the other cells that write files;
# otherwise open() fails on a fresh checkout without a results/ folder.
os.makedirs('results', exist_ok=True)
with open('results/representation_stats.json', 'w') as f:
    json.dump(sup_dict, f)

NameError: name 'metrics_list' is not defined

In [4]:
import json

# Read the stored statistics dictionary back from disk.
stats_path = 'results/representation_stats.json'
with open(stats_path) as stats_file:
    sup_dict = json.load(stats_file)

In [7]:
# Per-fold metrics stored under the 'ptr' key. When sup_dict was read back from
# the JSON file, the fold keys are strings ('0', '1', ...) rather than ints,
# because JSON object keys are always strings.
metrics_list = sup_dict['ptr']

In [8]:
# Summarise the cross-validation metrics: one row per fold, one column per metric.
col_names = ['Accuracy','Precision','Recall','F1 Score']
stat_df = pd.DataFrame.from_dict(metrics_list, orient='index', columns=col_names)

# Column-wise mean and standard deviation across folds
# (numpy default, i.e. population std with ddof=0).
mean_stats, std_stats = stat_df.values.mean(axis=0), stat_df.values.std(axis=0)

# Row position of the fold with the highest F1 score. argmax returns the first
# maximum, so a tie between folds no longer raises (argwhere(...).item() fails
# as soon as more than one row matches).
best_ind = int(np.argmax(stat_df['F1 Score'].values))

# mean +/- std per metric, both rounded to three decimals.
vals = [ufloat(np.round(mean, 3), np.round(std, 3)) for mean, std in zip(mean_stats, std_stats)]
print('Accuracy : {}, \nPrecision : {}, \nRecall : {}, \nF1 Score : {}'.format(vals[0],vals[1],vals[2],vals[3]))

Accuracy : 0.949+/-0.005, 
Precision : 0.950+/-0.005, 
Recall : 0.949+/-0.005, 
F1 Score : 0.949+/-0.005


In [9]:
# Copy the encoder of the best-scoring fold to the shared best-models folder.
import shutil
saveloc = 'saved_models/Encoders/PTR'
best_model_loc = 'saved_models/best_models'
os.makedirs(best_model_loc, exist_ok=True)
# Build the file name from the fold label instead of indexing a sorted
# directory listing: lexicographic order puts 'fold10' before 'fold2', and any
# extra file in the directory would shift the positions.
best_fold = stat_df.index[best_ind]
best_model_name = 'Encoder2D_ptr_fold{}.pt'.format(best_fold)
# Last expression on purpose: the cell displays the destination path.
shutil.copy(os.path.join(saveloc,best_model_name), os.path.join(best_model_loc,'2DEncoder_PTR.pt'))

'saved_models/best_models/2DEncoder_PTR.pt'