In [16]:
import math

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from IPython.utils import io
from sklearn.base import clone
from sklearn.model_selection import cross_val_score, cross_validate
from skorch import NeuralNetClassifier
from torch.utils.data import Dataset, DataLoader

## Model

In [17]:
class TwoLayerNet(torch.nn.Module):
    """Small fully connected classifier with two hidden layers.

    Despite its name the network stacks three ``nn.Linear`` modules:
    ``D_in -> H`` (ReLU), ``H -> H2`` (sigmoid) and ``H2 -> D_out``.
    The output is returned as raw scores; no softmax is applied here.
    """

    def __init__(self, D_in=5, H=10, D_out=2, H2=10):
        """
        Args:
            D_in (int): number of input features.
            H (int): width of the first hidden layer.
            D_out (int): number of output scores (one per class).
            H2 (int): width of the second hidden layer. Defaults to 10,
                the value that used to be hard-coded.
        """
        super().__init__()
        self.linear1 = torch.nn.Linear(D_in, H)
        self.linear2 = torch.nn.Linear(H, H2)
        self.linear3 = torch.nn.Linear(H2, D_out)

    def forward(self, x):
        """
        Map a batch of inputs of shape (batch, D_in) to scores of shape
        (batch, D_out).
        """
        # First hidden layer with ReLU activation.
        h_relu = torch.relu(self.linear1(x))
        # Second hidden layer with sigmoid activation.
        h2_sigmoid = torch.sigmoid(self.linear2(h_relu))
        # Output layer: left un-normalised for the loss to handle.
        y_pred = self.linear3(h2_sigmoid)
        return y_pred

In [18]:
## Wrap TwoLayerNet in a skorch classifier so it can be scored with scikit-learn tools
net = NeuralNetClassifier(
    TwoLayerNet,
    # Loss and optimiser
    criterion=torch.nn.CrossEntropyLoss,
    optimizer=torch.optim.Adam,
    lr=0.01,
    # Training schedule
    max_epochs=100,
    iterator_train__shuffle=True,  # reshuffle the training data on each epoch
    # No extra callbacks
    callbacks=[],
)

## Create a dataset whose label is a single abscissa

In [19]:
# Load the full results sheet and discard the 'Unnamed: 0' column in one step
# (presumably a row index saved with the sheet -- TODO confirm).
PT_data_complete = pd.read_excel("../PTResults-1000.xlsx").drop(columns='Unnamed: 0')

# Show which columns remain after the clean-up
print(PT_data_complete.columns)

Index([    'Temperture[K]',            'AoA[o]',              'Mach',
                'MVD[mum]',        'rho[kg/m3]',                   0,
       0.00847867126218693, 0.01444256487321753, 0.02069919709045778,
       0.02724956034185687,
       ...
        0.9365419465310392,  0.9444633406916711,  0.9521319546962828,
        0.9595663922510524,  0.9666372639509644,  0.9733152944189551,
        0.9796956385260924,  0.9857709249503196,   0.991539343140028,
        0.9971741078954962],
      dtype='object', length=181)


In [20]:
def PT_data_generator(label_col, data=None):
    '''
    Build a frame holding the five input-parameter columns plus one label column.

    Args:
        label_col (int): positional index of the column to use as the label.
            Must satisfy 5 <= label_col < number of columns in the source
            frame (i.e. 5..180 for the 181-column results sheet).
        data (pandas.DataFrame, optional): source frame whose first five
            columns are the inputs. Defaults to the module-level
            ``PT_data_complete``.

    Returns:
        pandas.DataFrame: a new frame with the first five columns of the
        source followed by a ``'label'`` column. The source is not modified.

    Raises:
        ValueError: if ``label_col`` does not point at a label column.
    '''
    # The global is only looked up when no frame is passed explicitly.
    source = PT_data_complete if data is None else data

    n_features = 5
    n_cols = source.shape[1]

    # Explicit check instead of `assert`, which is stripped under `python -O`;
    # the upper bound follows the frame instead of a hard-coded 181.
    if not n_features <= label_col < n_cols:
        raise ValueError(
            'label_col must be in [%d, %d), got %r' % (n_features, n_cols, label_col)
        )

    # Copy only the columns that are kept rather than the whole frame.
    PT_data = source.iloc[:, :n_features].copy()
    # Positional lookup; the selected Series shares the index of PT_data.
    PT_data['label'] = source.iloc[:, label_col]
    return PT_data

# Example: build the data set labelled by the column at position 132 and show it
PT_data = PT_data_generator(label_col=132)
print(PT_data)

     Temperture[K]    AoA[o]      Mach  MVD[mum]  rho[kg/m3]  label
0       249.830880  7.203245  0.300057  0.000022    0.521074      1
1       236.843544  1.862602  0.472780  0.000026    0.844524      1
2       249.917781  6.852195  0.402226  0.000045    0.422595      1
3       259.968700  4.173048  0.579345  0.000016    0.563434      1
4       265.179783  9.682616  0.456712  0.000038    1.123021      1
5       268.934267  0.850442  0.319527  0.000017    1.124468      0
6       237.083873  4.211076  0.778945  0.000031    0.970799      1
7       245.770625  6.865009  0.717313  0.000011    1.018869      0
8       272.704444  7.481657  0.440222  0.000042    0.485161      1
9       251.065741  9.085955  0.446807  0.000022    0.507274      1
10      233.924678  6.788355  0.405814  0.000021    0.805548      1
11      235.284502  5.741176  0.373364  0.000034    0.977301      1
12      237.243377  4.140560  0.647200  0.000027    0.441212      1
13      254.585856  6.637946  0.557445  0.000048

In [21]:
# Adapt the data to fit with skorch functions
# (could eventually be folded into PT_data_generator)
label_col = 131  # position of the column used as label

# Use the variable rather than repeating the literal, so the two cannot drift apart.
PT_data = PT_data_generator(label_col)
PT_numpy = PT_data.values

# Every column except the last is a feature; the last column is the label.
# Cast to the dtypes used for training: float32 features, int64 class labels.
X = PT_numpy[:, :-1].astype(np.float32)
y = PT_numpy[:, -1].astype(np.int64)

In [None]:
# 10-fold cross-validated precision and recall, one entry per abscissa column.
precision_mean = []
precision_std = []
recall_mean = []
recall_std = []

# Label columns start after the five input-parameter columns.
for i in range(5, len(PT_data_complete.columns)):
    print(i)

    # Rebuild features and labels for abscissa column i. Without this, every
    # iteration would score the same X, y regardless of i.
    PT_numpy_i = PT_data_generator(i).values
    X_i = PT_numpy_i[:, :-1].astype(np.float32)
    y_i = PT_numpy_i[:, -1].astype(np.int64)

    # Silence skorch's per-epoch log. Both metrics come from one round of
    # training per fold instead of two separate cross_val_score runs.
    with io.capture_output() as captured:
        scores = cross_validate(net, X_i, y_i, scoring=['precision', 'recall'], cv=10)

    precision = scores['test_precision']
    recall = scores['test_recall']
    precision_mean.append(precision.mean())
    precision_std.append(precision.std())
    recall_mean.append(recall.mean())
    recall_std.append(recall.std())

5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99


In [150]:
##################################

## PLOTTING RESULTS ##

def plot_metric_vs_abscissa(abscissa, mean, std, metric_name, label='Neural network'):
    """Plot a cross-validated metric against the abscissa with a +/- 1 std band.

    Args:
        abscissa: sequence of abscissa values (x axis).
        mean: per-abscissa mean of the metric, as fractions in [0, 1].
        std: per-abscissa standard deviation of the metric, same units as mean.
        metric_name (str): name used in the y-axis label, e.g. 'Precision'.
        label (str): legend entry for the curve.
    """
    # Scores are fractions; the axis is expressed in percent.
    mean_pct = 100.0 * np.asarray(mean, dtype=float)
    std_pct = 100.0 * np.asarray(std, dtype=float)

    fig, ax = plt.subplots()
    ax.plot(abscissa, mean_pct, color='green', label=label)
    # fills area according to standard deviation
    ax.fill_between(abscissa, mean_pct - std_pct, mean_pct + std_pct,
                    alpha=0.5, color='#e0e0e0')

    ax.set_xlabel(r'\textbf{Abscissa}', fontsize=12)
    ax.set_ylabel(r'\textbf{' + metric_name + r'} [\%]', fontsize=16)
    ax.set_ylim(-0.5, 105)
    ax.legend()

    plt.show()


plt.rc('font', **{'family': 'serif', 'serif': ['Palatino']})
plt.rc('text', usetex=True)

# The abscissa values are the column headers that follow the five input
# columns. Only as many as were actually scored are used, so a partially
# completed cross-validation loop can still be plotted.
n_scored = len(precision_mean)
abscissa = np.asarray(PT_data_complete.columns[5:5 + n_scored], dtype=float)

plot_metric_vs_abscissa(abscissa, precision_mean, precision_std, 'Precision')
plot_metric_vs_abscissa(abscissa, recall_mean, recall_std, 'Recall')

# NOTE: no exit(0) here -- in a notebook it shuts the kernel down and prevents
# the following cells from running.

0.9971741078954962


## Data with all values

In [3]:
## Reorganize the data set: one row per (simulation case, location) pair

# N is size of the training set; D_in is input dimension;
# H is hidden dimension.
N, D_in, H = 900, 6, 10

# Data. Drop 'Unnamed: 0' as in the clean-up of PT_data_complete; otherwise it
# would end up in X as an extra feature on top of the D_in declared above.
PT_raw = pd.read_excel("../PTResults-1000.xlsx").drop(columns='Unnamed: 0')

n_params = 5
params = PT_raw.iloc[:, :n_params]
# NOTE(review): the first abscissa column is skipped here, matching the
# original slice (`iloc[7:]` on the un-cleaned sheet) -- confirm this is intended.
profiles = PT_raw.iloc[:, n_params + 1:]
n_cases, n_locs = profiles.shape

# Row-major flattening: all locations of case 0, then all of case 1, ...
ices = profiles.to_numpy(dtype=np.float64).reshape(-1)

# Repeat each case's input parameters once per location, then attach the
# location index and the matching ice value.
row_idx = np.repeat(np.arange(n_cases), n_locs)
PT_data = params.iloc[row_idx].assign(
    loc=np.tile(np.arange(n_locs), n_cases),
    ice=ices,
)

PT_numpy = PT_data.values
X = PT_numpy[:, :-1].astype(np.float32)
y = PT_numpy[:, -1].astype(np.int64)

# Five input parameters plus the location index.
assert X.shape[1] == D_in

torch.Size([900, 6]) torch.Size([174100, 6])
torch.Size([900]) torch.Size([174100])


In [7]:
# Cross-validate once on the stacked data set. X and y do not depend on a
# label column here, so looping over column indices would only repeat the
# same 10-fold run.

# `net` is built with TwoLayerNet's default input width; work on a clone whose
# input layer matches this X so the shared estimator is left untouched.
net_all = clone(net)
net_all.set_params(module__D_in=X.shape[1])

# Silence skorch's per-epoch log; both metrics come from one training run per fold.
with io.capture_output() as captured:
    scores = cross_validate(net_all, X, y, scoring=['precision', 'recall'], cv=10)

# Kept as single-entry lists so the names have the same type as in the
# per-abscissa analysis.
precision_mean = [scores['test_precision'].mean()]
precision_std = [scores['test_precision'].std()]
recall_mean = [scores['test_recall'].mean()]
recall_std = [scores['test_recall'].std()]

[Test number 0]
tensor(160776)
Training accuracy:  0.9611111283302307
Testing accuracy:  0.9234692454338074
[Test number 1]
tensor(159991)
Training accuracy:  0.9611111283302307
Testing accuracy:  0.9189603924751282
[Test number 2]
tensor(159315)
Training accuracy:  0.9599999785423279
Testing accuracy:  0.9150775671005249
[Test number 3]
tensor(130356)
Training accuracy:  0.8244444727897644
Testing accuracy:  0.7487421035766602
[Test number 4]
tensor(160586)
Training accuracy:  0.9622222185134888
Testing accuracy:  0.9223779439926147
[Test number 5]
tensor(160475)
Training accuracy:  0.95333331823349
Testing accuracy:  0.9217403531074524
[Test number 6]
tensor(159183)
Training accuracy:  0.9633333086967468
Testing accuracy:  0.9143193364143372
[Test number 7]
tensor(160951)
Training accuracy:  0.9599999785423279
Testing accuracy:  0.9244744181632996
[Test number 8]
tensor(158990)
Training accuracy:  0.9555555582046509
Testing accuracy:  0.9132108092308044
[Test number 9]
tensor(158777)