### Data augmentation using autoencoders
### 40 analytes dataset
#### Edgar Acuna
#### July 2021

In [1]:
import numpy as np
import time
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, confusion_matrix, f1_score, precision_score, roc_auc_score
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
df1=pd.read_csv("c://onr2020/NRLset1_part1.csv",header=None)
df2=pd.read_csv("c://onr2020/NRLset1_part2.csv",header=None)
df3=pd.read_csv("c://onr2020/NRLset1_part3.csv",header=None)
df4=pd.read_csv("c://onr2020/NRLset1_part4.csv",header=None)
df5=pd.read_csv("c://onr2020/NRLset1_part5.csv",header=None)
df6=pd.read_csv("c://onr2020/NRLset1_part6.csv",header=None)
df7=pd.read_csv("c://onr2020/NRLset1_part7.csv",header=None)
df8=pd.read_csv("c://onr2020/NRLset1_part8.csv",header=None)
y=pd.read_csv("c://onr2020/labels.csv",header=None)
ys=pd.read_csv("c://onr2020/substrateIDs.csv",header=None)
subs=pd.read_csv("c://onr2020/substrates.csv",header=None)
dfset1=pd.concat([df1,df2,df3,df4,df5,df6,df7,df8],ignore_index=True)
print('Size of the dataframe: {}'.format(dfset1.shape))

Size of the dataframe: (18000, 1701)


In [3]:
dfset2=dfset1.copy()
dfset2['Analyte']=y
dfset2['substrate']=ys
dfsub1=dfset2[dfset2['substrate']==1]
dfsub2=dfset2[dfset2['substrate']==2]
dfsub3=dfset2[dfset2['substrate']==3]
dfsub4=dfset2[dfset2['substrate']==4]
dfsub5=dfset2[dfset2['substrate']==5]
dfsub6=dfset2[dfset2['substrate']==6]
dfsub7=dfset2[dfset2['substrate']==7]
dfsub8=dfset2[dfset2['substrate']==8]
dfsub9=dfset2[dfset2['substrate']==9]
dfset2.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1693,1694,1695,1696,1697,1698,1699,1700,Analyte,substrate
0,0.041418,0.041621,0.042198,0.042688,0.042924,0.042274,0.042542,0.042663,0.042715,0.042664,...,0.053091,0.05314,0.053249,0.053325,0.053364,0.053356,0.0535,0.055986,26,2
1,0.69471,0.69584,0.69718,0.70653,0.70397,0.70913,0.71148,0.71133,0.71237,0.71298,...,0.75363,0.75449,0.75298,0.75576,0.75291,0.75532,0.75303,0.75323,18,3
2,0.048978,0.048432,0.047685,0.047086,0.046811,0.046752,0.046624,0.046443,0.046241,0.045999,...,0.25891,0.25821,0.25744,0.25658,0.2557,0.2548,0.25416,0.25711,8,7
3,0.039762,0.039495,0.038982,0.038339,0.037769,0.037301,0.036799,0.036316,0.035921,0.035612,...,0.25616,0.25537,0.25454,0.25372,0.25288,0.25197,0.25107,0.25019,32,7
4,0.022387,0.022508,0.022091,0.023054,0.02301,0.02274,0.023889,0.023936,0.023464,0.02481,...,0.024636,0.022298,0.023536,0.025714,0.025306,0.025062,0.023609,0.023901,30,1


### Extracting the nine substrates

In [4]:
dfsub1=dfset2[dfset2['substrate']==1]
dfsub2=dfset2[dfset2['substrate']==2]
dfsub3=dfset2[dfset2['substrate']==3]
dfsub4=dfset2[dfset2['substrate']==4]
dfsub5=dfset2[dfset2['substrate']==5]
dfsub6=dfset2[dfset2['substrate']==6]
dfsub7=dfset2[dfset2['substrate']==7]
dfsub8=dfset2[dfset2['substrate']==8]
dfsub9=dfset2[dfset2['substrate']==9]
#dfset1=pd.DataFrame(dfset1)

In [5]:
mdfsub1=dfsub1.iloc[:,0:1701]
mdfsub2=dfsub2.iloc[:,0:1701]
mdfsub3=dfsub3.iloc[:,0:1701]
mdfsub4=dfsub4.iloc[:,0:1701]
mdfsub5=dfsub5.iloc[:,0:1701]
mdfsub6=dfsub6.iloc[:,0:1701]
mdfsub7=dfsub7.iloc[:,0:1701]
mdfsub8=dfsub8.iloc[:,0:1701]
mdfsub9=dfsub9.iloc[:,0:1701]

In [6]:
s1=subs.loc[0,:]
s2=subs.loc[1,:]
s3=subs.loc[2,:]
s4=subs.loc[3,:]
s5=subs.loc[4,:]
s6=subs.loc[5,:]
s7=subs.loc[6,:]
s8=subs.loc[7,:]
s9=subs.loc[8,:]

In [7]:
modsub1=mdfsub1.apply(lambda x : x -(np.sum(np.array(x)*np.array(s1))/np.sum(np.array(s1)*np.array(s1)))*s1,axis=1)
modsub2=mdfsub2.apply(lambda x : x -(np.sum(np.array(x)*np.array(s2))/np.sum(np.array(s2)*np.array(s2)))*s2,axis=1)
modsub3=mdfsub3.apply(lambda x : x -(np.sum(np.array(x)*np.array(s3))/np.sum(np.array(s3)*np.array(s3)))*s3,axis=1)
modsub4=mdfsub4.apply(lambda x : x -(np.sum(np.array(x)*np.array(s4))/np.sum(np.array(s4)*np.array(s4)))*s4,axis=1)
modsub5=mdfsub5.apply(lambda x : x -(np.sum(np.array(x)*np.array(s5))/np.sum(np.array(s5)*np.array(s5)))*s5,axis=1)
modsub6=mdfsub6.apply(lambda x : x -(np.sum(np.array(x)*np.array(s6))/np.sum(np.array(s6)*np.array(s6)))*s6,axis=1)
modsub7=mdfsub7.apply(lambda x : x -(np.sum(np.array(x)*np.array(s7))/np.sum(np.array(s7)*np.array(s7)))*s7,axis=1)
modsub8=mdfsub8.apply(lambda x : x -(np.sum(np.array(x)*np.array(s8))/np.sum(np.array(s8)*np.array(s8)))*s8,axis=1)
modsub9=mdfsub9.apply(lambda x : x -(np.sum(np.array(x)*np.array(s9))/np.sum(np.array(s9)*np.array(s9)))*s9,axis=1)

### Data centered by substrates background

In [8]:
subdf=[modsub1,modsub2,modsub3,modsub4,modsub5,modsub6,modsub7,modsub8,modsub9]
cent_subs=pd.concat(subdf)
cent_subs.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1691,1692,1693,1694,1695,1696,1697,1698,1699,1700
4,0.002296,0.002603,0.002368,0.003507,0.003637,0.003537,0.00485,0.005059,0.004744,0.006244,...,-0.001858,-0.002531,-0.002143,-0.004422,-0.003123,-0.000884,-0.001225,-0.001406,-0.002792,-0.002432
5,-0.005471,0.003865,0.000351,-6e-05,-0.000258,-0.000582,0.002557,0.003072,0.000961,0.003941,...,-0.004245,-0.001915,-0.005149,-0.001717,-0.003814,-0.0031,-0.000355,-0.00054,-0.005481,-0.000857
8,-0.002496,0.016867,0.014204,0.005327,0.003649,0.006143,-0.01516,0.004201,0.003791,0.004599,...,0.002574,-0.01804,-0.018488,-0.006529,0.009548,0.004694,0.000399,0.018158,0.016369,0.007582
28,0.001313,0.001443,0.001536,0.001612,0.001687,0.001768,0.001866,0.001979,0.002079,0.002155,...,-0.004576,-0.00455,-0.004524,-0.004502,-0.004476,-0.004446,-0.004411,-0.004373,-0.004332,-0.004301
46,-0.001097,4e-05,0.001125,0.002056,0.001685,0.002006,0.000228,0.001179,0.001835,0.002005,...,-0.004655,-0.006438,-0.005102,-0.005563,-0.004804,-0.005517,-0.004411,-0.005738,-0.005031,-0.005584


### Normalizing the spectra

In [9]:
# Using sklearn
from sklearn.preprocessing import normalize
b=cent_subs.iloc[:,0:1701]
b.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1691,1692,1693,1694,1695,1696,1697,1698,1699,1700
4,0.002296,0.002603,0.002368,0.003507,0.003637,0.003537,0.00485,0.005059,0.004744,0.006244,...,-0.001858,-0.002531,-0.002143,-0.004422,-0.003123,-0.000884,-0.001225,-0.001406,-0.002792,-0.002432
5,-0.005471,0.003865,0.000351,-6e-05,-0.000258,-0.000582,0.002557,0.003072,0.000961,0.003941,...,-0.004245,-0.001915,-0.005149,-0.001717,-0.003814,-0.0031,-0.000355,-0.00054,-0.005481,-0.000857
8,-0.002496,0.016867,0.014204,0.005327,0.003649,0.006143,-0.01516,0.004201,0.003791,0.004599,...,0.002574,-0.01804,-0.018488,-0.006529,0.009548,0.004694,0.000399,0.018158,0.016369,0.007582
28,0.001313,0.001443,0.001536,0.001612,0.001687,0.001768,0.001866,0.001979,0.002079,0.002155,...,-0.004576,-0.00455,-0.004524,-0.004502,-0.004476,-0.004446,-0.004411,-0.004373,-0.004332,-0.004301
46,-0.001097,4e-05,0.001125,0.002056,0.001685,0.002006,0.000228,0.001179,0.001835,0.002005,...,-0.004655,-0.006438,-0.005102,-0.005563,-0.004804,-0.005517,-0.004411,-0.005738,-0.005031,-0.005584


In [10]:
#normalize(b)
b1=b.apply(lambda x: x/(x**2).sum()**.5, axis=1)
b1.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1691,1692,1693,1694,1695,1696,1697,1698,1699,1700
4,0.006593,0.007476,0.0068,0.010072,0.010446,0.010157,0.013928,0.014528,0.013623,0.017932,...,-0.005337,-0.007268,-0.006155,-0.0127,-0.008968,-0.002538,-0.003519,-0.004037,-0.008019,-0.006984
5,-0.016333,0.011538,0.001048,-0.000179,-0.00077,-0.001737,0.007633,0.00917,0.00287,0.011766,...,-0.012672,-0.005718,-0.015373,-0.005127,-0.011386,-0.009255,-0.00106,-0.001612,-0.016363,-0.002559
8,-0.005348,0.036134,0.030431,0.011412,0.007818,0.013162,-0.032478,0.008999,0.008123,0.009853,...,0.005515,-0.038647,-0.039608,-0.013988,0.020456,0.010057,0.000856,0.038902,0.035069,0.016243
28,0.004714,0.005182,0.005515,0.00579,0.006058,0.006349,0.0067,0.007108,0.007465,0.007738,...,-0.016436,-0.01634,-0.016247,-0.016168,-0.016075,-0.015967,-0.01584,-0.015707,-0.015558,-0.015445
46,-0.003245,0.000118,0.003326,0.006083,0.004984,0.005934,0.000673,0.003487,0.005428,0.005932,...,-0.013769,-0.019043,-0.01509,-0.016453,-0.014209,-0.016319,-0.013047,-0.016972,-0.014881,-0.016518


### Normalizing the data for autoenconders

In [11]:
import tensorflow as tf
import keras
from keras.models import Model, load_model
from keras import regularizers
from keras.layers import Input, Dense
from sklearn.preprocessing import  StandardScaler, MinMaxScaler
trainx =b1.iloc[:,0:1701]
trainx=trainx.to_numpy()
scaler = MinMaxScaler()
train_x= scaler.fit_transform(trainx)
train_x

array([[0.45711583, 0.48938046, 0.48649315, ..., 0.42916987, 0.48260493,
        0.43545418],
       [0.32434561, 0.51334761, 0.45458234, ..., 0.44226193, 0.43586269,
        0.4644868 ],
       [0.38796258, 0.65849522, 0.61759402, ..., 0.66098456, 0.72400473,
        0.58786694],
       ...,
       [0.60793619, 0.60722394, 0.40874603, ..., 0.30006901, 0.39184086,
        0.38102345],
       [0.811848  , 0.83086698, 0.65052654, ..., 0.36572404, 0.4499319 ,
        0.40042738],
       [0.60435644, 0.61476153, 0.41242006, ..., 0.37122376, 0.46287626,
        0.43094634]])

In [12]:
# No of Neurons in each Layer 
nb_epoch = 40
batch_size = 50
input_dim = train_x.shape[1] #num of columns, 1701
encoding_dim = 64
hidden_dim = int(encoding_dim / 2) #i.e. 32
#decoding_dim = 64
learning_rate = 1e-7

input_layer = Input(shape=(input_dim, ))
encoder = Dense(encoding_dim, activation="tanh", activity_regularizer=regularizers.l1(learning_rate))(input_layer)
encoder = Dense(hidden_dim, activation="relu")(encoder)
decoder = Dense(hidden_dim, activation='tanh')(encoder)
#decoder = Dense(decoding_dim, activation='relu')(decoder)
decoder = Dense(input_dim, activation='relu')(decoder)
autoencoder = Model(inputs=input_layer, outputs=decoder)
autoencoder.summary()

Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 1701)]            0         
_________________________________________________________________
dense (Dense)                (None, 64)                108928    
_________________________________________________________________
dense_1 (Dense)              (None, 32)                2080      
_________________________________________________________________
dense_2 (Dense)              (None, 32)                1056      
_________________________________________________________________
dense_3 (Dense)              (None, 1701)              56133     
Total params: 168,197
Trainable params: 168,197
Non-trainable params: 0
_________________________________________________________________


In [13]:
import time
start_time = time.time()
autoencoder.compile(optimizer='adam', loss='mse' )

history = autoencoder.fit(train_x, train_x, epochs=nb_epoch,batch_size=batch_size, shuffle=True,validation_split=0.2,verbose=1)

print("--- %s seconds ---" % (time.time() - start_time))

df_history = pd.DataFrame(history.history) 

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40
--- 21.760228633880615 seconds ---


In [14]:
predictions = autoencoder.predict(train_x)
print(predictions)

[[0.46056485 0.4835794  0.48675594 ... 0.40872523 0.49765897 0.        ]
 [0.44311392 0.45918366 0.45932528 ... 0.39894387 0.48653167 0.        ]
 [0.41271424 0.42285976 0.42456836 ... 0.41520587 0.50569165 0.        ]
 ...
 [0.5500539  0.5616297  0.5536279  ... 0.33776048 0.43143654 0.        ]
 [0.6398362  0.65822816 0.64466673 ... 0.32757968 0.4172185  0.        ]
 [0.5767274  0.59481716 0.58550996 ... 0.38992313 0.48328555 0.        ]]


In [15]:
predictions=pd.DataFrame(predictions)

In [16]:
predictions.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1691,1692,1693,1694,1695,1696,1697,1698,1699,1700
0,0.460565,0.483579,0.486756,0.49212,0.558143,0.547319,0.571584,0.539622,0.521876,0.494091,...,0.487231,0.49013,0.426272,0.474585,0.462609,0.490855,0.488666,0.408725,0.497659,0.0
1,0.443114,0.459184,0.459325,0.454957,0.511528,0.502045,0.526783,0.49736,0.48609,0.449556,...,0.476967,0.484295,0.41558,0.462455,0.453752,0.482757,0.48006,0.398944,0.486532,0.0
2,0.412714,0.42286,0.424568,0.423803,0.474847,0.461265,0.489338,0.459784,0.450983,0.422398,...,0.505846,0.512411,0.441172,0.48783,0.480978,0.502045,0.50068,0.415206,0.505692,0.0
3,0.473523,0.49542,0.492629,0.488196,0.547281,0.535176,0.559948,0.528346,0.512583,0.479831,...,0.459113,0.466331,0.396244,0.445663,0.435294,0.465793,0.460989,0.384487,0.470583,0.0
4,0.466921,0.488979,0.486755,0.483489,0.541455,0.529812,0.553299,0.521255,0.505341,0.47194,...,0.465931,0.473525,0.401898,0.451975,0.44139,0.471358,0.466638,0.389966,0.476114,0.0


In [17]:
train_x=pd.DataFrame(train_x)

In [18]:
augdata=pd.concat([train_x,predictions],ignore_index=True)

In [19]:
print(augdata.shape)

(36000, 1701)


In [20]:
a1=np.array(dfsub1['Analyte'].values.tolist())
a2=np.array(dfsub2['Analyte'].values.tolist())
a3=np.array(dfsub3['Analyte'].values.tolist())
a4=np.array(dfsub4['Analyte'].values.tolist())
a5=np.array(dfsub5['Analyte'].values.tolist())
a6=np.array(dfsub6['Analyte'].values.tolist())
a7=np.array(dfsub7['Analyte'].values.tolist())
a8=np.array(dfsub8['Analyte'].values.tolist())
a9=np.array(dfsub9['Analyte'].values.tolist())
y=np.concatenate((a1,a2,a3,a4,a5,a6,a7,a8,a9),axis=None)

In [21]:
augy=np.concatenate((a1,a2,a3,a4,a5,a6,a7,a8,a9,a1,a2,a3,a4,a5,a6,a7,a8,a9),axis=None)
augy=pd.DataFrame(augy)

In [22]:
augdata['Analyte']=augy
augdata.tail()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1692,1693,1694,1695,1696,1697,1698,1699,1700,Analyte
35995,0.544029,0.564594,0.56259,0.5382,0.583195,0.58742,0.593123,0.590762,0.563569,0.507713,...,0.444787,0.359655,0.426219,0.409119,0.448875,0.438485,0.366499,0.455328,0.0,32
35996,0.615789,0.638487,0.626592,0.603206,0.670327,0.67072,0.693729,0.674622,0.661118,0.609632,...,0.41224,0.350868,0.397867,0.386393,0.422903,0.414631,0.337026,0.433557,0.0,11
35997,0.550054,0.56163,0.553628,0.528738,0.583146,0.582664,0.606475,0.58842,0.574775,0.52785,...,0.418386,0.347656,0.400304,0.385908,0.4207,0.410556,0.33776,0.431437,0.0,26
35998,0.639836,0.658228,0.644667,0.610123,0.673862,0.678563,0.704107,0.683188,0.679882,0.623255,...,0.404263,0.339258,0.384795,0.374255,0.41019,0.403803,0.32758,0.417219,0.0,5
35999,0.576727,0.594817,0.58551,0.559755,0.612796,0.616671,0.628374,0.612385,0.596461,0.53799,...,0.473031,0.404669,0.457279,0.445107,0.475039,0.471586,0.389923,0.483286,0.0,4


In [23]:
y=augy.to_numpy()
print(np.unique(y,return_counts=True))
y=np.ravel(y)

(array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
       35, 36, 37, 38, 39, 40]), array([900, 900, 900, 900, 900, 900, 900, 900, 900, 900, 900, 900, 900,
       900, 900, 900, 900, 900, 900, 900, 900, 900, 900, 900, 900, 900,
       900, 900, 900, 900, 900, 900, 900, 900, 900, 900, 900, 900, 900,
       900], dtype=int64))


In [27]:
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
# We will find by inspection the best k according to the classifier accuracy on the test set
accuracies = []
X=augdata.iloc[:,0:1701]
# We will find by inspection the best k according to the classifier accuracy on the test set
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2,random_state=0)
for k in range(1, 12, 2):
    # Entrenar el clasificador  con el valor actual de  `k`
    neigh = KNeighborsClassifier(n_neighbors=k)
    neigh.fit(X_train, y_train)

    # Evaluar los modelos e imprimiendo sus predicciones
    score = neigh.score(X_test, y_test)
    print("k=%d, accuracy=%.2f%%" % (k, score * 100))
    accuracies.append(score)

k=1, accuracy=91.29%
k=3, accuracy=91.42%
k=5, accuracy=91.69%
k=7, accuracy=91.75%
k=9, accuracy=91.71%
k=11, accuracy=91.47%


### MLP

In [25]:
import tensorflow as tf
from tensorflow import keras
import os
import tempfile
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, f1_score, precision_score, roc_auc_score
import keras
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.utils import to_categorical
import time

In [26]:
# Configuration options
feature_vector_length = 1701
num_classes = 55
x=augdata.iloc[:,0:1701].to_numpy()
X_train, X_test, Y_train, Y_test = train_test_split(x,y, test_size=0.2,random_state=0)
# Convert target classes to categorical ones
ytrain=Y_train-1
ytest=Y_test-1
Y_train = to_categorical(ytrain, num_classes)
Y_test = to_categorical(ytest, num_classes)
print('Train dimension:')
print(X_train.shape)
print('Test dimension:')
print(Y_test.shape)

Train dimension:
(28800, 1701)
Test dimension:
(7200, 55)


In [27]:
# Set the input shape
input_shape = (feature_vector_length,)
print(f'Feature shape: {input_shape}')

# Create the model
model = Sequential()
model.add(Dropout(0.3, input_shape=input_shape))
model.add(Dense(300, input_shape=input_shape, activation='relu'))
#model.add(Dropout(0.3))
model.add(Dense(200, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))
model.summary()

Feature shape: (1701,)
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dropout (Dropout)            (None, 1701)              0         
_________________________________________________________________
dense_4 (Dense)              (None, 300)               510600    
_________________________________________________________________
dense_5 (Dense)              (None, 200)               60200     
_________________________________________________________________
dense_6 (Dense)              (None, 55)                11055     
Total params: 581,855
Trainable params: 581,855
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Configure the model and start training
import time
start_time = time.time()
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
baseline_history=model.fit(X_train, Y_train, epochs=100, batch_size=150, verbose=1, validation_split=0.2)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100

In [None]:
print("Training time:---  %s seconds ---" % (time.time() - start_time))
#Test the model after training
start_time=time.time()
test_results = model.evaluate(X_test, Y_test, verbose=1)
print(test_results)
print(f'Test results - Loss: {test_results[0]} - Accuracy: {test_results[1]}%')
print("--- %s seconds ---" % (time.time() - start_time))

### CNN

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Conv1D, Flatten, MaxPooling1D, Dropout
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.datasets import load_iris
from numpy import unique
import pandas as pd
import numpy as np
import time
import keras

In [None]:
x=augdata.iloc[:,0:1701].to_numpy()
x = x.reshape(x.shape[0], x.shape[1], 1)
print(x.shape)
y=y-1
#print(unique(y))
#print(unique(y).sum())

xtrain, xtest, ytrain, ytest=train_test_split(x, y, test_size=0.2)


In [None]:
model = Sequential()
model.add(Conv1D(32, 3, activation="relu", input_shape=(1701,1)))
model.add(Dense(64, activation="relu"))
model.add(MaxPooling1D(pool_size=3))
model.add(Dropout(0.5))
model.add(Conv1D(32, 3, activation="relu", input_shape=(1701,1)))
model.add(Dense(64, activation="relu"))
model.add(MaxPooling1D(pool_size=3))
model.add(Dropout(0.5))
model.add(Flatten())
model.add(Dense(55, activation = 'softmax'))
start_time = time.time()
model.compile(loss = 'sparse_categorical_crossentropy', optimizer = "adam", metrics = ['accuracy'])
model.summary()
model = Sequential()
baseline_history=model.fit(xtrain, ytrain, epochs=30, batch_size=256, verbose=1, validation_split=0.2)
#model.fit(xtrain, ytrain, batch_size=256,epochs=25,  validation_split=.2, verbose=1)
acc = model.evaluate(xtrain, ytrain)
print("Loss:", acc[0], " Accuracy:", acc[1])
print("--- %s seconds ---" % (time.time() - start_time))

In [None]:
start_time = time.time()
#Test the model after training
start_time=time.time()
test_results = model.evaluate(xtest, ytest, verbose=1)
print(test_results)
print(f'Test results - Loss: {test_results[0]} - Accuracy: {test_results[1]}%')
print("--- %s seconds ---" % (time.time() - start_time))