**Imports**

In [2]:
import tensorflow as tf
import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt

import tensorflow.keras
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Conv1D, Flatten, Dense
from tensorflow.keras.models import Sequential

from sklearn.model_selection import train_test_split



**Loading in Data**

In [12]:
from numpy import genfromtxt

# Raw spectra matrix parsed from the ';'-separated CSV into a float array.
data = genfromtxt('spectra_data/pure_spectra_matrix.csv', delimiter=';')

# DataFrame view of the same matrix, with columns labelled 0..n_columns-1.
spectra = pd.DataFrame(data=data, columns=np.arange(data.shape[1]))
# Per-spectrum metadata; later cells read its `Species` column.
metadata = pd.read_csv('spectra_data/pure_spectra_metadata.csv', sep=';')


**Processing Data**

In [49]:
#preprocessing
# Append a trailing channel axis so every spectrum has shape (n_points, 1).
# The sizes are read from the loaded matrix rather than hard-coded, so the
# cell keeps working when the CSV gains or loses spectra; unpacking also
# fails loudly if `data` is not 2-D.
n_spectra, n_points = data.shape
signals = data.reshape(n_spectra, n_points, 1)
# Scale by the global maximum so the largest value over all spectra is 1.
signals = signals/np.max(signals)

#encoding genera targets
# Genus codes are the first three characters of each species label;
# np.unique returns them sorted and de-duplicated.
genera = np.unique(np.array([x[:3] for x in metadata.Species.unique()]))

def encode(names, targetset, codelength):
    """One-hot encode labels using their first `codelength` characters.

    Parameters
    ----------
    names : sequence of str
        Class names. The position of a name in this sequence is its class
        index, and `len(names)` is the width of the one-hot encoding.
    targetset : iterable of str
        Labels to encode; each is truncated to `codelength` characters
        before being looked up in `names`.
    codelength : int
        Number of leading characters of a label that identify its class.

    Returns
    -------
    numpy.ndarray
        float32 array of shape (len(targetset), len(names)) with exactly one
        1.0 per row.

    Raises
    ------
    KeyError
        If a truncated label does not match any entry in `names`.

    Notes
    -----
    Prints the name -> index mapping as a side effect.
    """
    # Plain str keys / int values keep the printed mapping readable and match
    # the string form used for the lookup below.
    code = {str(name): index for index, name in enumerate(names)}
    print(code)

    indices = np.array(
        [code[str(label[:codelength])] for label in targetset],
        dtype=np.intp,
    )

    # The width is fixed to len(names) instead of being inferred from the
    # largest index present, so the encoding keeps one column per class even
    # when the highest-indexed classes are absent from `targetset`.
    encoded_names = np.zeros((indices.size, len(names)), dtype=np.float32)
    encoded_names[np.arange(indices.size), indices] = 1.0
    return encoded_names

# One-hot genus targets: species labels are matched on their first 3 characters.
targets_genus = encode(names=genera, targetset=metadata.Species, codelength=3)

#training/validation data
# 70/30 split with a fixed seed so the partition is repeatable.
trainG_x, validG_x, trainG_y, validG_y = train_test_split(
    signals,
    targets_genus,
    test_size=0.3,
    random_state=10,
)

{'AUG': 0, 'BUT': 1, 'EMD': 2, 'JNH': 3, 'NYV': 4, 'QBG': 5, 'QWP': 6, 'RTO': 7, 'VVJ': 8}


**Conv1D Construction - genus classifier**

In [47]:
# Number of filters in the first and second Conv1D layers.
layer1_bs = 45
layer2_bs = 36

# Shape of a single sample, i.e. `signals` without its leading sample axis.
shape = signals.shape[1:]

# Two stacked 1-D convolutions followed by a softmax classifier.
# The output width is taken from the one-hot targets instead of a hard-coded
# 9, so the network always matches the number of encoded genus classes.
genus_convnet = Sequential()

genus_convnet.add(Conv1D(layer1_bs, kernel_size=3, activation='relu', input_shape=shape))
genus_convnet.add(Conv1D(layer2_bs, kernel_size=3, activation='relu'))
genus_convnet.add(Flatten())
genus_convnet.add(Dense(targets_genus.shape[1], activation='softmax'))
genus_convnet.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])


**Train - genus classifier**

In [50]:
# Train the genus classifier for 20 epochs, validating on the held-out split.
genus_convnet.fit(
    x=trainG_x,
    y=trainG_y,
    epochs=20,
    validation_data=(validG_x, validG_y),
)

Train on 399 samples, validate on 172 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x2296b437dc8>

In [56]:
# Persist the trained genus classifier to disk.
# NOTE(review): the recorded output reports assets written under
# 'bacteria_genus_convnet.h5py\assets', i.e. this path ends up as a directory
# rather than a single HDF5 file. If a single HDF5 file was intended, confirm
# whether the suffix should be '.h5' and update whatever loads the model.
genus_convnet.save('bacteria_genus_convnet.h5py')

INFO:tensorflow:Assets written to: bacteria_genus_convnet.h5py\assets


**Strain identification**

In [53]:
#labels
# Sorted, de-duplicated species labels; each one is treated as a strain class.
strain = np.unique(metadata.Species.unique())

#encoding strain targets
# Labels are matched on their first 7 characters.
targets_strain = encode(names=strain, targetset=metadata.Species, codelength=7)

#training/validation data
# 70/30 split with a fixed seed so the partition is repeatable.
trainS_x, validS_x, trainS_y, validS_y = train_test_split(
    signals,
    targets_strain,
    test_size=0.3,
    random_state=10,
)

{'AUG.AEX': 0, 'AUG.HSS': 1, 'BUT.BIK': 2, 'BUT.DNW': 3, 'BUT.TRH': 4, 'BUT.YZE': 5, 'EMD.FZO': 6, 'EMD.WXC': 7, 'JNH.FLH': 8, 'JNH.ZIJ': 9, 'NYV.VCE': 10, 'NYV.XSY': 11, 'QBG.CRP': 12, 'QBG.KGI': 13, 'QWP.DRH': 14, 'QWP.LRO': 15, 'RTO.JFR': 16, 'RTO.TQH': 17, 'VVJ.KSF': 18, 'VVJ.KWJ': 19}


**Conv1D Construction - strain classifier**

In [54]:
# Number of filters in the first and second Conv1D layers.
layer1_bs = 80
layer2_bs = 60

# Shape of a single sample, i.e. `signals` without its leading sample axis.
shape = signals.shape[1:]

# Two stacked 1-D convolutions followed by a softmax classifier.
# The output width is taken from the one-hot targets instead of a hard-coded
# 20, so the network always matches the number of encoded strain classes.
strain_convnet = Sequential()

strain_convnet.add(Conv1D(layer1_bs, kernel_size=3, activation='relu', input_shape=shape))
strain_convnet.add(Conv1D(layer2_bs, kernel_size=3, activation='relu'))
strain_convnet.add(Flatten())
strain_convnet.add(Dense(targets_strain.shape[1], activation='softmax'))
strain_convnet.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])


In [55]:
# Train the strain classifier for 20 epochs, validating on the held-out split.
strain_convnet.fit(
    x=trainS_x,
    y=trainS_y,
    epochs=20,
    validation_data=(validS_x, validS_y),
)

Train on 399 samples, validate on 172 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x22970b54588>

In [57]:
# Persist the trained strain classifier to disk.
# NOTE(review): the recorded output reports assets written under
# 'bacteria_strain_convnet.h5py\assets', i.e. this path ends up as a directory
# rather than a single HDF5 file. If a single HDF5 file was intended, confirm
# whether the suffix should be '.h5' and update whatever loads the model.
strain_convnet.save('bacteria_strain_convnet.h5py')

INFO:tensorflow:Assets written to: bacteria_strain_convnet.h5py\assets
