In [7]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, GaussianNoise
from sklearn.metrics import confusion_matrix
import sys
%matplotlib inline
import matplotlib.pyplot as plt
import plotly.express as px

In [22]:
def genesis_train(file):
    data = pd.read_csv(file)
    if 'Unnamed: 32' in data.columns:
        del data['Unnamed: 32']
    print('Number of datapoints in Training dataset: ',len(data))
    X_train = data.iloc[:, 2:].values
    y_train = data.iloc[:, 1].values
    
    test = pd.read_csv('./data/test.csv')
    del test['Unnamed: 32']
    print('Number of datapoints in Testing dataset: ',len(test))
    X_test = test.iloc[:, 2:].values
    y_test = test.iloc[:, 1].values

    labelencoder = LabelEncoder()
    y_train = labelencoder.fit_transform(y_train)
    y_test = labelencoder.fit_transform(y_test)

    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)

    model = Sequential()

    model.add(Dense(16, activation='relu', input_dim=30))
    model.add(Dropout(0.1))
    model.add(Dense(16, activation='relu'))
    model.add(Dropout(0.1))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    model.fit(X_train, y_train, batch_size=100, epochs=10)

    scores = model.evaluate(X_test, y_test)
    print("Loss: ", scores[0])        #Loss
    print("Accuracy: ", scores[1])    #Accuracy

    #Saving Model
    model.save("./output.h5")
    return scores[1]

def genesis_train_gaussian_noise(file):
    data = pd.read_csv(file)
    if 'Unnamed: 32' in data.columns:
        del data['Unnamed: 32']
    print('Number of datapoints in Training dataset: ',len(data))
    X_train = data.iloc[:, 2:].values
    y_train = data.iloc[:, 1].values
    
    data = GaussianNoise(0.2)(data)
    
    test = pd.read_csv('./data/test.csv')
    del test['Unnamed: 32']
    print('Number of datapoints in Testing dataset: ',len(test))
    X_test = test.iloc[:, 2:].values
    y_test = test.iloc[:, 1].values

    labelencoder = LabelEncoder()
    y_train = labelencoder.fit_transform(y_train)
    y_test = labelencoder.fit_transform(y_test)

    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)

    model = Sequential()

    model.add(Dense(16, activation='relu', input_dim=30))
    model.add(Dropout(0.1))
    model.add(Dense(16, activation='relu'))
    model.add(Dropout(0.1))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    model.fit(X_train, y_train, batch_size=100, epochs=10)

    scores = model.evaluate(X_test, y_test)
    print("Loss: ", scores[0])        #Loss
    print("Accuracy: ", scores[1])    #Accuracy

    #Saving Model
    model.save("./output.h5")
    return scores[1]

In [27]:
genesis_train('./data/data.csv')

Number of datapoints in Training dataset:  494
Number of datapoints in Testing dataset:  75
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss:  0.3359147608280182
Accuracy:  0.9200000166893005


0.9200000166893005

In [25]:
genesis_train_gaussian_noise('./data/data.csv')

Number of datapoints in Training dataset:  494
Number of datapoints in Testing dataset:  75
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss:  0.22832387685775757
Accuracy:  0.9466666579246521


0.9466666579246521