In [None]:
from csv_processor import CSVProcessor
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import csv

In [None]:
# Read the labelled data from the training directory and carve off a fixed
# 20% hold-out set. The seed keeps the partition identical across runs.
processor = CSVProcessor("../Training Data")
X_train, X_test, y_train, y_test = train_test_split(
    *processor.split(),  # unpacked as (features, labels)
    test_size=0.2,
    random_state=42,
)
# Disabled alternative: take the test set from its own directory instead of
# splitting the training data.
#processor = CSVProcessor("../Testing Data")
#X_test, y_test = processor.split()

In [None]:
def apply_data_augmentation(X_train, y_train, num_augmented_samples=1000, random_state=None):
    """Append noisy, rescaled and randomly reversed copies of training rows.

    Every synthetic row is derived from one randomly chosen row of ``X_train``:
    Gaussian noise (mean 0, std 0.1) is added to each value, the whole row is
    multiplied by a single factor drawn uniformly from [0.9, 1.1), and with
    probability 0.5 the order of its values is reversed. The synthetic row
    receives the label found at the same *position* in ``y_train``.

    Parameters
    ----------
    X_train : pandas.DataFrame or array-like, 2-D
        Feature rows. Rows are addressed by position, so index labels are
        irrelevant.
    y_train : pandas.Series or array-like
        One label per row of ``X_train``, in the same order. Index labels are
        ignored; only the order matters.
    num_augmented_samples : int, default 1000
        Number of synthetic rows to create. Zero or a negative number returns
        copies of the original data without any additions.
    random_state : int or None, default None
        Seed for a private ``numpy.random.RandomState``. ``None`` keeps using
        NumPy's global random state.

    Returns
    -------
    tuple of numpy.ndarray
        ``(X, y)`` holding the original rows first, followed by the synthetic
        rows and their labels.

    Raises
    ------
    ValueError
        If ``X_train`` and ``y_train`` differ in length, or if synthetic rows
        are requested from an empty training set.
    """
    features = np.asarray(X_train)
    # Converting to a plain array makes every label lookup positional. Indexing a
    # pandas Series with ``y_train[i]`` is label-based, which breaks (or silently
    # picks another row's label) once the Series index is no longer 0..n-1.
    labels = np.asarray(y_train)

    if len(features) != len(labels):
        raise ValueError(
            "X_train and y_train must have the same number of rows "
            f"(got {len(features)} and {len(labels)})"
        )
    if num_augmented_samples <= 0:
        # Nothing to add; concatenating with an empty 1-D array would fail.
        return features.copy(), labels.copy()
    if len(features) == 0:
        raise ValueError("cannot draw augmented samples from an empty training set")

    # The legacy RandomState exposes the same functions as the np.random module,
    # so the unseeded path draws from the global state as before.
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    augmented_data = []
    augmented_labels = []
    for _ in range(num_augmented_samples):
        index = rng.randint(len(features))
        original_sample = features[index]

        sample = original_sample + rng.normal(0, 0.1, original_sample.shape)  # additive noise
        sample = sample * rng.uniform(0.9, 1.1)  # one scale factor for the whole row
        if rng.rand() < 0.5:
            sample = np.flip(sample)  # reverse the order of the values

        augmented_data.append(sample)
        augmented_labels.append(labels[index])

    X_train_augmented = np.concatenate((features, np.array(augmented_data)), axis=0)
    y_train_augmented = np.concatenate((labels, np.array(augmented_labels)), axis=0)

    return X_train_augmented, y_train_augmented

# X_train is a DataFrame whose index was shuffled by the train/test split, so it
# is renumbered 0..n-1 here. The labels are handed over as a plain array so that
# they are matched to the feature rows by position rather than by index label.
X_train_augmented, y_train_augmented = apply_data_augmentation(
    X_train.reset_index(drop=True),
    np.asarray(y_train),
    num_augmented_samples=1000,
)

In [None]:
# From here on the training names refer to the original rows plus the
# synthetic ones (NumPy arrays rather than pandas objects).
X_train = X_train_augmented
y_train = y_train_augmented

In [None]:
# Standardize the features. The scaler's statistics come from the training
# features only; the same fitted transformation is then applied to both splits
# so the test data never influences the scaling.
scaler = StandardScaler()
scaler.fit(X_train)

X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Point the working names at the standardized arrays so that the model cells
# below train and evaluate on scaled features. No splitting happens here.
# NOTE: once these names are rebound, re-running the scaling cell would fit the
# scaler on data that has already been scaled.
X_train = X_train_scaled
X_test = X_test_scaled

In [None]:
# Test different hyperparameter combinations for the Keras model
#
# NOTE: the sweep below is disabled by wrapping it in a bare triple-quoted
# string, so none of it executes. As the string is the last expression of the
# cell, the notebook will presumably echo its text as the cell output.
#
# What the disabled code describes: for each of three hand-picked configurations
# it builds a Sequential model of `hidden_layers` x (Dense -> BatchNormalization
# -> Dropout) followed by a softmax layer, trains it on X_train/y_train,
# evaluates it on X_test/y_test, and writes one row per configuration to
# 'model_results.csv'.
#
# NOTE(review): the recorded accuracy is measured on X_test/y_test, the same
# split used for the final score at the end of the notebook. If this sweep is
# re-enabled, consider choosing hyperparameters on a separate validation split.
"""
# List of hyperparameter combinations
input_shape = X_train.shape[1]
num_classes = len(np.unique(y_train))
param_combinations = [
    {'hidden_layers': 1, 'hidden_units': 64, 'dropout': 0.5, 'batch_size': 4, 'epochs': 100},
    {'hidden_layers': 2, 'hidden_units': 128, 'dropout': 0.5, 'batch_size': 8, 'epochs': 150},
    {'hidden_layers': 3, 'hidden_units': 256, 'dropout': 0.5, 'batch_size': 16, 'epochs': 200},
]

# Create an empty list to store the results
results = []

# Iterate over each parameter combination
for params in param_combinations:
    # Create the model with the given parameters
    model = Sequential()
    for _ in range(params['hidden_layers']):
        model.add(Dense(params['hidden_units'], activation='relu'))
        model.add(BatchNormalization())
        model.add(Dropout(params['dropout']))
    model.add(Dense(num_classes, activation='softmax'))
    
    # Compile and fit the model
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    model.fit(X_train, y_train, batch_size=params['batch_size'], epochs=params['epochs'], verbose=1)
    
    # Evaluate the model on the testing data
    loss, accuracy = model.evaluate(X_test, y_test)
    
    # Append the parameters and accuracy to the results list
    result = {
        'hidden_layers': params['hidden_layers'],
        'hidden_units': params['hidden_units'],
        'dropout': params['dropout'],
        'batch_size': params['batch_size'],
        'epochs': params['epochs'],
        'accuracy': accuracy
    }
    results.append(result)

# Save the results to a CSV file
filename = 'model_results.csv'
keys = results[0].keys()
with open(filename, 'w', newline='') as file:
    writer = csv.DictWriter(file, keys)
    writer.writeheader()
    writer.writerows(results)
"""

In [None]:
# Network dimensions come from the data: one input per feature column and one
# softmax output per distinct label value found in y_train.
input_shape = X_train.shape[1]
num_classes = len(np.unique(y_train))
# NOTE(review): the sparse categorical loss used at compile time presumably
# expects integer labels in 0..num_classes-1 — confirm the label encoding.

# Two hidden Dense blocks (64 then 32 units), each followed by batch
# normalization and 50% dropout, ending in a softmax classifier. Declaring the
# input layer up front builds the weights immediately, so the model can be
# inspected (e.g. model.summary()) before any training has run.
model = Sequential([
    tf.keras.Input(shape=(input_shape,)),
    Dense(64, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),
    Dense(32, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),
    Dense(num_classes, activation='softmax')
])

In [None]:
# Configure training: Adam optimizer, sparse categorical cross-entropy (labels
# are passed as class ids, not one-hot vectors), accuracy reported per epoch.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train on the full training set. The returned History object is kept so the
# per-epoch loss/accuracy can be inspected or plotted afterwards, and so its
# repr is not echoed as the cell's output.
history = model.fit(X_train, y_train, batch_size=4, epochs=200, verbose=1)

In [None]:
# Score the trained network on the held-out split. evaluate() returns the loss
# followed by the metrics given at compile time (accuracy only).
loss, accuracy = model.evaluate(x=X_test, y=y_test)

print("Testing Accuracy:", accuracy)