# Importing the required Libraries 

In [94]:
from scipy.io import loadmat
import pandas as pd

In [99]:
# Load data from .mat file using scipy.io module
data_dict = scipy.io.loadmat('WLDataCW.mat')

# Extract data and label from the dictionary loaded above
data = data_dict['data']
label = data_dict['label']

# Reshape data into a 2D array (360 rows x 31,744 columns) and convert it to a pandas DataFrame with column names
data_df = pd.DataFrame(data.reshape(62*512, 360).T, columns=[f'feature_{i}' for i in range(62*512)])

# Reshape label into a 1D array (360 elements) and convert it to a pandas DataFrame with a single column named 'label'
label_df = pd.DataFrame(label.reshape(360,), columns=['label'])

# Concatenate data and label DataFrames along columns axis (axis=1) to create the final DataFrame
df = pd.concat([data_df, label_df], axis=1)

In [96]:
df

Unnamed: 0,feature_0,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,...,feature_31735,feature_31736,feature_31737,feature_31738,feature_31739,feature_31740,feature_31741,feature_31742,feature_31743,label
0,3.645161,1.577007,-2.884674,-7.294243,-8.769628,-6.107172,-0.875113,3.444995,3.762372,-0.507442,...,14.552163,21.187590,25.256897,22.113747,13.521024,6.434617,6.510839,12.609276,17.854654,0
1,-1.225078,2.687866,3.478920,-0.998662,-7.863265,-11.353711,-7.627178,2.002784,12.165368,17.727137,...,6.335400,12.920719,12.482493,4.597783,-7.552145,-19.150581,-26.119247,-26.754780,-22.282345,0
2,8.414157,2.379914,-3.338521,-7.477322,-9.546935,-9.531569,-7.968606,-6.104305,-5.469647,-6.837555,...,-13.434300,-7.721217,2.970876,9.848282,6.062175,-6.689582,-18.814631,-20.730404,-11.228072,0
3,3.311837,2.850084,1.313545,0.674265,1.052333,0.343137,-3.497101,-9.780030,-14.930172,-15.362961,...,-7.649138,-3.250118,4.343658,10.612925,10.251337,2.091847,-8.952351,-15.475546,-13.764502,0
4,-9.322539,-2.384815,3.101957,4.720293,3.202580,2.139584,4.917171,11.307302,16.890072,16.279924,...,-16.844177,-21.338095,-21.321344,-19.264174,-17.024244,-14.048934,-8.670050,-1.055577,5.491176,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
355,-10.650115,-9.326292,-9.187932,-9.651434,-9.201318,-6.872770,-3.324494,-0.485164,-0.027376,-1.959681,...,-8.496918,-7.160340,-5.629473,-3.690045,-1.307681,1.285857,3.856766,6.362743,8.822712,1
356,10.773194,12.778286,11.652669,8.970365,7.637110,8.529339,9.320190,6.831780,0.592656,-6.088131,...,-0.446031,0.694977,1.714937,2.762491,3.859814,4.930628,5.979158,7.117027,8.307706,1
357,-11.673164,-9.803854,-7.338249,-5.520587,-3.916504,-1.764414,0.401452,0.814396,-1.765881,-6.511787,...,15.167360,13.969464,12.170960,10.332232,8.886376,8.105309,7.956192,7.909022,7.063335,1
358,26.508423,29.570061,30.927824,29.329739,25.063768,19.500076,14.430696,11.417859,11.171830,13.123591,...,3.124659,2.387371,1.217971,0.343328,-0.034028,-0.270347,-0.687760,-1.110718,-1.096807,1


# Logistic Regression Functions

In [100]:
# Import necessary modules
import numpy as np
from sklearn.model_selection import KFold

# Define sigmoid activation function
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# Define forward propagation function
def forward_propagation(X, w):
    z = np.dot(X, w)
    y_pred = sigmoid(z)
    return y_pred

# Define gradient calculation function
def calculate_gradient(X, y, y_pred):
    gradient = np.dot(X.T, (y_pred - y)) / len(y)
    return gradient

# Define logistic regression function
def logistic_regression(X, y, lr=0.01, n_iterations=1000):
    w = np.zeros(X.shape[1])
    for i in range(n_iterations):
        y_pred = forward_propagation(X, w)
        gradient = calculate_gradient(X, y, y_pred)
        w -= lr * gradient
    return w

# Define function to evaluate model accuracy
def evaluate_model(X, y, w):
    y_pred = forward_propagation(X, w)
    y_pred_binary = np.round(y_pred)
    accuracy = np.mean(y_pred_binary == y)
    return accuracy

# Define function to perform cross-validation
def cross_validation(X, y, n_folds=5, lr=0.01, n_iterations=1000):
    kf = KFold(n_splits=n_folds)
    accuracies = []
    for train_index, test_index in kf.split(X):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        w = logistic_regression(X_train, y_train, lr=lr, n_iterations=n_iterations)
        accuracy = evaluate_model(X_test, y_test, w)
        accuracies.append(accuracy)
    mean_accuracy = np.mean(accuracies)
    return mean_accuracy


In [101]:
# perform cross-validation
mean_accuracy = cross_validation(X, y)

print(f'Mean accuracy: {mean_accuracy:.4f}')

  return 1 / (1 + np.exp(-x))


Mean accuracy: 0.4694


# CNN model 

In [103]:
# Import necessary modules
import numpy as np
from sklearn.model_selection import KFold

# Initialize KFold cross-validation with 5 splits, shuffling, and random state 42
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Split data into 5 folds using KFold cross-validation
folds = list(kf.split(X, y))


In [104]:
# Import necessary modules from Keras library
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv1D, MaxPooling1D

# Define function to create a Convolutional Neural Network (CNN) model
def create_model():
    # Initialize the CNN model
    model = Sequential()
    
    # Add 1D convolution layer with 32 filters, kernel size of 3, ReLU activation, and input shape of (31744, 1)
    model.add(Conv1D(filters=32, kernel_size=3, activation='relu', input_shape=(31744, 1)))
    
    # Add max pooling layer with pool size of 2
    model.add(MaxPooling1D(pool_size=2))
    
    # Add 1D convolution layer with 64 filters, kernel size of 3, ReLU activation
    model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
    
    # Add max pooling layer with pool size of 2
    model.add(MaxPooling1D(pool_size=2))
    
    # Add 1D convolution layer with 128 filters, kernel size of 3, ReLU activation
    model.add(Conv1D(filters=128, kernel_size=3, activation='relu'))
    
    # Add max pooling layer with pool size of 2
    model.add(MaxPooling1D(pool_size=2))
    
    # Flatten the output of the previous layer to a 1D vector
    model.add(Flatten())
    
    # Add dense layer with 64 units and ReLU activation
    model.add(Dense(units=64, activation='relu'))
    
    # Add dropout layer with a rate of 0.5
    model.add(Dropout(0.5))
    
    # Add dense layer with 1 unit and sigmoid activation for binary classification
    model.add(Dense(units=1, activation='sigmoid'))
    
    # Compile the model with Adam optimizer, binary cross-entropy loss, and accuracy metric
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    
    # Return the compiled model
    return model

In [106]:
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.metrics import accuracy_score

results = []

# loop through each fold
for train_idx, val_idx in folds:
    # split the data into training and validation sets
    X_train, y_train = X[train_idx], y[train_idx]
    X_val, y_val = X[val_idx], y[val_idx]
    
    # add a dimension to the data for the Conv1D layer
    X_train = np.expand_dims(X_train, axis=2)
    X_val = np.expand_dims(X_val, axis=2)
    
    # create the Keras model with given parameters
    model = KerasClassifier(build_fn=create_model, epochs=10, batch_size=32, verbose=0)
    
    # train the model on the training data
    model.fit(X_train, y_train)
    
    # make predictions on the validation data
    y_pred = model.predict(X_val)
    
    # compute accuracy score for the validation data
    acc = accuracy_score(y_val, y_pred)
    
    # append the accuracy score to results list
    results.append(acc)

# compute the mean of all the accuracy scores
print("The mean accuracy is ", np.mean(results))

  model = KerasClassifier(build_fn=create_model, epochs=10, batch_size=32, verbose=0)




  model = KerasClassifier(build_fn=create_model, epochs=10, batch_size=32, verbose=0)




  model = KerasClassifier(build_fn=create_model, epochs=10, batch_size=32, verbose=0)




  model = KerasClassifier(build_fn=create_model, epochs=10, batch_size=32, verbose=0)




  model = KerasClassifier(build_fn=create_model, epochs=10, batch_size=32, verbose=0)


The mean accuracy is  0.85


# Parameter Tuning Of CNN

In [108]:
# Import necessary modules
import numpy as np
from sklearn.model_selection import KFold, GridSearchCV
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.metrics import accuracy_score

# Initialize KFold cross-validation with 5 splits, shuffling, and random state 42
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Split data into 5 folds using KFold cross-validation
folds = list(kf.split(X, y))

# Define function to create a Convolutional Neural Network (CNN) model
def create_model(dropout_rate=0.5, optimizer='adam'):
    # Initialize the CNN model
    model = Sequential()

    # Add 1D convolution layer with 32 filters, kernel size of 3, ReLU activation, and input shape of (31744, 1)
    model.add(Conv1D(filters=32, kernel_size=3, activation='relu', input_shape=(31744, 1)))

    # Add max pooling layer with pool size of 2
    model.add(MaxPooling1D(pool_size=2))

    # Add 1D convolution layer with 64 filters, kernel size of 3, ReLU activation
    model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))

    # Add max pooling layer with pool size of 2
    model.add(MaxPooling1D(pool_size=2))

    # Add 1D convolution layer with 128 filters, kernel size of 3, ReLU activation
    model.add(Conv1D(filters=128, kernel_size=3, activation='relu'))

    # Add max pooling layer with pool size of 2
    model.add(MaxPooling1D(pool_size=2))

    # Flatten the output of the previous layer to a 1D vector
    model.add(Flatten())

    # Add dense layer with 64 units and ReLU activation
    model.add(Dense(units=64, activation='relu'))

    # Add dropout layer with a rate of dropout_rate
    model.add(Dropout(dropout_rate))

    # Add dense layer with 1 unit and sigmoid activation for binary classification
    model.add(Dense(units=1, activation='sigmoid'))

    # Compile the model with given optimizer, binary cross-entropy loss, and accuracy metric
    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

    # Return the compiled model
    return model

# Set the hyperparameters to tune using grid search
param_grid = {
    'dropout_rate': [0.25, 0.5, 0.75],
    'optimizer': ['adam', 'sgd']
}

# Create the Keras model
model = KerasClassifier(build_fn=create_model, epochs=10, batch_size=32, verbose=0)

# Create the GridSearchCV object
grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=folds, verbose=1)

# Fit the grid search object to the data
grid_result = grid.fit(X, y)

# Print the best hyperparameters and mean accuracy score
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))


  model = KerasClassifier(build_fn=create_model, epochs=10, batch_size=32, verbose=0)


Fitting 5 folds for each of 6 candidates, totalling 30 fits
Best: 0.936111 using {'dropout_rate': 0.5, 'optimizer': 'adam'}


# NOTES 

In [109]:
#The initial accuracy of the logistic regression model was 47%, 
#while the CNN model yielded a much higher accuracy 
#of 85%. After tuning the hyperparameters of the CNN model using grid search, 
#the accuracy of the CNN model increased even further to 93%.
#This significant improvement in accuracy demonstrates the power of CNNs in modeling complex
#relationships in data with multiple features. By tuning the hyperparameters of the CNN model using grid search, I was
#able to fine-tune the model and achieve better performance.