<a href="https://colab.research.google.com/github/jazbengu/COS711-ASSIGNMENT-TW0/blob/main/Joy_Bengu_25000307_COS711_Assignment_Two_Code.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install scikeras
!pip install --force-reinstall tensorflow



Collecting scikeras
  Downloading scikeras-0.13.0-py3-none-any.whl.metadata (3.1 kB)
Downloading scikeras-0.13.0-py3-none-any.whl (26 kB)
Installing collected packages: scikeras
Successfully installed scikeras-0.13.0
Collecting tensorflow
  Using cached tensorflow-2.17.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.2 kB)
Collecting absl-py>=1.0.0 (from tensorflow)
  Downloading absl_py-2.1.0-py3-none-any.whl.metadata (2.3 kB)
Collecting astunparse>=1.6.0 (from tensorflow)
  Using cached astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)
Collecting flatbuffers>=24.3.25 (from tensorflow)
  Using cached flatbuffers-24.3.25-py2.py3-none-any.whl.metadata (850 bytes)
Collecting gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 (from tensorflow)
  Using cached gast-0.6.0-py3-none-any.whl.metadata (1.3 kB)
Collecting google-pasta>=0.1.1 (from tensorflow)
  Using cached google_pasta-0.2.0-py3-none-any.whl.metadata (814 bytes)
Collecting h5py>=3.10.0 (from tensorflow)
  Download

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns

from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split, GridSearchCV, KFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD, Adam
from tensorflow.keras.optimizers import Optimizer
from tensorflow.keras.optimizers import RMSprop
from scikeras.wrappers import KerasClassifier
import matplotlib.pyplot as plt




ModuleNotFoundError: No module named 'scikeras'

In [26]:

def load_data(file_path):
    """Read the dataset from CSV and return imputed features and one-hot labels.

    The last column of the file is treated as the class label; every other
    column is treated as a numeric feature.

    Parameters
    ----------
    file_path : str or path-like
        Location of the CSV file; passed straight to ``pd.read_csv``.

    Returns
    -------
    X : pandas.DataFrame
        The feature columns with missing values replaced by the column mean.
    y : numpy.ndarray
        One-hot encoded labels as float32, one column per distinct label value.
    """
    data = pd.read_csv(file_path)

    features = data.iloc[:, :-1]
    labels = data.iloc[:, -1]

    # NOTE(review): the imputer is fit on the whole file, i.e. before the
    # caller's train/test split, so held-out rows influence the fill values.
    imputer = SimpleImputer(strategy='mean')

    # Build a fresh frame instead of writing back through ``data.iloc[...] =``:
    # that avoids mutating the loaded frame and avoids depending on pandas'
    # in-place dtype coercion rules for integer columns.
    X = pd.DataFrame(
        imputer.fit_transform(features),
        columns=features.columns,
        index=features.index,
    )

    # get_dummies returns bool or uint8 depending on the pandas version; cast
    # explicitly so the network always receives floating-point targets.
    y = pd.get_dummies(labels).to_numpy(dtype='float32')

    return X, y

def preprocess_data(X_train, X_test):
    """Standardise both splits with statistics learned from the training split.

    The scaler is fitted on ``X_train`` only and then applied to both inputs.
    Returns the scaled training data followed by the scaled test data.
    """
    fitted_scaler = StandardScaler().fit(X_train)
    train_scaled = fitted_scaler.transform(X_train)
    test_scaled = fitted_scaler.transform(X_test)
    return train_scaled, test_scaled


In [27]:
def create_model(optimizer='adam', learning_rate=0.001, activation='relu',
                 input_dim=13, num_classes=3):
    """Build and compile a softmax classifier with two hidden layers (128, 64).

    Parameters
    ----------
    optimizer : str
        ``'adam'`` or ``'sgd'`` (case-insensitive).
    learning_rate : float
        Learning rate handed to the chosen optimizer.
    activation : str
        Activation function used by both hidden layers.
    input_dim : int
        Number of feature columns fed to the network. Defaults to 13, the
        value that was previously hard-coded.
        NOTE(review): the old inline comment said "12 input features" while
        the code used 13 - confirm the dataset's feature count.
    num_classes : int
        Width of the softmax output layer. Defaults to 3.

    Returns
    -------
    A compiled ``Sequential`` model using categorical cross-entropy loss and
    tracking accuracy.

    Raises
    ------
    ValueError
        If ``optimizer`` is not a supported name. Previously any value other
        than ``'adam'`` silently selected SGD, which hid typos.
    """
    optimizer_classes = {'adam': Adam, 'sgd': SGD}
    optimizer_name = str(optimizer).lower()
    if optimizer_name not in optimizer_classes:
        raise ValueError(
            f"Unsupported optimizer {optimizer!r}; expected one of "
            f"{sorted(optimizer_classes)}"
        )
    opt = optimizer_classes[optimizer_name](learning_rate=learning_rate)

    # An explicit Input layer replaces the `input_dim=` keyword on Dense,
    # which Keras 3 flags as deprecated for Sequential models.
    model = Sequential([
        tf.keras.Input(shape=(input_dim,)),
        Dense(128, activation=activation),
        Dense(64, activation=activation),
        Dense(num_classes, activation='softmax'),
    ])

    model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
    return model


In [28]:
from scikeras.wrappers import KerasClassifier
def perform_grid_search(X_train, y_train, cv=3, n_jobs=-1):
    """Tune the network with an exhaustive cross-validated grid search.

    Searches batch size, epochs, optimizer, learning rate and activation,
    prints the best score and parameter set, and shows a heat map of the mean
    cross-validation score by batch size and epochs (averaged over the other
    searched parameters by ``pivot_table``).

    Parameters
    ----------
    X_train, y_train
        Training features and targets passed to ``GridSearchCV.fit``.
    cv : int
        Number of cross-validation folds. Defaults to 3.
    n_jobs : int
        Parallel jobs for ``GridSearchCV``. Defaults to -1 (all cores).
        NOTE(review): every worker builds its own TensorFlow model; if the
        search stalls, try ``n_jobs=1`` - confirm on the target runtime.

    Returns
    -------
    The best estimator found, as exposed by ``GridSearchCV.best_estimator_``.
    """
    # `model=` is the current SciKeras argument name; `build_fn=` is deprecated.
    model = KerasClassifier(model=create_model, verbose=0)

    # Only `model__`-prefixed keys are routed into create_model. The bare
    # 'optimizer' key used before set the wrapper's own compile option, which
    # has no effect because create_model returns an already compiled model,
    # so Adam and SGD were never actually compared.
    param_grid = {
        'batch_size': [10, 20],
        'epochs': [10, 50],
        'model__optimizer': ['adam', 'sgd'],
        'model__learning_rate': [0.001, 0.01],
        'model__activation': ['relu']
    }

    grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=n_jobs, cv=cv)
    grid_result = grid.fit(X_train, y_train)

    print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

    results_df = pd.DataFrame(grid_result.cv_results_)

    results_pivot = results_df.pivot_table(index='param_batch_size',
                                           columns='param_epochs',
                                           values='mean_test_score')

    # Plot heatmap using seaborn
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.heatmap(results_pivot, annot=True, fmt=".3f", cmap="YlGnBu", ax=ax)
    ax.set_title("Hyperparameter Tuning: Batch Size vs Epochs")
    ax.set_xlabel("Epochs")
    ax.set_ylabel("Batch Size")
    plt.show()
    return grid_result.best_estimator_

In [29]:
def train_and_compare_algorithms(X_train, y_train, X_test, y_test,
                                 epochs=50, batch_size=20):
    """Train one model with Adam and one with SGD and plot their validation loss.

    Both models come from ``create_model`` with its default learning rate and
    activation, and are validated on ``(X_test, y_test)`` after every epoch.

    Parameters
    ----------
    X_train, y_train
        Training features and targets.
    X_test, y_test
        Data used as ``validation_data`` during fitting.
    epochs : int
        Number of training epochs per model. Defaults to 50.
    batch_size : int
        Mini-batch size. Defaults to 20.

    Returns
    -------
    dict
        Maps the plot label (``'Adam'``, ``'SGD'``) to the Keras ``History``
        object returned by ``fit``, so callers can inspect the curves.
    """
    histories = {}
    # Adam is trained first, then SGD - the same order as the legend entries.
    for label, optimizer_name in (('Adam', 'adam'), ('SGD', 'sgd')):
        model = create_model(optimizer=optimizer_name)
        histories[label] = model.fit(
            X_train, y_train,
            validation_data=(X_test, y_test),
            epochs=epochs,
            batch_size=batch_size,
        )

    fig, ax = plt.subplots()
    for label, history in histories.items():
        ax.plot(history.history['val_loss'], label=label)
    # Title and axis labels so the figure is readable on its own.
    ax.set_title("Validation Loss: Adam vs SGD")
    ax.set_xlabel("Epoch")
    ax.set_ylabel("Validation loss")
    ax.legend()
    plt.show()

    return histories



In [30]:
def hybrid_learning(X_train, y_train, X_test, y_test):
    """Average the weights of an Adam-trained and an SGD-trained model and evaluate.

    Two models from ``create_model`` are trained independently for 50 epochs
    (batch size 20). Their weights are averaged tensor by tensor, the average
    is loaded into the Adam-trained model, and that model is evaluated on the
    test data.

    Parameters
    ----------
    X_train, y_train
        Training features and targets.
    X_test, y_test
        Data the averaged model is evaluated on.

    Returns
    -------
    tuple
        ``(loss, accuracy)`` as returned by ``evaluate`` for the hybrid model.
    """
    adam_model = create_model(optimizer='adam')
    sgd_model = create_model(optimizer='sgd')

    adam_model.fit(X_train, y_train, epochs=50, batch_size=20, verbose=0)
    sgd_model.fit(X_train, y_train, epochs=50, batch_size=20, verbose=0)

    # get_weights() returns a list of arrays with different shapes (kernels
    # and biases of each layer). Wrapping that list in np.array() raises
    # ValueError on NumPy >= 1.24, so average layer by layer instead.
    hybrid_weights = [
        (adam_w + sgd_w) / 2
        for adam_w, sgd_w in zip(adam_model.get_weights(), sgd_model.get_weights())
    ]
    # The Adam-trained model is reused as the container for the averaged weights.
    adam_model.set_weights(hybrid_weights)

    loss, accuracy = adam_model.evaluate(X_test, y_test)
    print(f"Hybrid model accuracy: {accuracy}")
    return loss, accuracy


In [31]:
def main(data_path='Almond.csv', seed=42):
    """Run the full experiment: load, split, scale, tune, compare, hybridise.

    Parameters
    ----------
    data_path : str
        CSV file handed to ``load_data``. Defaults to ``'Almond.csv'``.
    seed : int
        Seed used for the train/test split and for the Python, NumPy and
        TensorFlow random generators. Defaults to 42.
    """
    # Seed Python, NumPy and TensorFlow so weight initialisation and shuffling
    # repeat across runs.
    # NOTE(review): the grid search runs with n_jobs=-1; its worker processes
    # presumably do not inherit this seed - confirm if the search must be
    # reproducible too.
    tf.keras.utils.set_random_seed(seed)

    X, y = load_data(data_path)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)

    X_train, X_test = preprocess_data(X_train, X_test)

    best_model = perform_grid_search(X_train, y_train)
    # The tuned estimator was previously discarded; report how it does on the
    # held-out split so the search result is actually used.
    print(f"Tuned model test accuracy: {best_model.score(X_test, y_test)}")

    train_and_compare_algorithms(X_train, y_train, X_test, y_test)

    hybrid_learning(X_train, y_train, X_test, y_test)

if __name__ == "__main__":
    main()


KeyboardInterrupt: 