In [3]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/undersampleddatausingrepeatedenn/undersampled_using_repeated_enn.csv


In [5]:
import pandas as pd
import numpy as np

df = pd.read_csv('/kaggle/input/undersampleddatausingrepeatedenn/undersampled_using_repeated_enn.csv')

In [9]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense, Conv1D, MaxPooling1D, Flatten
from keras.utils import np_utils

# Extract the features and label
features = df.drop("Label", axis=1).values
labels = df["Label"].values

# Encode the labels to numeric values
encoder = LabelEncoder()
labels = encoder.fit_transform(labels)
labels = np_utils.to_categorical(labels)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)

# Reshape the data for CNN
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

# Define the CNN model
model = Sequential()
model.add(Conv1D(64, 3, activation='relu', input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(Conv1D(64, 3, activation='relu'))
model.add(MaxPooling1D(pool_size=2))
model.add(Flatten())
model.add(Dense(100, activation='relu'))
model.add(Dense(labels.shape[1], activation='softmax'))

# Compile the model
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

# Train the model
model.fit(X_train, y_train, epochs=5, batch_size=64, validation_data=(X_test, y_test))

# Evaluate the model
score = model.evaluate(X_test, y_test, verbose=0)
print("Test loss:", score[0])
print("Test accuracy:", score[1])


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test loss: 0.46912312507629395
Test accuracy: 0.8924428224563599


# **Use Optuna to Optimize Hyper Parameter CNN**

In [11]:
!pip install optuna

/bin/bash: /opt/conda/lib/libtinfo.so.6: no version information available (required by /bin/bash)
[0m

In [None]:
import optuna
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense, Conv1D, MaxPooling1D, Flatten
from keras.utils import np_utils

# Define the objective function for Optuna to optimize
def objective(trial):
    # Define the hyperparameters to optimize
    filters1 = trial.suggest_int("filters1", 32, 128)
    filters2 = trial.suggest_int("filters2", 32, 128)
    window1 = trial.suggest_int("window1", 2, 5)
    window2 = trial.suggest_int("window2", 2, 5)
    dense_units = trial.suggest_int("dense_units", 64, 256)
    
    # Extract the features and label
    features = df.drop("Label", axis=1).values
    labels = df["Label"].values

    # Encode the labels to numeric values
    encoder = LabelEncoder()
    labels = encoder.fit_transform(labels)
    labels = np_utils.to_categorical(labels)

    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)

    # Reshape the data for CNN
    X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
    X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

    # Define the CNN model
    model = Sequential()
    model.add(Conv1D(filters=filters1, kernel_size=window1, activation='relu', input_shape=(X_train.shape[1], X_train.shape[2])))
    model.add(Conv1D(filters=filters2, kernel_size=window2, activation='relu'))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    model.add(Dense(dense_units, activation='relu'))
    model.add(Dense(labels.shape[1], activation='softmax'))

    # Compile the model
    model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

    # Train the model
    model.fit(X_train, y_train, epochs=5, batch_size=64, validation_data=(X_test, y_test), verbose=0)

    # Evaluate the model
    score = model.evaluate(X_test, y_test, verbose=0)
    return score[0]

# Define the study and optimize the objective function
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=50)

# Print the best hyperparameters and the best loss
print("Best hyperparameters: ", study.best_params)
print("Best loss: ", study.best_value)


[32m[I 2023-02-19 20:38:51,944][0m A new study created in memory with name: no-name-882de31c-4efc-443f-a3fe-2b1182f15061[0m
[32m[I 2023-02-19 20:43:13,665][0m Trial 0 finished with value: 0.2756122350692749 and parameters: {'filters1': 117, 'filters2': 46, 'window1': 2, 'window2': 5, 'dense_units': 71}. Best is trial 0 with value: 0.2756122350692749.[0m
[32m[I 2023-02-19 20:46:37,434][0m Trial 1 finished with value: 0.6039955019950867 and parameters: {'filters1': 36, 'filters2': 91, 'window1': 4, 'window2': 5, 'dense_units': 202}. Best is trial 0 with value: 0.2756122350692749.[0m
[32m[I 2023-02-19 20:50:04,507][0m Trial 2 finished with value: 0.26765820384025574 and parameters: {'filters1': 61, 'filters2': 62, 'window1': 4, 'window2': 5, 'dense_units': 169}. Best is trial 2 with value: 0.26765820384025574.[0m
[32m[I 2023-02-19 20:53:36,175][0m Trial 3 finished with value: 0.2662080228328705 and parameters: {'filters1': 92, 'filters2': 62, 'window1': 2, 'window2': 3, 'den