# 1. Fully connected neural networks

Import libraries

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.callbacks import Callback
from IPython.display import display
import tkinter as tk 
from tkinter import filedialog

File selection

In [4]:
# Relative path of the CSV file that the loading cell reads with pd.read_csv.
file_path = "./Health_Sleep_Statistics.csv"

Load data, print column names and dataset size

In [None]:
# Read the CSV into a DataFrame and report its schema and size, as the
# section heading promises.
data = pd.read_csv(file_path)
print(f"Columns: {list(data.columns)}")
print(f"Dataset size: {data.shape[0]} rows x {data.shape[1]} columns")

Callback to print training progress

In [2]:
class TrainingProgress(Callback):
    """Keras callback that prints a one-line loss/accuracy summary per epoch."""

    def on_epoch_end(self, epoch, logs=None):
        """Print the metrics reported for the epoch that just finished.

        Args:
            epoch: Zero-based epoch index; printed one-based.
            logs: Mapping of metric names to values. May be None or lack a
                metric, in which case ``nan`` is printed instead of raising.
        """
        # The signature allows logs=None, so never subscript it directly.
        logs = logs or {}
        loss = logs.get("loss", float("nan"))
        accuracy = logs.get("accuracy", float("nan"))
        print(f'Epoch {epoch+1}: loss = {loss:.4f}, accuracy = {accuracy:.4f}')

Displaying the table

In [None]:
# Show a rich preview of the first five rows of the loaded frame.
print("First 5 rows of the dataset:")
preview_rows = data.head(5)
display(preview_rows)

Encode categorical data

In [6]:
# Replace every object-typed column with integer codes, keeping the fitted
# encoder of each column so the codes can be mapped back later.
label_encoders = {}
categorical_columns = data.select_dtypes(include=['object']).columns
for column in categorical_columns:
    encoder = LabelEncoder()
    data[column] = encoder.fit_transform(data[column])
    label_encoders[column] = encoder

# Features are all columns except the last one; the last column is the target.
X = data.iloc[:, :-1].to_numpy()
y = data.iloc[:, -1].to_numpy()

Splitting data into training and test sets

In [7]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
split_parts = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
X_train, X_test, y_train, y_test = split_parts

Standardizing data

In [8]:
# Fit the scaler on the training features only, then apply the same
# transformation to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

Building the model

In [9]:
# Size the softmax layer from the labels instead of hard-coding 10 units.
# SparseCategoricalCrossentropy needs every label in [0, num_classes), so the
# largest label + 1 is the smallest valid width (y holds the labels of both splits).
num_classes = int(np.max(y)) + 1

# Two hidden ReLU layers followed by a softmax over the classes.
model = Sequential([
    Input(shape=(X_train.shape[1],)),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(num_classes, activation='softmax')
])

Compiling the model

In [10]:
# Adam with an explicit learning rate; the sparse loss is used because the
# model is trained on y_train directly rather than on one-hot vectors.
optimizer = Adam(learning_rate=0.001)
loss_fn = SparseCategoricalCrossentropy()
model.compile(optimizer=optimizer, loss=loss_fn, metrics=['accuracy'])

Training the model with a callback

In [None]:
# Train for 50 epochs; the TrainingProgress callback prints one summary line
# per epoch. The held-out test split is passed as validation data here.
model.fit(
    X_train,
    y_train,
    epochs=50,
    batch_size=32,
    validation_data=(X_test, y_test),
    callbacks=[TrainingProgress()],
)

Evaluating the model

In [None]:
# Evaluate on the test split; evaluate() yields the loss followed by the
# compiled accuracy metric.
evaluation = model.evaluate(X_test, y_test)
loss, accuracy = evaluation
print(f'\nModel accuracy: {accuracy:.4f}')

Predictions

In [13]:
# Class scores per test row, then the index of the highest score as the
# predicted class.
y_pred = model.predict(X_test)
y_pred_classes = y_pred.argmax(axis=1)

Print accuracy

In [None]:
# Re-print the test accuracy obtained from model.evaluate in the evaluation cell.
print(f'MLP Accuracy: {accuracy:.4f}')

Classification report and confusion matrix

In [None]:
# Per-class precision/recall/F1, reported with four decimals.
print("\nClassification Report:")
report_text = classification_report(y_test, y_pred_classes, digits=4)
print(report_text)

# Counts of true class (rows) versus predicted class (columns).
print("Confusion Matrix:")
matrix = confusion_matrix(y_test, y_pred_classes)
print(matrix)

Creating heatmap with readable labels

In [None]:
# Correlation matrix of the (already label-encoded) frame drawn as an
# annotated heatmap on an explicit Axes.
fig, ax = plt.subplots(figsize=(14, 10))
heatmap = sns.heatmap(data.corr(), annot=True, cmap='coolwarm', fmt='.2f', ax=ax)

# Tilt the column labels so long names stay readable; keep row labels horizontal.
heatmap.set_xticklabels(heatmap.get_xticklabels(), rotation=45, horizontalalignment='right')
heatmap.set_yticklabels(heatmap.get_yticklabels(), rotation=0, horizontalalignment='right')

ax.set_title('Correlation Heatmap')
plt.tight_layout()  # Adjusts the plot to ensure everything fits without overlapping
plt.show()

# 2. Convolutional neural networks

In [17]:
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

Disable warnings

In [18]:
import warnings
warnings.filterwarnings("ignore", category=UserWarning, module="keras.src.trainers.data_adapters.py_dataset_adapter")

Import libraries

In [19]:
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Input
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os
from IPython.display import display

Paths to the training and test image directories

In [20]:
# Root folders that the image generators below read from via flow_from_directory.
# NOTE(review): presumably one sub-folder per class inside each — confirm the layout.
train_dir = './dataset/train'
test_dir = './dataset/test'

Function to check that the training and test directories exist

In [21]:
def prepare_data(train_dir, test_dir):
    """Verify that both dataset directories exist.

    Args:
        train_dir: Path expected to be the training image directory.
        test_dir: Path expected to be the test image directory.

    Raises:
        FileNotFoundError: If either path is not an existing directory; the
            training directory is checked first.
    """
    required_dirs = (
        (train_dir, f"Training directory not found: {train_dir}"),
        (test_dir, f"Test directory not found: {test_dir}"),
    )
    for path, message in required_dirs:
        if os.path.isdir(path):
            continue
        raise FileNotFoundError(message)

Data generators for training, validation, and testing

In [None]:
# Options shared by every directory iterator: images are resized to 150x150,
# served in batches of 32 and labelled with one-hot ('categorical') targets.
flow_options = dict(
    target_size=(150, 150),
    batch_size=32,
    class_mode='categorical',
)

# Training and validation iterators come from the same folder; the generator
# reserves 20% of it for the 'validation' subset.
train_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)
train_generator = train_datagen.flow_from_directory(
    train_dir, subset='training', **flow_options
)
validation_generator = train_datagen.flow_from_directory(
    train_dir, subset='validation', **flow_options
)

# The test iterator reads its own folder with the same pixel rescaling.
test_datagen = ImageDataGenerator(rescale=1./255)
test_generator = test_datagen.flow_from_directory(test_dir, **flow_options)


Function to train a model from scratch

In [23]:
def build_and_train_model_from_scratch(train_generator, validation_generator):
    """Build a small CNN and train it on the given generators for 20 epochs.

    The network has three Conv2D/MaxPooling2D stages, a 512-unit dense layer
    and a softmax sized from ``train_generator.num_classes``.

    Args:
        train_generator: Iterator yielding (images, one-hot labels) batches of
            150x150 RGB images; must expose ``samples``, ``batch_size`` and
            ``num_classes``.
        validation_generator: Iterator of the same kind used for validation;
            must expose ``samples`` and ``batch_size``.

    Returns:
        The Keras ``History`` object returned by ``model.fit``.
    """
    model = Sequential([
        Input(shape=(150, 150, 3)),
        Conv2D(32, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Conv2D(128, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Flatten(),
        Dense(512, activation='relu'),
        Dense(train_generator.num_classes, activation='softmax')
    ])

    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    # Floor division gives 0 when a split holds fewer samples than one batch;
    # always run at least one step so such small datasets still train/validate.
    steps_per_epoch = max(1, train_generator.samples // train_generator.batch_size)
    validation_steps = max(1, validation_generator.samples // validation_generator.batch_size)

    history = model.fit(
        train_generator,
        steps_per_epoch=steps_per_epoch,
        epochs=20,
        validation_data=validation_generator,
        validation_steps=validation_steps
    )

    # Previously the history was computed and then discarded; return it so the
    # caller can inspect or plot the training curves.
    return history

Function to train a model with transfer learning

In [24]:
def build_and_train_model_with_transfer_learning(train_generator, validation_generator):
    """Train a classifier on top of EfficientNetB0 in two phases.

    Phase 1 trains only the new dense head with the base frozen (10 epochs).
    Phase 2 unfreezes the base and continues for 10 more epochs with a much
    smaller learning rate.

    Args:
        train_generator: Iterator yielding (images, one-hot labels) batches of
            150x150 RGB images; must expose ``samples``, ``batch_size`` and
            ``num_classes``.
        validation_generator: Iterator of the same kind used for validation;
            must expose ``samples`` and ``batch_size``.

    Returns:
        Tuple ``(history, history_finetune)`` with the ``History`` objects of
        the frozen-base phase and the fine-tuning phase.
    """
    # NOTE(review): Keras documents EfficientNet as doing its own input
    # normalisation and expecting pixel values in [0, 255]. Generators built
    # with rescale=1./255 would feed [0, 1] instead — confirm the input range.
    base_model = EfficientNetB0(include_top=False, input_shape=(150, 150, 3))
    base_model.trainable = False

    model = Sequential([
        base_model,
        Flatten(),
        Dense(512, activation='relu'),
        Dense(train_generator.num_classes, activation='softmax')
    ])

    # Floor division gives 0 when a split holds fewer samples than one batch;
    # always run at least one step.
    steps_per_epoch = max(1, train_generator.samples // train_generator.batch_size)
    validation_steps = max(1, validation_generator.samples // validation_generator.batch_size)

    # Phase 1: only the new head is trainable.
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    history = model.fit(
        train_generator,
        steps_per_epoch=steps_per_epoch,
        epochs=10,
        validation_data=validation_generator,
        validation_steps=validation_steps
    )

    # Phase 2: unfreeze the base. The model must be recompiled for the change
    # to take effect, and a very low learning rate is used so the large
    # gradient updates of default Adam do not wreck the base weights.
    base_model.trainable = True
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

    history_finetune = model.fit(
        train_generator,
        steps_per_epoch=steps_per_epoch,
        epochs=10,
        validation_data=validation_generator,
        validation_steps=validation_steps
    )

    return history, history_finetune

Image generator settings

In [None]:
# Iterator used only for previewing pictures: one rescaled, unlabelled
# (class_mode=None) image per batch, drawn in shuffled order from the test folder.
datagen = ImageDataGenerator(rescale=1./255)
preview_options = dict(
    target_size=(150, 150),
    batch_size=1,
    class_mode=None,
    shuffle=True,
)
data_generator = datagen.flow_from_directory(test_dir, **preview_options)

Image visualisation

In [None]:
# Draw nine images from the preview iterator in a 3x3 grid without axes.
fig, axes = plt.subplots(3, 3, figsize=(10, 10))
for ax in axes.flat:
    image = next(data_generator)[0]  # each batch holds a single image
    ax.imshow(image)
    ax.axis("off")

plt.show()

# 3. Recurrent neural networks

Import libraries

In [27]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout, GRU
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.utils import resample
from sklearn.model_selection import train_test_split

Load training and testing data

In [28]:
# Both CSV files are decoded with the same single-byte encoding.
csv_encoding = 'ISO-8859-1'
train_data = pd.read_csv('train.csv', encoding=csv_encoding)
test_data = pd.read_csv('test.csv', encoding=csv_encoding)

Print the number of rows in each dataset

In [None]:
# Report how many rows each of the two loaded frames contains.
print(f"Number of rows in training data: {len(train_data)}")
print(f"Number of rows in testing data: {len(test_data)}")

Select the review-text and rating columns and binarize the ratings

In [30]:
text_column = 'User Review Text'
label_column = 'User Rating'

# Fail fast when a required column is absent. Merely printing a message would
# leave `texts`/`labels` undefined and surface later as a confusing NameError.
missing_columns = [
    column for column in (text_column, label_column)
    if column not in train_data.columns
]
if missing_columns:
    raise KeyError(f"Columns {missing_columns} not found in the training dataset.")

# Missing reviews become empty strings so every entry handed to the tokenizer
# is a str (NaN would otherwise come through as a float).
texts = train_data[text_column].fillna('').astype(str).tolist()
# Binary target: 1 for ratings of 4 and above, 0 otherwise.
labels = (train_data[label_column] >= 4).astype(int).tolist()  # Example binarization

Data balancing

In [31]:
def balance_data(texts, labels):
    """Oversample the smaller classes so every class has as many samples as the largest.

    All original samples are kept. A class with fewer samples than the largest
    one gets extra samples drawn from it with replacement (fixed seed 42).
    The largest class is returned untouched rather than bootstrapped, so none
    of its original samples are lost or duplicated.

    Args:
        texts: Sequence of samples.
        labels: Sequence of class labels, aligned with ``texts``.

    Returns:
        Tuple ``(texts_balanced, labels_balanced)`` of lists grouped by class
        in sorted class order, each class contributing the same number of
        entries. Both lists are empty when ``labels`` is empty.
    """
    if len(labels) == 0:
        return [], []

    # One pass over the labels gives every class and its size.
    classes, counts = np.unique(np.asarray(labels), return_counts=True)
    max_count = int(counts.max())

    texts_balanced, labels_balanced = [], []
    for cls, count in zip(classes, counts):
        cls_texts = [text for text, label in zip(texts, labels) if label == cls]
        shortfall = max_count - int(count)
        if shortfall > 0:
            # Top up with random duplicates only; the originals stay in place.
            extra = resample(cls_texts, replace=True, n_samples=shortfall, random_state=42)
            cls_texts = cls_texts + list(extra)
        texts_balanced.extend(cls_texts)
        labels_balanced.extend([cls] * max_count)
    return texts_balanced, labels_balanced

texts_balanced, labels_balanced = balance_data(texts, labels)

Tokenization and padding

In [32]:
# Vocabulary cap and fixed sequence length shared with the embedding code below.
max_vocab_size = 10000
max_sequence_length = 100

# Fit the vocabulary on the balanced texts; out-of-vocabulary words map to '<UNK>'.
tokenizer = Tokenizer(num_words=max_vocab_size, oov_token='<UNK>')
tokenizer.fit_on_texts(texts_balanced)

# Convert texts to integer id sequences and pad them at the end ('post')
# to max_sequence_length.
sequences = tokenizer.texts_to_sequences(texts_balanced)
padded_sequences = pad_sequences(
    sequences,
    maxlen=max_sequence_length,
    padding='post',
)

Split data

In [33]:
# Hold out 20% of the padded sequences for testing, with a fixed seed.
split_parts = train_test_split(
    padded_sequences,
    labels_balanced,
    test_size=0.2,
    random_state=42,
)
X_train, X_test, y_train, y_test = split_parts

Load pre-trained embeddings

In [34]:
def load_glove_embeddings(glove_file, tokenizer, embedding_dim):
    """Build an embedding matrix for the tokenizer's vocabulary from a GloVe text file.

    Args:
        glove_file: Path to a UTF-8 text file with one ``word v1 ... vN`` entry
            per line.
        tokenizer: Object exposing ``word_index`` (word -> integer id) and,
            optionally, ``num_words`` (vocabulary cap).
        embedding_dim: Number of values expected per word vector.

    Returns:
        Array of shape ``(vocab_size, embedding_dim)`` where row ``i`` holds
        the vector of the word with id ``i``. Rows of words without a vector
        stay all-zero. ``vocab_size`` is ``tokenizer.num_words`` when set,
        otherwise ``len(word_index) + 1``.

    Raises:
        ValueError: If a non-blank line does not hold exactly
            ``embedding_dim`` values after the word.
    """
    word_index = tokenizer.word_index
    # Take the vocabulary cap from the tokenizer itself instead of relying on
    # a module-level global being defined and in sync.
    num_words = getattr(tokenizer, "num_words", None)
    vocab_size = num_words if num_words else len(word_index) + 1

    embeddings_index = {}
    with open(glove_file, 'r', encoding='utf-8') as f:
        for line_number, line in enumerate(f, start=1):
            values = line.split()
            if not values:
                # Skip blank lines (e.g. a trailing newline at the end of the file).
                continue
            if len(values) != embedding_dim + 1:
                # Report a mismatch here rather than as a shape error when the
                # matrix is filled further down.
                raise ValueError(
                    f"{glove_file}, line {line_number}: expected {embedding_dim} "
                    f"values after the word, found {len(values) - 1}."
                )
            embeddings_index[values[0]] = np.asarray(values[1:], dtype='float32')

    print(f"Loaded {len(embeddings_index)} word vectors from GloVe.")

    # Prepare embedding matrix
    embedding_matrix = np.zeros((vocab_size, embedding_dim))
    for word, i in word_index.items():
        if i >= vocab_size:
            # Ids beyond the cap have no row in the matrix.
            continue
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            embedding_matrix[i] = embedding_vector

    return embedding_matrix

Path to GloVe embeddings (update with your file path)

In [None]:
glove_file = "glove.6B.100d.txt"  # Download GloVe from https://nlp.stanford.edu/projects/glove/
# Vector size passed to the loader and used as the Embedding output size.
# NOTE(review): the file name suggests 100-dimensional vectors — keep the two in sync.
embedding_dim = 100
# Matrix of pre-trained vectors aligned with the tokenizer's word ids.
embedding_matrix = load_glove_embeddings(glove_file, tokenizer, embedding_dim)

Model creation

In [36]:
def build_model(use_pretrained_embeddings=False, embedding_matrix=None):
    """Build and compile the LSTM + GRU binary text classifier.

    Architecture: Embedding -> LSTM(128, sequences) -> Dropout -> GRU(64)
    -> Dense(64, relu) -> Dropout -> Dense(1, sigmoid).

    Relies on the notebook-level ``max_vocab_size``, ``embedding_dim`` and
    ``max_sequence_length`` settings.

    Args:
        use_pretrained_embeddings: When True, the embedding layer is
            initialised from ``embedding_matrix`` and frozen; otherwise it is
            initialised by Keras and trained with the rest of the model.
        embedding_matrix: Array of shape ``(max_vocab_size, embedding_dim)``.
            Required when ``use_pretrained_embeddings`` is True.

    Returns:
        A compiled ``Sequential`` model (Adam, binary cross-entropy, accuracy).

    Raises:
        ValueError: If pre-trained embeddings are requested but the matrix is
            missing or has the wrong shape.
    """
    embedding_options = {}
    if use_pretrained_embeddings:
        # Validate up front so a forgotten or mis-sized matrix gives a clear
        # message instead of an opaque error inside Keras.
        if embedding_matrix is None:
            raise ValueError(
                "embedding_matrix is required when use_pretrained_embeddings=True."
            )
        embedding_matrix = np.asarray(embedding_matrix)
        expected_shape = (max_vocab_size, embedding_dim)
        if embedding_matrix.shape != expected_shape:
            raise ValueError(
                f"embedding_matrix has shape {embedding_matrix.shape}, "
                f"expected {expected_shape}."
            )
        # A Constant initializer is used in place of the legacy `weights=`
        # constructor argument so the layer does not depend on that argument
        # being accepted by the installed Keras version.
        embedding_options = {
            "embeddings_initializer": tf.keras.initializers.Constant(embedding_matrix),
            "trainable": False,
        }

    model = Sequential()
    # An explicit Input replaces the legacy `input_length=` Embedding argument.
    model.add(tf.keras.Input(shape=(max_sequence_length,)))
    model.add(Embedding(input_dim=max_vocab_size,
                        output_dim=embedding_dim,
                        **embedding_options))

    model.add(LSTM(128, return_sequences=True))
    model.add(Dropout(0.5))
    model.add(GRU(64))
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

Build model with pre-trained embeddings

In [37]:
# Toggle between the frozen GloVe-initialised embedding (True) and an
# embedding layer learned from scratch (False).
use_pretrained = True
model = build_model(use_pretrained_embeddings=use_pretrained, embedding_matrix=embedding_matrix)

Training

In [None]:
# The labels are a Python list, so convert them to an array before fitting.
# 20% of the training data is set aside by Keras for validation.
y_train_array = np.array(y_train)
history = model.fit(
    X_train,
    y_train_array,
    validation_split=0.2,
    epochs=10,
    batch_size=32,
)

Evaluation

In [None]:
# Score the model on the held-out split; results holds the loss followed by
# the accuracy metric.
y_test_array = np.array(y_test)
results = model.evaluate(X_test, y_test_array)
print(f"Test Loss: {results[0]}, Test Accuracy: {results[1]}")