# Modeling

## [1. Importing Packages and Methods](#1_importing_packages_and_methods)
## [2. Importing Data and Labeling](#2_importing_data_and_labeling)
## [3. Modeling](#3_modeling)
- ### [3.1 Resnet-50](#3_1_resnet_50)
- ### [3.2 Alexnet](#3_2_alexnet)
- ### [3.3 GoogLeNet](#3_3_googlenet)


## 1. Importing Packages and Methods<a id='1_importing_packages_and_methods'></a>

In [1]:
import os
import numpy as np
from PIL import Image

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense


## Import desired packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import timedelta
import statsmodels.api as sm
import gc

from sklearn import __version__ as sklearn_version
from sklearn.decomposition import PCA
from sklearn.preprocessing import scale
from sklearn.model_selection import train_test_split, cross_validate, GridSearchCV, learning_curve, TimeSeriesSplit
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.dummy import DummyRegressor
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.pipeline import make_pipeline
from sklearn.impute import SimpleImputer
from sklearn.feature_selection import SelectKBest, f_regression
import datetime

from tqdm import tqdm
import itertools
import gc


## 2. Importing Data and Labeling<a id='2_importing_data_and_labeling'></a>

In [3]:
# Load images and labels
# We make sure it loads RGB things
def load_images(folder_path, label):
    images = []
    labels = []
    for filename in os.listdir(folder_path):
        if filename.endswith(".png"):
            image = Image.open(os.path.join(folder_path, filename)).convert('RGB').resize((224, 224))
            images.append(np.array(image))
            labels.append(label)
    return images, labels

folder_0 = "/content/drive/MyDrive/data/non_hs"
folder_1 = "/content/drive/MyDrive/data/hs"

# Mark non_head_and_shoulder as 0 and head_and_shoulder as 1
images_0, labels_0 = load_images(folder_0, 0)
images_1, labels_1 = load_images(folder_1, 1)

images = np.array(images_0 + images_1)
labels = np.array(labels_0 + labels_1)

# Split data into train and test sets, we also stratify them 
X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=42, stratify=labels)

# Data augmentation
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    rescale=1./255
)

datagen.fit(X_train)

# Define model
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

# Compile model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train model
model.fit(datagen.flow(X_train, y_train, batch_size=32), validation_data=(X_test/255, y_test), epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f05b3d599c0>

## 3. Modeling <a id='3_modeling'></a>

### 3.1 Resnet 50

In [4]:
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.layers import GlobalAveragePooling2D

# ... (Load images and labels, split data, and data augmentation code remains the same)

# Load the pretrained ResNet50 model without the top layers
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Define custom classification layers
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(64, activation='relu')(x)
predictions = Dense(1, activation='sigmoid')(x)

# Create the final model
resnet_50 = tf.keras.Model(inputs=base_model.input, outputs=predictions)

# Freeze the layers of the pretrained model
for layer in base_model.layers:
    layer.trainable = False

# Compile model
resnet_50.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train model
resnet_50.fit(datagen.flow(X_train, y_train, batch_size=32), validation_data=(X_test/255, y_test), epochs=10)
test_loss, test_accuracy = resnet_50.evaluate(X_test / 255, y_test, verbose=2)
print(f"Test accuracy: {test_accuracy * 100:.2f}%")

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
9/9 - 52s - loss: 0.2359 - accuracy: 0.9091 - 52s/epoch - 6s/step
Test accuracy: 90.91%


### 3.2 Alexnet

In [None]:
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

def create_alexnet(input_shape=(224, 224, 3), num_classes=1):
    model = tf.keras.Sequential([
        Conv2D(96, (11, 11), strides=(4, 4), activation='relu', input_shape=input_shape),
        MaxPooling2D((3, 3), strides=(2, 2)),
        Conv2D(256, (5, 5), activation='relu', padding='same'),
        MaxPooling2D((3, 3), strides=(2, 2)),
        Conv2D(384, (3, 3), activation='relu', padding='same'),
        Conv2D(384, (3, 3), activation='relu', padding='same'),
        Conv2D(256, (3, 3), activation='relu', padding='same'),
        MaxPooling2D((3, 3), strides=(2, 2)),
        Flatten(),
        Dense(4096, activation='relu'),
        Dropout(0.5),
        Dense(4096, activation='relu'),
        Dropout(0.5),
        Dense(num_classes, activation='sigmoid' if num_classes == 1 else 'softmax')
    ])
    return model

# Create and compile AlexNet model
alexnet = create_alexnet()
alexnet.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train AlexNet model
alexnet.fit(datagen.flow(X_train, y_train, batch_size=32), validation_data=(X_test/255, y_test), epochs=10)
test_loss, test_accuracy = alexnet.evaluate(X_test / 255, y_test, verbose=2)
print(f"Test accuracy: {test_accuracy * 100:.2f}%")


## 3.3 GoogLeNet 

In [None]:
from tensorflow.keras.applications.inception_v3 import InceptionV3
# Load the pretrained InceptionV3 model without the top layers
base_model = InceptionV3(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Define custom classification layers
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(64, activation='relu')(x)
predictions = Dense(1, activation='sigmoid')(x)

# Create the final model
googlenet = tf.keras.Model(inputs=base_model.input, outputs=predictions)

# Freeze the layers of the pretrained model
for layer in base_model.layers:
    layer.trainable = False

# Compile model
googlenet.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train GoogLeNet (InceptionV3) model
googlenet.fit(datagen.flow(X_train, y_train, batch_size=32), validation_data=(X_test/255, y_test), epochs=10)
test_loss, test_accuracy = googlenet.evaluate(X_test / 255, y_test, verbose=2)
print(f"Test accuracy: {test_accuracy * 100:.2f}%")


In [None]:
# Load images and labels

# We make sure it loads RGB things
def load_images(folder_path, label):
    images = []
    labels = []
    for filename in os.listdir(folder_path):
        if filename.endswith(".png"):
            image = Image.open(os.path.join(folder_path, filename)).convert('RGB').resize((224, 224))
            images.append(np.array(image))
            labels.append(label)
    return images, labels

folder_0 = "/content/drive/MyDrive/data/non_hs"
folder_1 = "/content/drive/MyDrive/data/hs"

images_0, labels_0 = load_images(folder_0, 0)
images_1, labels_1 = load_images(folder_1, 1)

images = np.array(images_0 + images_1)
labels = np.array(labels_0 + labels_1)

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=42, stratify=labels)

# Data augmentation
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    rescale=1./255
)

datagen.fit(X_train)

# Define model
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

# Compile model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train model
model.fit(datagen.flow(X_train, y_train, batch_size=32), validation_data=(X_test/255, y_test), epochs=10)