# Imports

In [4]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from win10toast import ToastNotifier
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
# Desktop toast notifier; a later cell calls show_toast() on such an object
# once model fitting has finished.
toaster = ToastNotifier()
# Manual check that notifications work (left disabled):
# toaster.show_toast("Test",
#                    duration=10)

# Load and Preprocess data

In [3]:
# Load data
train_df = pd.read_csv('dataset/Training_set.csv')
test_df = pd.read_csv('dataset/Testing_set.csv')

# Tunables for this cell, named once instead of being repeated inline.
RANDOM_STATE = 42
# Fraction of every split that is kept. 0.001 keeps 0.1% of the rows (a
# debugging-sized subsample); use 0.1 for 10% or 1.0 for the full data.
SAMPLE_FRAC = 0.001

# Split training data: 80% stays in train, 20% becomes the validation set.
train_df, val_df = train_test_split(train_df, test_size=0.2, random_state=RANDOM_STATE)

# Subsample every split with the same fraction and seed so runs are repeatable.
train_df = train_df.sample(frac=SAMPLE_FRAC, random_state=RANDOM_STATE)
val_df = val_df.sample(frac=SAMPLE_FRAC, random_state=RANDOM_STATE)
test_df = test_df.sample(frac=SAMPLE_FRAC, random_state=RANDOM_STATE)

# Set Up Data Generators

In [5]:
# Every generator multiplies pixel values by the same factor; only the
# training generator additionally applies random augmentation.
_rescale = 1./255
_augmentation = {
    'rotation_range': 40,
    'width_shift_range': 0.2,
    'height_shift_range': 0.2,
    'shear_range': 0.2,
    'zoom_range': 0.2,
    'horizontal_flip': True,
}
train_datagen = ImageDataGenerator(rescale=_rescale, **_augmentation)

# Validation and test images are rescaled only.
val_datagen = ImageDataGenerator(rescale=_rescale)
test_datagen = ImageDataGenerator(rescale=_rescale)


# Create Generators

In [None]:
# Options shared by all three iterators.
_image_options = dict(
    x_col='filename',
    target_size=(32, 32),
    batch_size=16,
)

# Train and validation both read labelled images from the training folder.
_labelled_options = dict(
    _image_options,
    directory='dataset/train',
    y_col='label',
    class_mode='categorical',
)

train_generator = train_datagen.flow_from_dataframe(dataframe=train_df, **_labelled_options)

val_generator = val_datagen.flow_from_dataframe(dataframe=val_df, **_labelled_options)

# The test iterator yields images only (class_mode=None) and keeps the
# dataframe order (shuffle=False).
test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_df,
    directory='dataset/test',
    class_mode=None,
    shuffle=False,
    **_image_options
)


# Pipeline building

In [9]:
def _generator_to_arrays(generator):
    """Materialise a labelled Keras image iterator into scikit-learn inputs.

    scikit-learn estimators need an in-memory 2-D feature matrix and a 1-D
    label vector; they cannot consume a Keras batch iterator directly.

    Parameters
    ----------
    generator : iterator returned by ``flow_from_dataframe`` with
        ``class_mode='categorical'`` (yields ``(images, one_hot_labels)``).

    Returns
    -------
    (X, y) : ``X`` has one flattened image per row; ``y`` holds the class
        *names*, so labels stay comparable between iterators whose class
        index mappings may differ.

    Raises
    ------
    ValueError
        If the iterator contains no batches.
    """
    index_to_label = {index: label for label, index in generator.class_indices.items()}
    features, labels = [], []
    # Index batch by batch: this visits every sample exactly once, whereas
    # looping over a Keras iterator with `for` never terminates.
    for batch_index in range(len(generator)):
        images, one_hot = generator[batch_index]
        features.append(images.reshape(len(images), -1))
        labels.extend(index_to_label[i] for i in np.argmax(one_hot, axis=1).tolist())
    if not features:
        raise ValueError("Generator produced no batches; increase the sample fraction.")
    return np.concatenate(features), np.asarray(labels)


X_train, y_train = _generator_to_arrays(train_generator)
X_val, y_val = _generator_to_arrays(val_generator)

pipeRF = Pipeline([
    ('classifier', RandomForestClassifier(n_estimators=1, max_depth=1, random_state=42))
])
pipeRF.fit(X_train, y_train)

# The test iterator carries no labels (class_mode=None), so the baseline
# accuracy is measured on the labelled validation split.
original_accuracy = pipeRF.score(X_val, y_val)
print(f"Original Pipeline Accuracy: {original_accuracy:.4f}")

toaster = ToastNotifier()
toaster.show_toast("Ur Toast is ready", 
                   "Your machine learning model has finished running",
                   duration=10)


KeyboardInterrupt: 

# Grid Search

In [None]:
# Search space for the pipeline's 'classifier' step. That step is a
# RandomForestClassifier, so the keys must be forest hyperparameters; the
# previous SVC-only names (C, gamma, kernel) are not parameters of it.
# NOTE(review): the results cell reads `grid_search`, but no GridSearchCV is
# constructed in the visible notebook. Build and fit one with this grid
# before that cell runs.
param_grid = {
    'classifier__n_estimators': [50, 100, 200],
    'classifier__max_depth': [None, 10],
    'classifier__max_features': ['sqrt', 'log2']
}




# Results and predictions

In [None]:
# Difference between the tuned and the baseline accuracy, in percentage points.
# NOTE(review): best_score_ is GridSearchCV's mean cross-validated score while
# original_accuracy comes from a single score() call. Confirm the two are
# meant to be compared directly.
improvement = (grid_search.best_score_ - original_accuracy) * 100
print(f"\nAccuracy Improvement: {improvement:.2f}%")

# Build the feature matrix for the unlabeled test images: one flattened image
# per row. The test iterator was created with shuffle=False, so batch order
# follows the dataframe order.
test_batches = [test_generator[i] for i in range(len(test_generator))]
if not test_batches:
    raise ValueError("Test generator produced no batches; increase the sample fraction.")
X_test_features = np.concatenate([batch.reshape(len(batch), -1) for batch in test_batches])

# flow_from_dataframe may skip rows whose files are missing; refuse to attach
# predictions that would no longer line up with the dataframe rows.
if len(X_test_features) != len(test_df):
    raise ValueError(
        f"Got {len(X_test_features)} test images for {len(test_df)} dataframe rows; "
        "predictions cannot be aligned."
    )

# NOTE(review): predictions come from the baseline pipeRF, not the tuned
# estimator. Confirm that is intended.
predictions = pipeRF.predict(X_test_features)
test_df['predicted_label'] = predictions
