## Intro
This notebook is meant to be run in AWS (SageMaker), with the S3 bucket already set up.

In [None]:
import pandas as pd
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from io import BytesIO
from PIL import Image
import boto3
import os
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential

In [None]:
# Create the module-level S3 client that open_image_from_s3 uses for every
# download; no explicit credentials or region are passed here, so boto3's
# default configuration applies.
s3_client = boto3.client(service_name='s3')

In [None]:
# Function to download an image from an S3 bucket

def open_image_from_s3(bucket_name, key):
    """Fetch one object from S3 and open it as a PIL image.

    The object's bytes are read fully into memory and wrapped in a BytesIO
    buffer, so nothing is written to disk and the S3 object is not modified.

    Args:
        bucket_name: Name of the S3 bucket to read from.
        key: Key of the image object inside the bucket.

    Returns:
        The image returned by ``Image.open`` for the downloaded bytes.
    """
    # Uses the module-level ``s3_client``.
    s3_object = s3_client.get_object(Bucket=bucket_name, Key=key)
    raw_bytes = s3_object['Body'].read()
    return Image.open(BytesIO(raw_bytes))

In [None]:
# S3 bucket holding the driver images.
# NOTE(review): the literal used to read 'sage,aler-team6-distracted-drivers';
# commas are not allowed in S3 bucket names, so it is assumed to be a typo
# for "sagemaker" -- confirm against the actual bucket name.
bucket_name = 'sagemaker-team6-distracted-drivers'

# CSV file listing the images; the columns used in this notebook are
# 'subject' (driver id), 'classname' (class label) and 'img' (file name).
excel_file = 'driver_imgs_list.csv'
data_df = pd.read_csv(excel_file)

# Rows belonging to the one driver this notebook samples from
driver_p014_df = data_df[data_df['subject'] == 'p014']

# Per-class counters (c0..c9) and the accumulator filled by the sampling loop
image_counts = {f'c{i}': 0 for i in range(10)}
sampled_images = []

# Scratch folder for local copies so the images in the S3 bucket aren't altered.
# A stray adjacent string literal ("DS5100_ Group 6 Presentation.pdf") used to
# follow this path and was silently concatenated onto it; it has been removed.
save_dir = '/home/sagemaker-user/Big-Data-Systems/Team 6/scrap_photo'
os.makedirs(save_dir, exist_ok=True)

In [None]:
# Saving selected images

# Keep at most 20 images per class for the selected driver. Each one is
# downloaded from S3 and written to the local scratch folder.
for _, row in driver_p014_df.iterrows():  # was misspelled `itrerrows`
    classname = row['classname']
    if image_counts[classname] >= 20:
        continue  # this class already has its 20 samples

    key = f"Imgs/train/{classname}/{row['img']}"
    local_path = os.path.join(save_dir, os.path.basename(key))

    # Save a local copy, then release the in-memory image right away.
    with open_image_from_s3(bucket_name, key) as img:
        img.save(local_path)

    # Keras' flow_from_dataframe loads images from file paths, so record the
    # saved file's path (not the PIL object) next to its class label.
    sampled_images.append((local_path, classname))
    image_counts[classname] += 1

In [None]:
# One row per sampled image. The column names must match the x_col / y_col
# arguments passed to flow_from_dataframe ('img' and 'classname'); the label
# column was previously misspelled as 'classnmame'.
sampled_df = pd.DataFrame(sampled_images, columns=['img', 'classname'])

In [None]:
# Seed NumPy's global RNG (presumably so the generators' shuffling is
# repeatable between runs).
np.random.seed(333)

# Target image size and batch size shared by both generators
img_height = 180
img_width = 180
batch_size = 8

# Rescale pixel values by 1/255 and hold out 20% of the rows for validation
datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)

# Arguments common to the training and validation generators.
# NOTE(review): flow_from_dataframe expects x_col to contain file paths
# (strings) -- confirm that sampled_df['img'] holds paths.
_flow_kwargs = dict(
    dataframe=sampled_df,
    x_col='img',
    y_col='classname',
    target_size=(img_height, img_width),
    batch_size=batch_size,
)

train_ds = datagen.flow_from_dataframe(subset='training', **_flow_kwargs)

valid_ds = datagen.flow_from_dataframe(subset='validation', **_flow_kwargs)

In [None]:
%%html

<!-- Shows a notice plus a hidden button tagged with the "kernelmenu:shutdown"
     command; the script below clicks that button programmatically. -->
<p><b>Shutting down your kernel for this notebook to release resources.</b></p>
<button class="sm-command-button" data-commandlinker-command="kernelmenu:shutdown" style="display:none;">Shutdown Kernel</button>
        
<script>
// Click the first element in the document with class "sm-command-button".
// Any error (e.g. no such element exists) is deliberately ignored.
try {
    els = document.getElementsByClassName("sm-command-button");
    els[0].click();
}
catch(err) {
    // NoOp
}    
</script>

In [None]:
%%javascript

// Save a checkpoint and then delete the notebook session through the
// `Jupyter` global. Any error (presumably e.g. when that global is not
// available in the current front end) is silently ignored.
try {
    Jupyter.notebook.save_checkpoint();
    Jupyter.notebook.session.delete();
}
catch(err) {
    // NoOp
}