# Ingesting, subsetting, and renaming xray files

## Setting up environment

The next few cells store the S3 location of the source data to be copied, and the S3 path of the destination it is copied to.

In [None]:
import boto3
import sagemaker
import pandas as pd
import io
import numpy as np
import shutil
import os
import random

from io import BytesIO
from PIL import Image


# SageMaker session, plus the execution role and default bucket obtained through the SDK.
sagemaker_session = sagemaker.Session()
role = sagemaker.get_execution_role()
bucket = sagemaker_session.default_bucket()
# Region name configured on the default boto3 session.
region = boto3.Session().region_name

# Low-level clients: `sm` for the SageMaker API, `s3` for the S3 operations in the cells below.
sm = boto3.Session().client(service_name="sagemaker", region_name=region)
s3 = boto3.client('s3')

In [None]:
# setting S3 source location
public_path = "s3://508-xray-project/sampling_folder"

In [None]:
# storing public path
%store public_path

In [None]:
# setting up S3 destination location
# (plain literal: the string has no "{}" placeholder, so calling .format(bucket) on it did nothing)
private_path = "s3://project508data/new_sampling"

In [None]:
# storing private path
%store private_path

In [None]:
# copy data from one s3 bucket into another S3 bucket
!aws s3 cp  $public_path $private_path --recursive

In [None]:
!aws s3 ls s3://$bucket/project508data

In [None]:
!aws s3 ls 

## Normalizing images

In [None]:
# Normalize the pixels of each image on a scale of [0, 1]
# Initialize S3 client
s3 = boto3.client('s3')

def normalize_image(bucket_name, folder_prefix):
    """Re-encode every image under an S3 prefix after a [0, 1] scaling round trip.

    Each object is downloaded, its pixel array is divided by 255.0, scaled
    back to the 0-255 range, saved as JPEG, and uploaded over the original
    key. Because the array is converted back to ``uint8`` before saving, the
    stored objects still contain 0-255 pixel data.

    Args:
        bucket_name: Name of the S3 bucket that holds the images.
        folder_prefix: Key prefix whose objects are processed.
    """
    # A single list_objects_v2 call returns only one page of keys, so walk
    # every page to reach all objects under the prefix.
    paginator = s3.get_paginator('list_objects_v2')
    for page in paginator.paginate(Bucket=bucket_name, Prefix=folder_prefix):
        for obj in page.get('Contents', []):
            object_key = obj['Key']

            # Keys ending in '/' are folder placeholders, not images.
            if object_key.endswith('/'):
                continue

            # Download image from S3
            image_bytes = s3.get_object(Bucket=bucket_name, Key=object_key)['Body'].read()

            # Scale pixel values to [0, 1]
            image = Image.open(io.BytesIO(image_bytes))
            normalized_image_array = np.array(image) / 255.0

            # Convert back to 8-bit. Round first: astype() truncates, so a
            # float result that lands just below an integer would lose 1.
            normalized_image = Image.fromarray(
                np.rint(normalized_image_array * 255).astype(np.uint8)
            )

            # Save normalized image to bytes
            with io.BytesIO() as output:
                normalized_image.save(output, format='JPEG')
                normalized_image_bytes = output.getvalue()

            # Upload normalized image back to S3 (overwrites the original key)
            s3.put_object(Bucket=bucket_name, Key=object_key, Body=normalized_image_bytes)

# Set bucket name and folder prefix
# (module-level names; several later cells reassign bucket_name and folder_prefix)
bucket_name = '508-xray-project'
folder_prefix = 'sampling_folder/'

# Normalize images in the specified folder; each processed object is uploaded
# back under its original key.
normalize_image(bucket_name, folder_prefix)

## Resizing Images

In [None]:
# Function to resize images in the sampling folder
def resize_images_in_folder(bucket_name, folder_prefix, new_size):
    """Resize every image under an S3 prefix and overwrite it in place.

    Args:
        bucket_name: Name of the S3 bucket that holds the images.
        folder_prefix: Key prefix whose objects are resized.
        new_size: Target size passed straight to ``Image.resize``.
    """
    # A single list_objects_v2 call returns only one page of keys, so walk
    # every page to reach all objects under the prefix.
    paginator = s3.get_paginator('list_objects_v2')
    for page in paginator.paginate(Bucket=bucket_name, Prefix=folder_prefix):
        for obj in page.get('Contents', []):
            key = obj['Key']

            # Keys ending in '/' are folder placeholders, not images.
            if key.endswith('/'):
                continue

            # Download image from S3
            image_bytes = s3.get_object(Bucket=bucket_name, Key=key)['Body'].read()

            # Resize image
            image = Image.open(BytesIO(image_bytes))
            resized_image = image.resize(new_size)

            # Save resized image to a bytes buffer, encoded with the format
            # that was detected for the downloaded image.
            with BytesIO() as output:
                resized_image.save(output, format=image.format)
                resized_image_bytes = output.getvalue()

            # Upload resized image back to S3 (overwrites the original key)
            s3.put_object(Bucket=bucket_name, Key=key, Body=resized_image_bytes)

# Define the bucket name and folder prefix
bucket_name = '508-xray-project'
folder_prefix = 'sampling_folder/'

# Define the new size for the resized images
# (handed unchanged to Image.resize inside resize_images_in_folder)
new_size = (224, 224)  

# Resize images in the sampling folder; each resized image replaces the
# object stored under the same key.
resize_images_in_folder(bucket_name, folder_prefix, new_size)

print("Images resized successfully.")

## Splitting data into train, validation, and test folders

In [None]:
# Create random sample to achieve a subset 3000 files, 1000 from each folder  
# Initialize S3 client
s3 = boto3.client('s3')

def random_sampling(source_bucket, source_folder, destination_bucket, destination_folder, num_samples):
    """Copy a random subset of the objects under one S3 prefix to another prefix.

    Args:
        source_bucket: Bucket to sample from.
        source_folder: Key prefix whose objects are candidates.
        destination_bucket: Bucket that receives the copies.
        destination_folder: Prefix that replaces the first occurrence of
            ``source_folder`` in each sampled key.
        num_samples: Maximum number of objects to copy; if fewer candidates
            exist, all of them are copied.
    """
    # Collect candidates from every result page (a single list_objects_v2
    # call returns only the first page, which would bias the sample), and
    # leave out folder placeholder keys, which end in '/'.
    paginator = s3.get_paginator('list_objects_v2')
    object_keys = [
        obj['Key']
        for page in paginator.paginate(Bucket=source_bucket, Prefix=source_folder)
        for obj in page.get('Contents', [])
        if not obj['Key'].endswith('/')
    ]

    # Nothing to sample from an empty prefix.
    if not object_keys:
        return

    # Randomly select a subset of object keys
    sampled_object_keys = random.sample(object_keys, min(num_samples, len(object_keys)))

    # Copy sampled objects to the destination folder
    for source_key in sampled_object_keys:
        destination_key = source_key.replace(source_folder, destination_folder, 1)
        s3.copy_object(
            CopySource={'Bucket': source_bucket, 'Key': source_key},
            Bucket=destination_bucket,
            Key=destination_key,
        )

# Set parameters (source and destination are the same bucket)
source_bucket = '508-xray-project'
destination_bucket = '508-xray-project'
num_samples_per_folder = 1000

# Sampling for each folder: up to num_samples_per_folder objects per source
# prefix are copied, with that prefix replaced by 'sampling_folder/' in the key.
folders_to_sample = ['normal/', 'pneumonia/', 'covid/train/']
for folder in folders_to_sample:
    random_sampling(source_bucket, folder, destination_bucket, 'sampling_folder/', num_samples_per_folder)

In [None]:
# Confirm the sampling plan by counting every object under sampling_folder/
s3 = boto3.client('s3')
bucket_name = '508-xray-project'

# Prefix (folder) inside the bucket whose objects are tallied
folder_prefix = 'sampling_folder/'

# Running total of objects seen so far
num_files = 0

# Walk every result page; a page without 'Contents' contributes nothing
paginator = s3.get_paginator('list_objects_v2')
for page in paginator.paginate(Bucket=bucket_name, Prefix=folder_prefix):
    num_files += len(page.get('Contents', []))

print("Number of files in the folder:", num_files)

In [None]:
def count_files_in_folder(bucket_name, folder_prefix):
    """Tally the objects under an S3 prefix by the category word in their key.

    A key is attributed to the first of 'normal', 'pneumonia', 'covid'
    (checked in that order) that occurs anywhere in it; keys containing none
    of the three are not counted.

    Args:
        bucket_name: Bucket to inspect.
        folder_prefix: Key prefix to list.

    Returns:
        Tuple ``(normal_count, pneumonia_count, covid_count)``.
    """
    client = boto3.client('s3')

    # Order matters: it reproduces the normal -> pneumonia -> covid precedence.
    categories = ('normal', 'pneumonia', 'covid')
    tallies = dict.fromkeys(categories, 0)

    listing = client.get_paginator('list_objects_v2').paginate(
        Bucket=bucket_name, Prefix=folder_prefix
    )
    for page in listing:
        for entry in page.get('Contents', []):
            matched = next((word for word in categories if word in entry['Key']), None)
            if matched is not None:
                tallies[matched] += 1

    return tallies['normal'], tallies['pneumonia'], tallies['covid']

# Specify your S3 bucket name and folder prefix
bucket_name = '508-xray-project'
folder_prefix = 'sampling_folder/'

# Call the function to count files; the result is a 3-tuple in the order
# (normal, pneumonia, covid)
normal_count, pneumonia_count, covid_count = count_files_in_folder(bucket_name, folder_prefix)

# Print the counts
print("Normal files:", normal_count)
print("Pneumonia files:", pneumonia_count)
print("COVID files:", covid_count)

In [None]:
# Initialize S3 client
s3 = boto3.client('s3')

# Define the bucket name and folder prefixes
bucket_name = '508-xray-project'
source_folder_prefix = 'sampling_folder/'
train_folder_prefix = 'train/'
test_folder_prefix = 'test/'
validation_folder_prefix = 'validation/'

# Function to copy files from source folder to destination folder
def copy_files(source_bucket, source_prefix, destination_bucket, destination_prefix, files):
    """Copy each named file from one bucket/prefix to another bucket/prefix.

    Args:
        source_bucket: Bucket that currently holds the objects.
        source_prefix: Prefix prepended to each name to form the source key.
        destination_bucket: Bucket that receives the copies.
        destination_prefix: Prefix prepended to each name to form the new key.
        files: Iterable of file names (key suffixes) to copy.
    """
    for name in files:
        origin = {'Bucket': source_bucket, 'Key': source_prefix + name}
        target_key = destination_prefix + name
        s3.copy_object(CopySource=origin, Bucket=destination_bucket, Key=target_key)

# List files in the source folder.
# A single list_objects_v2 call returns only the first page of keys, so page
# through the whole listing; folder placeholder keys (ending in '/') are skipped
# because they would produce an empty file name.
paginator = s3.get_paginator('list_objects_v2')
files = [
    obj['Key'].split('/')[-1]
    for page in paginator.paginate(Bucket=bucket_name, Prefix=source_folder_prefix)
    for obj in page.get('Contents', [])
    if not obj['Key'].endswith('/')
]

# Listings come back in key order, so slicing them directly would put whole
# categories into a single split. Shuffle with a fixed seed so the split is
# mixed but still reproducible.
SPLIT_SEED = 42
random.Random(SPLIT_SEED).shuffle(files)

# Calculate the number of files for each set (validation takes the remainder)
num_files = len(files)
num_train = int(0.9 * num_files)
num_test = int(0.05 * num_files)
num_validation = num_files - num_train - num_test

# Split files into train, test, and validation sets
train_files = files[:num_train]
test_files = files[num_train:num_train + num_test]
validation_files = files[num_train + num_test:]
assert len(train_files) + len(test_files) + len(validation_files) == num_files

# Copy files to train, test, and validation folders
copy_files(bucket_name, source_folder_prefix, bucket_name, train_folder_prefix, train_files)
copy_files(bucket_name, source_folder_prefix, bucket_name, test_folder_prefix, test_files)
copy_files(bucket_name, source_folder_prefix, bucket_name, validation_folder_prefix, validation_files)

print("Data split into train, test, and validation sets successfully.")

## Getting the names of which xrays are covid and which are normal based on the .txt file and deleting xrays

When the data was copied over from another bucket, the folder "covid_xrays" contained xrays that were labelled as "normal", which we don't want. We only need the xrays that were covid, so the others were deleted from the folder. Since not all covid xrays were uploaded, the list of filenames was subsetted to include only those that had "covid" contained in the filenames. That short list was then matched against the filenames currently in the bucket, and the files that weren't on the list were deleted.

The same thing was done for the normal xrays in the normal folder.

In [None]:
# Deleting the normal xrays from the covid/covid_xrays/train folder
# Step 1: fetch the label file (covid/train.txt) from the s3 bucket
bucket_name = 'project508data'
file_path = 'covid/train.txt'

obj = s3.get_object(Bucket=bucket_name, Key=file_path)

# Read the text file into a DataFrame
# Assuming the file is space-separated and does not contain column headers
train_df = pd.read_csv(obj['Body'], sep=' ', header=None)

# adding in headers to the df (four names, so the file must have four columns)
train_df.columns = ['patient id', 'filename', 'labels', 'data source'] 

# dropping the patient id and data source columns; filename and labels remain
train_df = train_df.drop(['patient id','data source'], axis=1 ) 

# checking to see which files are positive and which ones are negative
train_df.groupby('labels').size()

In [None]:
# only getting the files that have the negative label
negative_df = train_df[train_df['labels'] == 'negative'].reset_index(drop = True)
negative_df

In [None]:
# checking how many files we currently have in the folder
!aws s3 ls s3://project508data/covid/train --recursive | grep -c "train"

In [None]:
# filepath information
prefix_to_delete = 'covid/train/'

# Walk the filename column of negative_df and issue one S3 delete per entry
for filename in negative_df['filename']:
    key_to_delete = prefix_to_delete + filename

    # Request deletion; any exception is reported and the loop carries on
    try:
        s3.delete_object(Bucket=bucket_name, Key=key_to_delete)
        print(f"Deleted {key_to_delete} from S3")
    except Exception as err:
        print(f"Error deleting {key_to_delete} from S3: {err}")



In [None]:
# checking how many files we currently have in the folder after the deletions
!aws s3 ls s3://project508data/covid/train --recursive | grep -c "train"

In [None]:
# Deleting the normal xrays from the validation folder
# Step 1: fetch the label file (covid/val.txt) from the s3 bucket
s3 = boto3.client('s3')

bucket_name = 'project508data'
file_path = 'covid/val.txt'

obj = s3.get_object(Bucket=bucket_name, Key=file_path)

# Read the text file into a DataFrame
# Assuming the file is space-separated and does not contain column headers
val_df = pd.read_csv(obj['Body'], sep=' ', header=None)

# adding in headers to the df (four names, so the file must have four columns)
val_df.columns = ['patient id', 'filename', 'labels', 'data source'] 

# dropping the patient id and data source columns; filename and labels remain
val_df = val_df.drop(['patient id','data source'], axis=1 ) 

# checking to see which files are positive and which ones are negative
val_df.groupby('labels').size()

In [None]:
# only getting the files that have the negative label
negative_val_df = val_df[val_df['labels'] == 'negative'].reset_index(drop = True)
negative_val_df

In [None]:
# deleting the files from s3
# filepath information
prefix_to_delete = 'covid/val/'

# Walk the filename column of negative_val_df and issue one S3 delete per entry
for filename in negative_val_df['filename']:
    key_to_delete = prefix_to_delete + filename

    # Request deletion; any exception is reported and the loop carries on
    try:
        s3.delete_object(Bucket=bucket_name, Key=key_to_delete)
        print(f"Deleted {key_to_delete} from S3")
    except Exception as err:
        print(f"Error deleting {key_to_delete} from S3: {err}")

In [None]:
# checking how many files we currently have in the folder
!aws s3 ls s3://project508data/covid/val --recursive | grep -c "val"

In [None]:
# checking how many files we currently have in the folder
# since there are only two files that were covid, I manually moved them in S3
!aws s3 ls s3://project508data/covid/val --recursive | grep -c "val"

## Renaming the covid xrays

In this part of the code, the files that contained covid xrays were renamed as covid_#, pneumonia xrays were renamed as pneumonia_#, and normal xrays renamed as normal_#.

In [None]:
# renaming all covid xrays
# List Objects in Nested Folder
def list_objects(bucket, prefix=''):
    """Lazily yield the key of every object in ``bucket`` under ``prefix``.

    Args:
        bucket: Name of the S3 bucket to list.
        prefix: Key prefix to restrict the listing to (default: whole bucket).

    Yields:
        Object keys, page by page, in the order the listing returns them.
    """
    pages = s3.get_paginator('list_objects_v2').paginate(Bucket=bucket, Prefix=prefix)
    for page in pages:
        # Pages without a 'Contents' entry contribute nothing.
        yield from (entry['Key'] for entry in page.get('Contents', []))

bucket_name = 'project508data'
prefix = 'covid/train/'  # Specify the path to the nested folder

# Keep real files only. Folder placeholders are keys ending in '/'; filtering
# them explicitly is safer than assuming the first listed key is a placeholder.
objects = [key for key in list_objects(bucket_name, prefix) if not key.endswith('/')]

# Plan every rename first so that nothing in S3 is touched if the plan is unsafe.
rename_plan = []
for counter, old_key in enumerate(objects, start=1):
    # extracting the extension (raises ValueError if the key contains no '.')
    _, extension = old_key.rsplit('.', 1)
    rename_plan.append((old_key, f"{prefix}covid_{counter}.{extension}"))

# Refuse to run if a target name is already taken by a listed file (for example
# when this cell is executed a second time): copying onto it would overwrite a
# file that has not been renamed yet.
existing_keys = set(objects)
clashes = [new_key for _, new_key in rename_plan if new_key in existing_keys]
if clashes:
    raise RuntimeError(
        f"{len(clashes)} target name(s) already exist under {prefix} (e.g. {clashes[0]}); "
        "renaming would overwrite files that have not been renamed yet."
    )

# S3 has no rename operation: copy to the new key, then delete the old one.
for old_key, new_key in rename_plan:
    s3.copy_object(Bucket=bucket_name, CopySource={'Bucket': bucket_name, 'Key': old_key}, Key=new_key)
    s3.delete_object(Bucket=bucket_name, Key=old_key)

In [None]:
!aws s3 ls s3://project508data/new_sampling --recursive | grep -c "new_sampling"

In [3]:
# copy data from one s3 bucket into another S3 bucket
old_path = "s3://project508data/test/pneumonia/"
new_path = "s3://project508data/test/"
!aws s3 cp  $old_path $new_path --recursive

In [11]:
!aws s3 ls s3://project508data/train/ --recursive | grep -c "train/"

2700


## Renaming the Pneumonia Xrays

In [None]:
# renaming all pneumonia xrays
# List Objects in Nested Folder
def list_objects(bucket, prefix=''):
    """Generate the keys of all objects in ``bucket`` that start with ``prefix``.

    Args:
        bucket: Name of the S3 bucket to list.
        prefix: Key prefix used to filter the listing.

    Yields:
        One object key at a time, across all result pages.
    """
    listing_args = dict(Bucket=bucket, Prefix=prefix)
    for result_page in s3.get_paginator('list_objects_v2').paginate(**listing_args):
        # A page may lack 'Contents'; treat that as no objects.
        for item in result_page.get('Contents', ()):
            yield item['Key']

bucket_name = 'project508data'
prefix = 'pneumonia/'  # Specify the path to the files

# Keep real files only. Folder placeholders are keys ending in '/'; filtering
# them explicitly is safer than assuming the first listed key is a placeholder.
objects = [key for key in list_objects(bucket_name, prefix) if not key.endswith('/')]

# Plan every rename first so that nothing in S3 is touched if the plan is unsafe.
rename_plan = []
for counter, old_key in enumerate(objects, start=1):
    # extracting the extension (raises ValueError if the key contains no '.')
    _, extension = old_key.rsplit('.', 1)
    rename_plan.append((old_key, f"{prefix}pneumonia_{counter}.{extension}"))

# Refuse to run if a target name is already taken by a listed file (for example
# when this cell is executed a second time): copying onto it would overwrite a
# file that has not been renamed yet.
existing_keys = set(objects)
clashes = [new_key for _, new_key in rename_plan if new_key in existing_keys]
if clashes:
    raise RuntimeError(
        f"{len(clashes)} target name(s) already exist under {prefix} (e.g. {clashes[0]}); "
        "renaming would overwrite files that have not been renamed yet."
    )

# S3 has no rename operation: copy to the new key, then delete the old one.
for old_key, new_key in rename_plan:
    s3.copy_object(Bucket=bucket_name, CopySource={'Bucket': bucket_name, 'Key': old_key}, Key=new_key)
    s3.delete_object(Bucket=bucket_name, Key=old_key)

In [None]:
!aws s3 ls s3://project508data/pneumonia --recursive | grep -c "pneumonia"

## Renaming the Normal Xrays

In [None]:
!aws s3 ls s3://project508data/normal --recursive | grep -c "normal"

In [None]:
# renaming all normal xrays
# List Objects in Nested Folder
def list_objects(bucket, prefix=''):
    """Iterate over the keys of the objects stored in ``bucket`` under ``prefix``.

    Args:
        bucket: Name of the S3 bucket to list.
        prefix: Key prefix that listed objects must start with.

    Yields:
        Each object's key, following the listing across all pages.
    """
    pager = s3.get_paginator('list_objects_v2')
    for chunk in pager.paginate(Bucket=bucket, Prefix=prefix):
        # Skip pages that carry no objects.
        if 'Contents' not in chunk:
            continue
        for record in chunk['Contents']:
            yield record['Key']

bucket_name = 'project508data'
prefix = 'normal/'  # Specify the path to the files

# Keep real files only. Folder placeholders are keys ending in '/'; filtering
# them explicitly is safer than assuming the first listed key is a placeholder.
objects = [key for key in list_objects(bucket_name, prefix) if not key.endswith('/')]

# Plan every rename first so that nothing in S3 is touched if the plan is unsafe.
rename_plan = []
for counter, old_key in enumerate(objects, start=1):
    # extracting the extension (raises ValueError if the key contains no '.')
    _, extension = old_key.rsplit('.', 1)
    rename_plan.append((old_key, f"{prefix}normal_{counter}.{extension}"))

# Refuse to run if a target name is already taken by a listed file (for example
# when this cell is executed a second time): copying onto it would overwrite a
# file that has not been renamed yet.
existing_keys = set(objects)
clashes = [new_key for _, new_key in rename_plan if new_key in existing_keys]
if clashes:
    raise RuntimeError(
        f"{len(clashes)} target name(s) already exist under {prefix} (e.g. {clashes[0]}); "
        "renaming would overwrite files that have not been renamed yet."
    )

# S3 has no rename operation: copy to the new key, then delete the old one.
for old_key, new_key in rename_plan:
    s3.copy_object(Bucket=bucket_name, CopySource={'Bucket': bucket_name, 'Key': old_key}, Key=new_key)
    s3.delete_object(Bucket=bucket_name, Key=old_key)

## Splitting data into train/val/test folders

In [None]:
import boto3
import random
import os

# Connect to S3 (once, after the imports this cell relies on)
s3 = boto3.client('s3')

# Define bucket name and folder paths
bucket_name = 'project508data'
input_folder_path = 'new_sampling'
output_train_folder_path = 'train'
output_validation_folder_path = 'validation'
output_test_folder_path = 'test'

# initialize list to store file keys
file_keys = []

# List files in the input folder
# Paginator to handle pagination of S3 list objects response
paginator = s3.get_paginator('list_objects_v2')

# Paginate through all objects in the input folder. The trailing '/' keeps the
# prefix from also matching sibling prefixes that merely start with the same
# text, and folder placeholder keys (ending in '/') are not files.
for page in paginator.paginate(Bucket=bucket_name, Prefix=input_folder_path.rstrip('/') + '/'):
    for obj in page.get('Contents', []):
        if not obj['Key'].endswith('/'):
            file_keys.append(obj['Key'])

# Shuffle the files with a fixed seed so the split can be reproduced
SPLIT_SEED = 42
random.Random(SPLIT_SEED).shuffle(file_keys)

# Calculate the number of files for each category
num_files = len(file_keys)
num_normal_files = sum('normal' in file for file in file_keys)
num_pneumonia_files = sum('pneumonia' in file for file in file_keys)
num_covid_files = sum('covid' in file for file in file_keys)

# Calculate the number of files for each set; the test set takes whatever is
# left after rounding so that no file is dropped from the split
num_train_files = int(0.9 * num_files)
num_validation_files = int(0.05 * num_files)
num_test_files = num_files - num_train_files - num_validation_files

# Create output folders (zero-byte placeholder keys ending in '/')
s3.put_object(Bucket=bucket_name, Key=output_train_folder_path + '/')
s3.put_object(Bucket=bucket_name, Key=output_validation_folder_path + '/')
s3.put_object(Bucket=bucket_name, Key=output_test_folder_path + '/')

# Copy files to output folders
def copy_files(files, output_folder_path, num_files_to_copy):
    """Copy the first ``num_files_to_copy`` keys of ``files`` into an output folder.

    Each object keeps its base name (the part of the key after the last '/')
    and is copied within ``bucket_name``.

    Args:
        files: Sequence of source object keys.
        output_folder_path: Destination folder (key prefix), with or without a
            trailing '/'.
        num_files_to_copy: Number of leading entries of ``files`` to copy.
    """
    folder = output_folder_path.rstrip('/')
    for file in files[:num_files_to_copy]:
        # S3 keys always use '/', so build the key by hand instead of relying
        # on os.path, whose separator depends on the local platform.
        name = file.rsplit('/', 1)[-1]
        if not name:
            # Folder placeholder key: there is no file to copy.
            continue
        output_key = f"{folder}/{name}" if folder else name
        s3.copy_object(CopySource={'Bucket': bucket_name, 'Key': file}, Bucket=bucket_name, Key=output_key)

# Copy files to train, validation, and test folders.
# file_keys is already shuffled and its keys are unique, so consecutive slices
# select the same files as filtering out the earlier sets, without rescanning
# the list for every key.
validation_start = num_train_files
test_start = num_train_files + num_validation_files
copy_files(file_keys, output_train_folder_path, num_train_files)
copy_files(file_keys[validation_start:], output_validation_folder_path, num_validation_files)
copy_files(file_keys[test_start:], output_test_folder_path, num_test_files)

print("Files copied successfully.")

In [None]:
num_train_files

In [7]:
# checking the counts of each category in each folder
import boto3
def count_files_in_folder(bucket_name, folder_prefix):
    """Count the objects under an S3 prefix per category word found in the key.

    Every key is checked for 'normal', then 'pneumonia', then 'covid'; the
    first word that occurs in the key decides its category, and keys that
    contain none of them are ignored.

    Args:
        bucket_name: Bucket to inspect.
        folder_prefix: Key prefix to list.

    Returns:
        Tuple ``(normal_count, pneumonia_count, covid_count)``.
    """
    s3_client = boto3.client('s3')

    labels = ['normal', 'pneumonia', 'covid']
    counts = [0, 0, 0]  # same order as labels

    pages = s3_client.get_paginator('list_objects_v2').paginate(Bucket=bucket_name, Prefix=folder_prefix)
    for page in pages:
        for obj in page.get('Contents', []):
            for position, label in enumerate(labels):
                if label in obj['Key']:
                    counts[position] += 1
                    break  # first matching label wins
    return tuple(counts)
# Specify your S3 bucket name and folder prefix
bucket_name = 'project508data'
folder_prefix = 'train/'
# Call the function to count files; the result is a 3-tuple in the order
# (normal, pneumonia, covid)
normal_count, pneumonia_count, covid_count = count_files_in_folder(bucket_name, folder_prefix)
# Print the counts
print("Normal files:", normal_count)
print("Pneumonia files:", pneumonia_count)
print("COVID files:", covid_count)

Normal files: 903
Pneumonia files: 908
COVID files: 888


## Getting rid of .png files

Originally there were 2700 images for training, only deleted 10 files.
Originally there were 150 images for validation, only deleted 2 files.
Originally there were 150 images for test, no deletions.

In [31]:
import boto3

def delete_png_files(bucket_name, folder_prefix):
    """Delete every object under an S3 prefix whose key ends with '.png'.

    Prints one line per deleted key and a final completion message.

    Args:
        bucket_name: Bucket to clean up.
        folder_prefix: Key prefix to scan.
    """
    client = boto3.client('s3')
    pages = client.get_paginator('list_objects_v2').paginate(Bucket=bucket_name, Prefix=folder_prefix)

    for page in pages:
        # Pick out this page's PNG keys (the suffix match is case-sensitive)
        png_keys = [entry['Key'] for entry in page.get('Contents', []) if entry['Key'].endswith('.png')]
        for png_key in png_keys:
            client.delete_object(Bucket=bucket_name, Key=png_key)
            print(f"Deleted: {png_key}")

    print("Deletion complete.")

# Specify the bucket name and folder prefix
bucket_name = 'project508data'
folder_prefix = 'test/'

# Call the function to delete PNG files in the specified folder
# (only keys ending in lowercase '.png' are matched by delete_png_files)
delete_png_files(bucket_name, folder_prefix)


Deletion complete.


## Renaming files in the validation and test folders

In [None]:
s3 = boto3.client('s3')

# Define S3 bucket and folder path
bucket_name = 'project508data'
folder_path = 'validation/'

# Category word looked for in the key -> prefix of the new file name.
# The order is the match precedence: covid, then pneumonia, then normal.
rename_prefixes = [('covid', 'x'), ('pneumonia', 'y'), ('normal', 'z')]

# One running counter per category, each starting at 1
counters = {category: 1 for category, _ in rename_prefixes}

# Collect every key up front: this walks all result pages, copes with an empty
# folder, and keeps the listing independent of the renames performed below.
paginator = s3.get_paginator('list_objects_v2')
keys = [
    obj['Key']
    for page in paginator.paginate(Bucket=bucket_name, Prefix=folder_path)
    for obj in page.get('Contents', [])
]

for key in keys:
    # First category word contained in the key, or None if there is none
    match = next(((category, letter) for category, letter in rename_prefixes if category in key), None)
    if match is None:
        # e.g. the folder placeholder or an already renamed file
        print(f"Skipped (no category in key): {key}")
        continue
    category, letter = match

    # extracting the file name and extension
    filename, extension = key.rsplit('.', 1)

    # Rename object: copy to the new key, then delete the old one
    new_key = f'{folder_path}{letter}_{counters[category]}.{extension}'
    s3.copy_object(Bucket=bucket_name, CopySource={'Bucket': bucket_name, 'Key': key}, Key=new_key)
    s3.delete_object(Bucket=bucket_name, Key=key)
    counters[category] += 1

# Reported once, after every key has been handled
print("Done Renaming")

In [None]:
s3 = boto3.client('s3')

# Define S3 bucket and folder path
bucket_name = 'project508data'
folder_path = 'test/'

# List objects in the specified S3 folder
# NOTE(review): `response` is not used again in this cell, and a single
# list_objects_v2 call returns only the first page of results.
response = s3.list_objects_v2(Bucket=bucket_name, Prefix=folder_path)



In [None]:
!aws s3 ls s3://project508data/test --recursive | grep -c "test"

# Renaming the files that have .jpg to .jpeg to prep for Sagemaker's image classification algorithm

The folder path was changed multiple times to include the train/validation/test folders.

In [None]:
# Specify bucket name and folder path
bucket_name = 'project508data'
folder_path = 'test/'

# Extension being replaced and the extension that replaces it
old_suffix, new_suffix = '.jpg', '.jpeg'

# Paginator so that every object in the folder is visited
paginator = s3.get_paginator('list_objects_v2')

for page in paginator.paginate(Bucket=bucket_name, Prefix=folder_path):
    # A page without 'Contents' has nothing to rename
    for obj in page.get('Contents', []):
        file_key = obj['Key']
        if not file_key.endswith(old_suffix):
            continue

        # Swap the extension, copy to the new key, then remove the old key
        new_key = file_key[:-len(old_suffix)] + new_suffix
        s3.copy_object(
            CopySource={'Bucket': bucket_name, 'Key': file_key},
            Bucket=bucket_name,
            Key=new_key
        )
        s3.delete_object(Bucket=bucket_name, Key=file_key)
        print(f"Renamed {file_key} to {new_key}")

print("Renaming complete.")

After renaming, there was an extra period introduced to the extension name. So this bit of code is just renaming it.

In [42]:
import boto3

# Initialize S3 client
s3_client = boto3.client('s3')

# Specify bucket name and folder path
bucket_name = 'project508data'
folder_path = 'train/'

# Function to remove extra dots from file names
def remove_extra_dots(bucket, key):
    """Rename an object whose key contains '..' so that each '..' becomes '.'.

    Keys without '..' are left untouched. Otherwise the object is copied to
    the corrected key, the original is deleted, and the rename is printed.

    Args:
        bucket: Bucket that holds the object.
        key: Object key to check and, if needed, correct.
    """
    # Nothing to fix when the key has no doubled dot
    if '..' not in key:
        return

    # Every non-overlapping '..' anywhere in the key collapses to a single '.'
    new_key = key.replace('..', '.')

    # Copy to the corrected key, then drop the original
    source = {'Bucket': bucket, 'Key': key}
    s3_client.copy_object(Bucket=bucket, CopySource=source, Key=new_key)
    s3_client.delete_object(Bucket=bucket, Key=key)
    print(f"Renamed {key} to {new_key}")

# Paginate through objects in the folder
paginator = s3_client.get_paginator('list_objects_v2')
for page in paginator.paginate(Bucket=bucket_name, Prefix=folder_path):
    # A page without 'Contents' yields no objects
    for obj in page.get('Contents', []):
        file_key = obj['Key']
        # Remove extra dots from file name (no-op for keys without '..')
        remove_extra_dots(bucket_name, file_key)

Renamed train/normal_1..jpeg to train/normal_1.jpeg
Renamed train/normal_10..jpeg to train/normal_10.jpeg
Renamed train/normal_100..jpeg to train/normal_100.jpeg
Renamed train/normal_1000..jpeg to train/normal_1000.jpeg
Renamed train/normal_1001..jpeg to train/normal_1001.jpeg
Renamed train/normal_1002..jpeg to train/normal_1002.jpeg
Renamed train/normal_1003..jpeg to train/normal_1003.jpeg
Renamed train/normal_1004..jpeg to train/normal_1004.jpeg
Renamed train/normal_1005..jpeg to train/normal_1005.jpeg
Renamed train/normal_1006..jpeg to train/normal_1006.jpeg
Renamed train/normal_1007..jpeg to train/normal_1007.jpeg
Renamed train/normal_1008..jpeg to train/normal_1008.jpeg
Renamed train/normal_1009..jpeg to train/normal_1009.jpeg
Renamed train/normal_101..jpeg to train/normal_101.jpeg
Renamed train/normal_1010..jpeg to train/normal_1010.jpeg
Renamed train/normal_1011..jpeg to train/normal_1011.jpeg
Renamed train/normal_1012..jpeg to train/normal_1012.jpeg
Renamed train/normal_1013..j

# Release Resources

In [None]:
%%html

<p><b>Shutting down your kernel for this notebook to release resources.</b></p>
<button class="sm-command-button" data-commandlinker-command="kernelmenu:shutdown" style="display:none;">Shutdown Kernel</button>
        
<script>
// Look up the hidden "sm-command-button" element defined above and click it.
try {
    els = document.getElementsByClassName("sm-command-button");
    els[0].click();
}
// Any error (for example when no such element is found) is deliberately ignored.
catch(err) {
    // NoOp
}    
</script>

In [None]:
%%javascript

// Save a checkpoint of the notebook, then delete its session.
// NOTE(review): relies on a global `Jupyter` object being present — confirm for the target front end.
try {
    Jupyter.notebook.save_checkpoint();
    Jupyter.notebook.session.delete();
}
// Any error (for example when `Jupyter` is not defined) is deliberately ignored.
catch(err) {
    // NoOp
}