In [None]:
import json
import os
import urllib.request

import numpy as np
import pandas as pd
from PIL import Image
from sklearn.datasets import fetch_openml

# Download MNIST dataset
def download_mnist(data_dir):
    """Fetch MNIST from OpenML and export every sample into ``data_dir``.

    Sample ``i`` is written as ``<data_dir>/<i>.png`` (a 28x28 grayscale
    image) and ``<data_dir>/<i>.txt`` (the digit label as text).

    Args:
        data_dir: Destination directory; created, including any missing
            parents, when it does not exist yet.
    """
    mnist = fetch_openml('mnist_784', version=1)

    # np.asarray accepts both the DataFrame/Series and the plain-array return
    # types of fetch_openml. The pixel array arrives as float64 or int64,
    # neither of which PIL can write as a PNG, so cast to 8-bit first.
    images = np.asarray(mnist.data).reshape(-1, 28, 28).astype(np.uint8)
    labels = np.asarray(mnist.target).astype(int)

    # Create the output directory if it doesn't exist
    os.makedirs(data_dir, exist_ok=True)

    # Save each image/label pair under a shared numeric stem
    for i, (image, label) in enumerate(zip(images, labels)):
        image_path = os.path.join(data_dir, f'{i}.png')
        label_path = os.path.join(data_dir, f'{i}.txt')

        # Save image
        Image.fromarray(image).save(image_path)

        # Save label
        with open(label_path, 'w') as f:
            f.write(str(label))

# Export the dataset as per-sample .png/.txt files under ./mnist_data
download_mnist(data_dir='mnist_data')


In [None]:
import sagemaker
from sagemaker.image_uris import retrieve

# Session object and execution role shared by the training and deployment cells
sagemaker_session = sagemaker.Session()
role = sagemaker.get_execution_role()

# Look up the built-in image-classification training container for the
# region the session is bound to
aws_region = sagemaker_session.boto_session.region_name
training_image = retrieve(framework='image-classification', region=aws_region)


In [None]:
import shutil

# Prepare directories for training and validation
def prepare_directories():
    """Reset the local ``train`` and ``validation`` folders.

    Each folder is looked up relative to the current working directory. An
    existing one is deleted together with its contents, then an empty folder
    of the same name is created.
    """
    for split_name in ('train', 'validation'):
        stale = os.path.isdir(split_name)
        if stale:
            shutil.rmtree(split_name)
        os.mkdir(split_name)

# Start from empty train/ and validation/ folders so files left over from an
# earlier run cannot end up in the new split
prepare_directories()

# Split data into training and validation
import random
from PIL import Image

def split_data(data_dir, train_ratio=0.8, seed=42):
    """Copy the images in ``data_dir`` into ``train/`` and ``validation/``.

    The ``.png`` files are shuffled and the first ``train_ratio`` share goes
    to ``train``; the remainder goes to ``validation``. When an image has a
    label file with the same stem (``<stem>.txt``) next to it, the label is
    copied to the same target folder so image and label stay together.

    Args:
        data_dir: Directory containing the ``.png`` images.
        train_ratio: Fraction of the images assigned to ``train``.
        seed: Seed for the shuffle. The same seed always produces the same
            split; pass ``None`` for a different split on every call.
    """
    # os.listdir returns entries in arbitrary order, so sort before shuffling;
    # otherwise the same seed could still give different splits.
    files = sorted(f for f in os.listdir(data_dir) if f.endswith('.png'))
    # A private Random instance keeps the global random state untouched.
    random.Random(seed).shuffle(files)
    split_index = int(len(files) * train_ratio)

    for target_dir in ('train', 'validation'):
        os.makedirs(target_dir, exist_ok=True)

    for i, file in enumerate(files):
        target_dir = 'train' if i < split_index else 'validation'
        shutil.copy(os.path.join(data_dir, file), os.path.join(target_dir, file))

        # Carry the matching label along, if one exists
        label_file = os.path.splitext(file)[0] + '.txt'
        label_path = os.path.join(data_dir, label_file)
        if os.path.isfile(label_path):
            shutil.copy(label_path, os.path.join(target_dir, label_file))

# Distribute the exported images between train/ and validation/ using the
# function's default ratio
split_data(data_dir='mnist_data')


In [None]:
from sagemaker import get_execution_role
from sagemaker.estimator import Estimator

# Define the SageMaker Estimator
model = Estimator(
    image_uri=training_image,
    role=role,
    instance_count=1,
    instance_type='ml.p3.2xlarge',
    volume_size=50,
    max_run=3600,
    output_path='s3://YOUR_BUCKET_NAME/mnist_output',
    sagemaker_session=sagemaker_session
)

# The built-in image-classification algorithm requires num_training_samples;
# derive it from the local training split instead of hard-coding a number.
num_training_samples = sum(
    1 for name in os.listdir('train') if name.endswith('.png')
)

# Set hyperparameters
# The algorithm's batch-size hyperparameter is named mini_batch_size;
# 'batch_size' is not one of its hyperparameters.
model.set_hyperparameters(
    num_classes=10,
    num_training_samples=num_training_samples,
    epochs=10,
    mini_batch_size=32,
    learning_rate=0.001
)

# Specify data channels
# NOTE(review): nothing in this notebook uploads the local train/ and
# validation/ folders to these S3 prefixes, and image-format training with the
# built-in algorithm also expects train_lst / validation_lst channels with
# .lst label files - confirm the data is staged accordingly before running.
train_data = sagemaker.inputs.TrainingInput(
    s3_data='s3://YOUR_BUCKET_NAME/train',
    content_type='image/png'
)
validation_data = sagemaker.inputs.TrainingInput(
    s3_data='s3://YOUR_BUCKET_NAME/validation',
    content_type='image/png'
)

# Train the model
model.fit({'train': train_data, 'validation': validation_data})


In [None]:
# Deploy the model
from sagemaker.serializers import IdentitySerializer

# predict_image sends the raw bytes of a .png file. The predictor's default
# serializer would label that payload application/octet-stream, so state the
# real content type explicitly.
predictor = model.deploy(
    initial_instance_count=1,
    instance_type='ml.m4.xlarge',
    serializer=IdentitySerializer(content_type='image/png')
)


In [None]:
import matplotlib.pyplot as plt

# Function to make predictions
def predict_image(image_path):
    """Send one image file to the deployed endpoint.

    Args:
        image_path: Path of the image file whose raw bytes are submitted.

    Returns:
        Whatever ``predictor.predict`` returns for that payload.
    """
    with open(image_path, 'rb') as image_file:
        payload = image_file.read()
        return predictor.predict(payload)

# Test with an image
# split_data shuffles the files, so a specific image such as 0.png is not
# guaranteed to be in validation/; fall back to the first one that is there.
image_path = 'validation/0.png'
if not os.path.isfile(image_path):
    available = sorted(
        name for name in os.listdir('validation') if name.endswith('.png')
    )
    if not available:
        raise FileNotFoundError("No .png images found in 'validation'")
    image_path = os.path.join('validation', available[0])

response = predict_image(image_path)
# NOTE(review): assumes the endpoint answers with a JSON list of per-class
# scores, as documented for the built-in image-classification algorithm; how
# a class index maps to a digit depends on the labels used in training.
# The response is raw bytes with the default deserializer, or already decoded
# if a JSON deserializer was configured on the predictor.
scores = json.loads(response) if isinstance(response, (bytes, bytearray, str)) else response
prediction = int(np.argmax(scores))
print(f'Prediction: {prediction}')

# Display the image (the context manager closes the file handle afterwards)
with Image.open(image_path) as img:
    plt.imshow(img, cmap='gray')
plt.title(f'Prediction: {prediction}')
plt.show()


In [None]:
# Clean up
# Delete the endpoint created by model.deploy() to avoid incurring costs
predictor.delete_endpoint()

# Reached only if delete_endpoint() returned without raising
print('Endpoint deleted successfully.')