In [1]:
import sagemaker
import boto3
import pandas as pd

# Environment bootstrap cell.
# It was created to reload the environment after shutting down for the
# evening: later cells read `role` and `train_data_s3_uri`, so run this
# cell first in a fresh kernel.

# --- IMPORTANT: REPLACE WITH YOUR BUCKET NAME ---
bucket_name = "recproject-recdata"
# ----------------------------------------------

# S3 prefix of the sampled training data; this is the training job's input.
train_data_s3_uri = f"s3://{bucket_name}/train-data-sampled/"

# Resolve the AWS region and the SageMaker IAM role from the environment
# instead of hard-coding them.
sagemaker_session = sagemaker.Session()
aws_region = sagemaker_session.boto_region_name
role = sagemaker.get_execution_role()

# Echo the resolved configuration so it can be checked before launching jobs.
print(
    f"Using S3 bucket: {bucket_name}",
    f"Using IAM role: {role}",
    f"Training data is located at: {train_data_s3_uri}",
    sep="\n",
)

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml
Using S3 bucket: recproject-recdata
Using IAM role: arn:aws:iam::468586614495:role/AWSSageMakerRole-SentimentProject
Training data is located at: s3://recproject-recdata/train-data-sampled/


In [2]:
from sagemaker.huggingface import HuggingFace

# Hyperparameters handed to train.py by the training job.
hyperparameters = {
    'epochs': 1,
    'train-batch-size': 32,
    'model-name': 'distilbert-base-uncased'
}

# All estimator settings in one place so they are easy to review and tweak.
estimator_settings = dict(
    # What to run: train.py, taken from the current directory.
    entry_point='train.py',
    source_dir='./',
    # Where to run it: a single GPU instance.
    instance_type='ml.g4dn.xlarge',
    instance_count=1,
    # IAM role with the permissions the job needs (resolved in the setup cell).
    role=role,
    # Framework versions: Hugging Face Transformers, PyTorch, Python.
    transformers_version='4.28',
    pytorch_version='2.0',
    py_version='py310',
    hyperparameters=hyperparameters,
)

# Build the estimator; nothing is launched until fit() is called on it.
huggingface_estimator = HuggingFace(**estimator_settings)

print("Estimator configured. Ready to launch the training job.")

Estimator configured. Ready to launch the training job.


In [None]:
# Kick off the training job and block until fit() returns.
print(
    "Starting the SageMaker Training Job...",
    "This will take approximately 20-30 minutes.",
    sep="\n",
)

# Single input channel named 'training', pointing at the sampled data in S3.
# The channel name is meant to correspond to the --training-dir argument in
# train.py (train.py is not shown here -- confirm against the script).
training_inputs = {'training': train_data_s3_uri}
huggingface_estimator.fit(training_inputs)

print("\n✅ Training Job Complete.")

Starting the SageMaker Training Job...
This will take approximately 20-30 minutes.


INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker:Creating training-job with name: huggingface-pytorch-training-2025-06-17-19-10-45-607


2025-06-17 19:10:54 Starting - Starting the training job...
2025-06-17 19:11:15 Starting - Preparing the instances for training...
2025-06-17 19:11:47 Downloading - Downloading input data...
2025-06-17 19:12:17 Downloading - Downloading the training image..............................
2025-06-17 19:17:26 Training - Training image download completed. Training in progress...[34mbash: cannot set terminal process group (-1): Inappropriate ioctl for device[0m
[34mbash: no job control in this shell[0m
[34m2025-06-17 19:17:37,880 sagemaker-training-toolkit INFO     Imported framework sagemaker_pytorch_container.training[0m
[34m2025-06-17 19:17:37,900 sagemaker-training-toolkit INFO     No Neurons detected (normal if no neurons installed)[0m
[34m2025-06-17 19:17:37,909 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.[0m
[34m2025-06-17 19:17:37,917 sagemaker_pytorch_container.training INFO     Invoking user training script.[0m
[34m2025-06-17 1

In [7]:
# Stand up a real-time endpoint from the trained estimator.
# A small instance is used to keep hosting cost down.
# NOTE(review): whether ml.t2.medium is covered by the AWS Free Tier for
# real-time inference is not established here -- confirm against current
# SageMaker pricing before relying on it.
ENDPOINT_INSTANCE_TYPE = 'ml.t2.medium'
ENDPOINT_INSTANCE_COUNT = 1

print(
    "Deploying model to a SageMaker Endpoint...",
    "This will take 5-10 minutes.",
    sep="\n",
)

# deploy() returns the predictor that the following cells use to query and
# later delete the endpoint.
predictor = huggingface_estimator.deploy(
    initial_instance_count=ENDPOINT_INSTANCE_COUNT,
    instance_type=ENDPOINT_INSTANCE_TYPE,
)

print(f"\n✅ Deployment complete! Endpoint name: {predictor.endpoint_name}")

INFO:sagemaker:Creating model with name: huggingface-pytorch-training-2025-06-17-22-01-21-900


Deploying model to a SageMaker Endpoint...
This will take 5-10 minutes.


INFO:sagemaker:Creating endpoint-config with name huggingface-pytorch-training-2025-06-17-22-01-21-900
INFO:sagemaker:Creating endpoint with name huggingface-pytorch-training-2025-06-17-22-01-21-900


-------------------------------!
✅ Deployment complete! Endpoint name: huggingface-pytorch-training-2025-06-17-22-01-21-900


In [8]:
# --- Smoke-test the live endpoint with a few hand-written reviews ---

test_reviews = [
    "This is the best musical instrument I have ever owned. The quality is outstanding and the sound is perfect.",
    "The guitar arrived with a huge crack in the body. It's unplayable and the support has been useless.",
    "It's okay for the price, but the tuning pegs feel a little cheap and it doesn't stay in tune very well.",
    "Wow, just wow! I am blown away by the rich tone of this piano."
]

print("--- Sending sample reviews to the endpoint for prediction ---\n")

for review in test_reviews:
    print(f"Review: \"{review}\"")

    # One request per review; only the first entry of the response is reported.
    result = predictor.predict(data={"inputs": review})
    top = result[0]
    predicted_label, score = top['label'], top['score']

    # 'LABEL_1' is reported as positive; any other label is reported as negative.
    if predicted_label == 'LABEL_1':
        sentiment = "POSITIVE"
    else:
        sentiment = "NEGATIVE"

    print(f"--> Prediction: {sentiment} (Confidence: {score:.2%})\n")

--- Sending sample reviews to the endpoint for prediction ---

Review: "This is the best musical instrument I have ever owned. The quality is outstanding and the sound is perfect."
--> Prediction: POSITIVE (Confidence: 99.96%)

Review: "The guitar arrived with a huge crack in the body. It's unplayable and the support has been useless."
--> Prediction: NEGATIVE (Confidence: 98.42%)

Review: "It's okay for the price, but the tuning pegs feel a little cheap and it doesn't stay in tune very well."
--> Prediction: NEGATIVE (Confidence: 79.53%)

Review: "Wow, just wow! I am blown away by the rich tone of this piano."
--> Prediction: POSITIVE (Confidence: 99.94%)



In [None]:
# Tear down what deploy() created so nothing is left running or orphaned.
# Only SageMaker resources are removed; the cell does not touch S3.
endpoint_name = predictor.endpoint_name
print(f"Deleting the SageMaker Endpoint: {endpoint_name}...")

try:
    # Delete the SageMaker Model resource first, while the endpoint
    # configuration it is looked up through still exists.
    predictor.delete_model()
finally:
    # Always remove the endpoint itself, even if the model deletion above
    # failed: the endpoint is the resource that keeps accruing charges.
    predictor.delete_endpoint()

print("Endpoint deleted successfully.")