In [1]:
!pip install pandas sagemaker



In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
import sagemaker
from sagemaker import get_execution_role



sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


In [3]:
role = get_execution_role()
session = sagemaker.Session()
bucket = session.default_bucket()  # SageMaker will create an S3 bucket for you


In [4]:
red_wine = pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv", sep=';')
white_wine = pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv", sep=';')


In [5]:
red_wine['type'] = 0
white_wine['type'] = 1
data = pd.concat([red_wine, white_wine])


In [6]:
train_data = data.sample(frac=0.8, random_state=42)
test_data = data.drop(train_data.index)

In [8]:
train_data.to_csv('train.csv', index=False, header=False)
test_data.to_csv('test.csv', index=False, header=False)

In [9]:
s3_train_path = session.upload_data('train.csv', bucket=bucket, key_prefix='wine/train')
s3_test_path = session.upload_data('test.csv', bucket=bucket, key_prefix='wine/test')


In [10]:
print(f"Training data uploaded to: {s3_train_path}")
print(f"Test data uploaded to: {s3_test_path}")

Training data uploaded to: s3://sagemaker-us-east-2-056614919553/wine/train/train.csv
Test data uploaded to: s3://sagemaker-us-east-2-056614919553/wine/test/test.csv


In [11]:
from sagemaker.amazon.amazon_estimator import get_image_uri


In [12]:
container = sagemaker.image_uris.retrieve('xgboost', session.boto_region_name, '1.2-1')


In [13]:
xgb = sagemaker.estimator.Estimator(
    container,
    role=role,
    instance_count=1,
    instance_type='ml.m5.large',
    output_path=f's3://{bucket}/wine/output',
    sagemaker_session=session
)


In [14]:
xgb.set_hyperparameters(
    objective='reg:squarederror',  # For regression
    num_round=100,                # Number of training rounds
    max_depth=5                   # Tree depth
)


In [15]:
xgb.fit({'train': s3_train_path, 'test': s3_test_path})

2025-05-25 00:55:27 Starting - Starting the training job......
2025-05-25 00:56:18 Starting - Preparing the instances for training...
2025-05-25 00:56:38 Downloading - Downloading input data...
2025-05-25 00:57:23 Downloading - Downloading the training image......
2025-05-25 00:58:29 Training - Training image download completed. Training in progress.
2025-05-25 00:58:29 Uploading - Uploading generated training model[34m[2025-05-25 00:58:23.444 ip-10-0-82-210.us-east-2.compute.internal:7 INFO utils.py:27] RULE_JOB_STOP_SIGNAL_FILENAME: None[0m
[34mINFO:sagemaker-containers:Imported framework sagemaker_xgboost_container.training[0m
[34mINFO:sagemaker-containers:Failed to parse hyperparameter objective value reg:squarederror to Json.[0m
[34mReturning the value itself[0m
[34mINFO:sagemaker-containers:No GPUs detected (normal if no gpus installed)[0m
[34mINFO:sagemaker_xgboost_container.training:Running XGBoost Sagemaker in algorithm mode[0m
[34mERROR:sagemaker-containers:Repor

In [None]:
Deploy Model

In [None]:
predictor = xgb.deploy(
    initial_instance_count=1,
    instance_type='ml.t2.medium',
    endpoint_name='wine-quality-predictor'
)


In [None]:
# Test with sample red wine data

In [None]:
import numpy as np
sample = [7.4, 0.7, 0.0, 1.9, 0.076, 11, 34, 0.9978, 3.51, 0.56, 9.4, 0]  # Last 0 = red wine
prediction = predictor.predict(np.array([sample]))
print(f"Predicted wine quality: {prediction}")

In [None]:
Clean Up

In [None]:
predictor.delete_endpoint()