In [1]:
import pandas as pd 
import numpy as np 

In [2]:
import boto3
from sklearn.model_selection import train_test_split
import sagemaker
from sagemaker import session 

In [3]:
import io
import sagemaker.amazon.common as smac
import os

In [4]:
from sagemaker.amazon.amazon_estimator import image_uris

In [44]:
df = pd.read_csv('student_scores.csv')

In [45]:
df.head(5)

Unnamed: 0,Hours,Scores
0,2.5,21
1,5.1,47
2,3.2,27
3,8.5,75
4,3.5,30


In [46]:
df.shape

(25, 2)

In [47]:
#seprate 
x = df[['Hours']]
y = df[['Scores']]

In [48]:
x = x.astype('float32')
y = y.astype('float32')

In [49]:
# split data 
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=42, test_size= 0.2)

In [50]:
x_train.head(5)

Unnamed: 0,Hours
9,2.7
13,3.3
1,5.1
22,3.8
5,1.5


In [51]:
# reset the index 

x_train = x_train.reset_index(drop= True)
x_test = x_test.reset_index(drop= True)
y_train = y_train.reset_index(drop= True)
y_test = x_test.reset_index(drop= True)

In [52]:
x_train.head(5)

Unnamed: 0,Hours
0,2.7
1,3.3
2,5.1
3,3.8
4,1.5


In [53]:
# we need to take label column as vector 

y_train = y_train.iloc[:,0]

In [54]:
y_train.head(5)

0    25.0
1    42.0
2    47.0
3    35.0
4    20.0
Name: Scores, dtype: float32

In [55]:
y_test = y_test.iloc[:,0]

In [56]:
# let create sagemaker session

sagemaker_session = sagemaker.Session()

#define a bucket name 

bucket_name = 'saurabh-sagemaker'

# define the prefiex (folder)

prefix = 'liner-regression-model'

# get the execuation role

role = sagemaker.get_execution_role()



In [57]:
# convert x_train to numpy 

x_train = np.array(x_train)

In [58]:
#create buffer

buf = io.BytesIO()
smac.write_numpy_to_dense_tensor(buf, x_train, y_train)

buf.seek(0)

0

In [59]:
import boto3

In [60]:
# store the data to s3 bucket

# define file name 
key = 'student-data'

#code to upload in s3
boto3.resource('s3').Bucket(bucket_name).Object(os.path.join(prefix, 'train', key)).upload_fileobj(buf)

# path of our data
s3_train_data = f"s3://{bucket_name}/{prefix}/train/{key}"

print('data upload', s3_train_data)

data upload s3://saurabh-sagemaker/liner-regression-model/train/student-data


In [61]:
# now upload test data to s3 bucket
# convert x_test to numpy array

x_test = np.array(x_test)


#create buffer

buf = io.BytesIO()
smac.write_numpy_to_dense_tensor(buf, x_test, y_test)

buf.seek(0)



# store the data to s3 bucket

# define file name 
key = 'student-test-data'

#code to upload in s3
boto3.resource('s3').Bucket(bucket_name).Object(os.path.join(prefix, 'test', key)).upload_fileobj(buf)

# path of our data
s3_train_data = f"s3://{bucket_name}/{prefix}/test/{key}"

print('data upload', s3_train_data)

data upload s3://saurabh-sagemaker/liner-regression-model/test/student-test-data


In [62]:
# output location 

output_location = f"s3://{bucket_name}/{prefix}/output"

output_location

's3://saurabh-sagemaker/liner-regression-model/output'

In [63]:
# bring container creating model 

container = sagemaker.image_uris.retrieve(
    framework="linear-learner",
    region=boto3.Session().region_name
)

print(container)

INFO:sagemaker.image_uris:Same images used for training and inference. Defaulting to image scope: inference.
INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.


991648021394.dkr.ecr.ap-south-1.amazonaws.com/linear-learner:1


In [64]:
# Create the estimator
linear = sagemaker.estimator.Estimator(
    image_uri=container,                     # Algorithm container
    role= role,
    instance_count=1,                         # Number of training instances
    instance_type="ml.c4.xlarge",             # Instance type
    output_path= output_location,   # Where model artifacts are stored
    sagemaker_session=sagemaker.Session())

In [65]:
# Set hyperparameters

linear.set_hyperparameters(
    feature_dim = 1,   # Number of input features
    predictor_type="regressor",    # Since it's linear regression
    mini_batch_size= 4,            # Batch size
    epochs=6,                     # Number of passes through data
    loss="absolute_loss",            # L1 loss
    num_models =32                # Train multiple models in parallel
)

In [66]:
# fit model 

linear.fit({'train': s3_train_data})

INFO:sagemaker.telemetry.telemetry_logging:SageMaker Python SDK will collect telemetry to help us better understand our user's needs, diagnose issues, and deliver additional features.
To opt out of telemetry, please disable via TelemetryOptOut parameter in SDK defaults config. For more information, refer to https://sagemaker.readthedocs.io/en/stable/overview.html#configuring-and-using-defaults-with-the-sagemaker-python-sdk.
INFO:sagemaker:Creating training-job with name: linear-learner-2025-08-12-08-48-52-285


2025-08-12 08:48:53 Starting - Starting the training job...
2025-08-12 08:49:09 Starting - Preparing the instances for training...
2025-08-12 08:49:29 Downloading - Downloading input data...
2025-08-12 08:50:15 Downloading - Downloading the training image.........
2025-08-12 08:51:51 Training - Training image download completed. Training in progress..[34mDocker entrypoint called with argument(s): train[0m
[34mRunning default environment configuration script[0m
[34m[08/12/2025 08:51:59 INFO 140639117055808] Reading default configuration from /opt/amazon/lib/python3.8/site-packages/algorithm/resources/default-input.json: {'mini_batch_size': '1000', 'epochs': '15', 'feature_dim': 'auto', 'use_bias': 'true', 'binary_classifier_model_selection_criteria': 'accuracy', 'f_beta': '1.0', 'target_recall': '0.8', 'target_precision': '0.8', 'num_models': 'auto', 'num_calibration_samples': '10000000', 'init_method': 'uniform', 'init_scale': '0.07', 'init_sigma': '0.01', 'init_bias': '0.0', 'opt

In [67]:
# depoly the model 

liner_regresor = linear.deploy(initial_instance_count=1, instance_type='ml.m4.xlarge')

INFO:sagemaker:Creating model with name: linear-learner-2025-08-12-08-56-36-621
INFO:sagemaker:Creating endpoint-config with name linear-learner-2025-08-12-08-56-36-621
INFO:sagemaker:Creating endpoint with name linear-learner-2025-08-12-08-56-36-621


-------!

In [68]:
liner_regresor.delete_endpoint

<bound method Predictor.delete_endpoint of <sagemaker.base_predictor.Predictor object at 0x7feaf0ad1520>>