# EdTech
Elias Ejo | Jun-16-2023

# Importing the required libraries

In [5]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split

import sagemaker, boto3
from sagemaker import get_execution_role
from sagemaker.estimator import Estimator
from sagemaker.inputs import TrainingInput

Project Task: Week 4
Deployment in SageMaker

In [6]:
# Load the labelled comments CSV into a DataFrame.
comments_csv_path = "Vid_N-DQ8iDlH_U_comments.csv"
df_comments = pd.read_csv(comments_csv_path)

In [7]:
# Drop the "Unnamed: 0" column (presumably the index saved when the CSV was written).
# errors="ignore" keeps this cell idempotent: re-running it after the column is
# already gone no longer raises KeyError.
df_comments = df_comments.drop(columns=["Unnamed: 0"], errors="ignore")

In [8]:
# Preview the first rows of the frame after dropping the extra column.
df_comments.head()

Unnamed: 0,Comment,SentimentLabel
0,Dear Sir or the institue IIT Kanpur please upl...,Positive
1,It would be good if there is syllabus detailsi...,Neutral
2,where I have to write r code,Neutral
3,a href,Neutral
4,Sir i39m not finding this course in nptel brwh...,Neutral


In [9]:
# Separate the model input (raw comment text) from the target (sentiment label).
comments, labels = df_comments["Comment"], df_comments["SentimentLabel"]

In [25]:
# Display the comment text Series selected from df_comments["Comment"].
comments

0    Dear Sir or the institue IIT Kanpur please upl...
1    It would be good if there is syllabus detailsi...
2                         where I have to write r code
3                                               a href
4    Sir i39m not finding this course in nptel brwh...
5                        is this the 10 hr course on r
6                          This has been helpfulthanks
7    the same video should be posted in registered ...
Name: Comment, dtype: object

In [10]:
# Hold out 20% of the comments for testing; the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    comments,
    labels,
    test_size=0.2,
    random_state=42,
)

In [11]:
# Prepare data for SageMaker's built-in XGBoost.
# The algorithm expects header-less CSV whose FIRST column is the numeric label and
# whose remaining columns are numeric features, so raw text and string labels
# cannot be written out as-is.

# Map every sentiment string to a stable integer id (sorted so the mapping is deterministic).
# NOTE(review): rows with a missing label would map to NaN — confirm the data has none.
sentiment_classes = sorted(pd.concat([y_train, y_test]).dropna().unique())
label_to_id = {name: idx for idx, name in enumerate(sentiment_classes)}

# Convert comment text into numeric TF-IDF features. The vocabulary is fitted on the
# training split only so nothing leaks from the test split; the same fitted
# `vectorizer` must be reused to encode text at inference time.
vectorizer = TfidfVectorizer(max_features=1000)
train_features = vectorizer.fit_transform(X_train.fillna("").astype(str))
test_features = vectorizer.transform(X_test.fillna("").astype(str))

# Assemble label-first frames (label in column 0, features after it).
train_data = pd.DataFrame(train_features.toarray())
train_data.insert(0, "label", y_train.map(label_to_id).to_numpy())
test_data = pd.DataFrame(test_features.toarray())
test_data.insert(0, "label", y_test.map(label_to_id).to_numpy())

# No header and no index: the training container reads the files positionally.
train_data.to_csv('train.csv', index=False, header=False)
test_data.to_csv('test.csv', index=False, header=False)

In [12]:
# Create the SageMaker session used by the cells below.
sagemaker_session = sagemaker.Session()

# Look up the IAM role attached to the current AWS execution environment.
role = get_execution_role()

In [14]:
# Get the default Amazon S3 bucket associated with the current SageMaker session
# and print its name.
bucket = sagemaker_session.default_bucket()
print(f'Our SM bucket: {bucket}')

Our SM bucket: sagemaker-us-east-1-733003498961


In [15]:
# S3 key prefix (inside `bucket`) under which the training data and the output model are stored.
file_path = "Comments"

In [16]:
# Upload train.csv and test.csv to s3://<bucket>/<file_path>/ using Boto3.
# Keys are joined with "/" explicitly: S3 keys always use forward slashes, whereas
# os.path.join would emit backslashes on Windows (and `os` is not imported here).
s3_bucket = boto3.Session().resource("s3").Bucket(bucket)
for local_file in ("train.csv", "test.csv"):
    s3_bucket.Object(f"{file_path}/{local_file}").upload_file(local_file)

In [18]:
# Resolve the container image for SageMaker's built-in XGBoost in the session's region.
# The version is pinned explicitly: the "latest" tag resolves to the legacy XGBoost
# image, so pinning keeps runs reproducible and on a maintained release.
container = sagemaker.image_uris.retrieve(
    framework='xgboost',
    region=sagemaker_session.boto_region_name,
    version="1.7-1",
)

In [20]:
# Training channel: CSV data under the "<file_path>/train" prefix of the bucket.
train_data_uri = TrainingInput(
    s3_data=f"s3://{bucket}/{file_path}/train",
    content_type="csv",
)

# Validation channel: CSV data under the "<file_path>/test" prefix of the bucket.
test_data_uri = TrainingInput(
    s3_data=f"s3://{bucket}/{file_path}/test",
    content_type="csv",
)


In [21]:
# Configure the training job: one ml.m4.xlarge instance running the selected
# container, with model artifacts written under "<file_path>/output" in the bucket.
estimator = Estimator(
    image_uri=container,
    role=role,
    instance_count=1,
    instance_type='ml.m4.xlarge',
    output_path=f"s3://{bucket}/{file_path}/output",
    sagemaker_session=sagemaker_session,
)

In [22]:
# Hyperparameters for the XGBoost model, collected in one dict so they are easy to tune.
# 'multi:softmax' with num_class=3 configures a three-class classifier.
xgb_hyperparameters = {
    "max_depth": 5,
    "eta": 0.2,
    "gamma": 4,
    "min_child_weight": 6,
    "subsample": 0.8,
    "objective": 'multi:softmax',
    "num_class": 3,
    "num_round": 100,
}
estimator.set_hyperparameters(**xgb_hyperparameters)



In [25]:
# Launch the training job, feeding the train split and the held-out split
# through the 'train' and 'validation' channels respectively.
training_channels = {'train': train_data_uri, 'validation': test_data_uri}
estimator.fit(training_channels)

ClientError: An error occurred (AccessDeniedException) when calling the CreateTrainingJob operation: User: arn:aws:sts::733003498961:assumed-role/sagemakerlab/SageMaker is not authorized to perform: sagemaker:CreateTrainingJob on resource: arn:aws:sagemaker:us-east-1:733003498961:training-job/xgboost-2023-06-13-03-33-31-357 with an explicit deny in a service control policy

In [24]:
# Host the trained model behind a real-time endpoint on a single ml.m4.xlarge instance.
# This requires a completed training job: run it only after estimator.fit() succeeds.
predictor = estimator.deploy(
    initial_instance_count=1,
    instance_type='ml.m4.xlarge',
)

ValueError: Estimator is not associated with a training job