- The following code is copied from SageMaker.

In [None]:
import boto3
import sagemaker
import pandas as pd
from sagemaker import get_execution_role
from sagemaker.sklearn.estimator import SKLearn


In [None]:
# Establish the SageMaker context shared by the later cells: the S3 key
# prefix for this project, the execution role, the SDK session, and the
# session's default bucket.
prefix = 'news-classification'
role = get_execution_role()
sagemaker_session = sagemaker.Session()
bucket = sagemaker_session.default_bucket()


In [None]:
# Push the local news corpus CSV to the project bucket under "<prefix>/data".
# The value returned by upload_data is kept so it can be handed to training.
s3_input_path = sagemaker_session.upload_data(
    '../news_aggregator_dataset/newsCorpora.csv',
    bucket=bucket,
    key_prefix=f'{prefix}/data',
)


In [None]:
# Configure the scikit-learn training job: train.py is the entry point, it
# runs on an ml.m5.large instance, and output goes under "<prefix>/output".
sklearn_estimator = SKLearn(
    entry_point='train.py',
    role=role,
    instance_type='ml.m5.large',
    framework_version='0.23-1',
    output_path=f's3://{bucket}/{prefix}/output',
    sagemaker_session=sagemaker_session,
)

# Kick off training, supplying the uploaded dataset as the 'train' channel.
sklearn_estimator.fit({'train': s3_input_path})


In [None]:
# Host the trained estimator on an endpoint backed by one ml.m5.large
# instance; the returned predictor is used for inference below.
predictor = sklearn_estimator.deploy(instance_type='ml.m5.large', initial_instance_count=1)


In [None]:
# Make predictions
# joblib is required to load the pickled vectorizer; it is imported here
# because no other cell in the notebook imports it.
import joblib

test_data = ["The stock market is experiencing unprecedented growth."]

# Load the locally saved TF-IDF vectorizer so the raw text can be turned into
# features before it is sent to the endpoint.
# NOTE(review): joblib.load unpickles the file -- only load artifacts you trust.
vectorizer = joblib.load('tfidf_vectorizer.pkl')
test_vectorized = vectorizer.transform(test_data)

# Presumably the vectorizer is a scikit-learn TfidfVectorizer, whose
# transform() returns a SciPy sparse matrix. The predictor's default NumPy
# serializer expects array-like numeric data, so densify when possible.
if hasattr(test_vectorized, 'toarray'):
    test_vectorized = test_vectorized.toarray()

# Send prediction request and show the result
response = predictor.predict(test_vectorized)
print(response)


In [None]:
# Delete the endpoint after testing.
# Tears down the endpoint behind `predictor` (created by the deploy cell) so
# it does not keep running once the experiment is finished.
predictor.delete_endpoint()
