In [1]:
!pip install sagemaker -U

Collecting sagemaker
  Downloading sagemaker-2.244.2-py3-none-any.whl.metadata (17 kB)
Downloading sagemaker-2.244.2-py3-none-any.whl (1.7 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.7/1.7 MB 56.6 MB/s eta 0:00:00
Installing collected packages: sagemaker
  Attempting uninstall: sagemaker
    Found existing installation: sagemaker 2.242.0
    Uninstalling sagemaker-2.242.0:
      Successfully uninstalled sagemaker-2.242.0
Successfully installed sagemaker-2.244.2


In [2]:
import sagemaker
import boto3
from sagemaker.huggingface import HuggingFaceModel

# Resolve the IAM role ARN that is handed to the model definition.
try:
    role = sagemaker.get_execution_role()
except ValueError:
    # get_execution_role() raised ValueError: look the role up by name via IAM instead.
    iam = boto3.client("iam")
    role_response = iam.get_role(RoleName="sagemaker_execution_role")
    role = role_response["Role"]["Arn"]
print(role)



sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml
arn:aws:iam::207567756516:role/service-role/AmazonSageMaker-ExecutionRole-20250429T171998


In [3]:
# Environment variables passed to the model via `env`.
# Model catalogue: https://huggingface.co/models
hub = {
    "HF_MODEL_ID": "ProsusAI/finbert",
    "HF_TASK": "text-classification",
}

# Hugging Face model definition; deployment happens in a separate step.
huggingface_model = HuggingFaceModel(
    role=role,
    env=hub,
    transformers_version="4.37.0",
    pytorch_version="2.1.0",
    py_version="py310",
)

In [4]:
# Deploy the model to SageMaker Inference on a single ml.m5.xlarge instance.
# NOTE(review): no teardown is visible in this notebook; confirm that
# predictor.delete_endpoint() is run once scoring is finished.
predictor = huggingface_model.deploy(
    instance_type="ml.m5.xlarge",
    initial_instance_count=1,
)

---------!

In [6]:
import pandas as pd

# Location of the combined, cleaned news CSV in the project bucket.
s3_path = "s3://financial-analysis-project-bucket/SageMaker/combined_clean_news.csv"

# Load the whole file into a DataFrame straight from the S3 URL.
df = pd.read_csv(filepath_or_buffer=s3_path)

# Non-null count per column, shown as the cell output.
df.count()

date      1401732
symbol    1401732
title     1401732
dtype: int64

In [7]:
# Keep only the first 1,000 rows for scoring.
# .iloc makes the positional intent explicit, and .copy() yields an independent
# frame, so the column added later is written to its own data rather than to a
# slice of the full dataset.
df = df.iloc[:1000].copy()
df.count()

date      1000
symbol    1000
title     1000
dtype: int64

In [8]:
# Headlines to score, in row order.
titles = df['title'].to_list()

# One sentiment label per headline, appended in the same order as `titles`.
results = []

# Number of headlines sent to the endpoint per request.
batch_size = 100
for i in range(0, len(titles), batch_size):
    batch = titles[i: i + batch_size]
    prediction = predictor.predict({"inputs": batch})
    # Labels are matched to rows purely by position, so a response with a
    # different number of items would silently misalign them: fail fast instead.
    if len(prediction) != len(batch):
        raise ValueError(
            f"endpoint returned {len(prediction)} predictions for "
            f"{len(batch)} inputs (batch starting at row {i})"
        )
    # Progress: rows processed so far, capped so a short final batch is not over-counted.
    print(min(i + batch_size, len(titles)))
    # Keep only the 'label' field of each prediction.
    results.extend(d['label'] for d in prediction)

100
200
300
400
500
600
700
800
900
1000


In [9]:
# Number of labels collected in `results`.
print(len(results))

1000


In [10]:
# Add the sentiment labels back to the original DataFrame as a new column.
df['sentiment'] = results

In [11]:
# Show the first ten rows of the frame.
df.head(10)

Unnamed: 0,date,symbol,title,sentiment
0,2023-12-16,A,Interesting A Put And Call Options For August ...,neutral
1,2023-12-12,A,Wolfe Research Initiates Coverage of Agilent T...,positive
2,2023-12-12,A,Agilent Technologies Reaches Analyst Target Price,positive
3,2023-12-07,A,Agilent (A) Enhances BioTek Cytation C10 With ...,positive
4,2023-12-07,A,"Pre-Market Most Active for Dec 7, 2023 : SQQQ,...",neutral
5,2023-12-05,A,A Quantitative Stock Analysis,neutral
6,2023-11-24,A,"The Zacks Analyst Blog Highlights Visa, Marrio...",neutral
7,2023-11-22,A,3 Biotech Stocks With Big-Time Breakthrough Po...,positive
8,2023-11-22,A,"Top Research Reports for Visa, Marriott Intern...",neutral
9,2023-11-21,A,"Tuesday Sector Leaders: Healthcare, Consumer P...",neutral


In [13]:
# Write the news rows together with their sentiment labels back to S3 as CSV.
bucket_name = "financial-analysis-project-bucket"
key = "SageMaker/news_sentiment.csv"  # object key (path) within the bucket
s3_path = f"s3://{bucket_name}/{key}"
# index=False: the row index is not written to the file.
df.to_csv(path_or_buf=s3_path, index=False)