# ETL Process

Extract Data from Alpha Vantage API

In [1]:
import os
from io import StringIO

import boto3
import pandas as pd
import requests
 
# Fetch IBM's monthly adjusted price series from the Alpha Vantage API.
api_url = "https://www.alphavantage.co/query"
params = {
    "function": "TIME_SERIES_MONTHLY_ADJUSTED",
    "symbol": "IBM",
    # Read the key from the environment so it is never saved in the notebook.
    # Set ALPHAVANTAGE_API_KEY before starting the kernel; a missing variable
    # raises KeyError here instead of sending a bogus key to the API.
    "apikey": os.environ["ALPHAVANTAGE_API_KEY"],
}

# requests has no default timeout; without one a stalled connection would
# block "Restart & Run All" indefinitely.
response = requests.get(api_url, params=params, timeout=30)

# Fail on HTTP 4xx/5xx here rather than trying to parse an error page as JSON.
response.raise_for_status()
data = response.json()

Transform data

In [2]:
# Convert the JSON payload into a DataFrame with one row per month.
series_key = "Monthly Adjusted Time Series"

# Fail with a readable message when the expected section is absent, instead
# of a bare KeyError. NOTE(review): Alpha Vantage appears to report invalid
# keys and rate limiting inside a normal JSON body - confirm against API docs.
if series_key not in data:
    raise ValueError(
        f"Alpha Vantage response has no {series_key!r} section; "
        f"top-level keys were {sorted(data)}"
    )

df = (
    # Outer keys of the payload section become the row index after transposing.
    pd.DataFrame(data[series_key]).T
    # Rename columns before saving
    .rename(columns={
        "1. open": "Opening Price",
        "2. high": "Highest Price",
        "3. low": "Lowest Price",
        "4. close": "Closing Price",
        "5. adjusted close": "Adjusted Closing Price",
        "6. volume": "Trading Volume",
        "7. dividend amount": "Dividend per Share"
    })
    # The rename targets match on "N. name" string keys and the cells hold the
    # JSON values as-is; convert each column to a numeric dtype so unparsable
    # cells become NaN and the dropna below can actually remove them.
    .apply(pd.to_numeric, errors="coerce")
    # Handle missing values
    .dropna()
)


Load the transformed data to S3

In [3]:
# Serialize the DataFrame to CSV in memory (index included as first column).
csv_buffer = StringIO()
df.to_csv(csv_buffer, index=True)

# AWS S3 client. No access keys are passed: boto3 resolves credentials from
# its default chain (environment variables, shared config/credentials files,
# or an attached IAM role), so secrets never live in the notebook.
s3 = boto3.client("s3", region_name="eu-north-1")

# Define bucket name and file path in S3
bucket_name = "nimb-coursework-q1"
s3_file_path = "finance-data/IBM_Monthly_Stock_Data.csv"

# Upload to S3. The body is encoded explicitly so the stored bytes are UTF-8,
# and the content type is set so the object is served as CSV.
# put_object raises on failure, so the message below only prints on success.
s3.put_object(
    Bucket=bucket_name,
    Key=s3_file_path,
    Body=csv_buffer.getvalue().encode("utf-8"),
    ContentType="text/csv",
)

print(f"File successfully uploaded to S3: s3://{bucket_name}/{s3_file_path}")


File successfully uploaded to S3: s3://nimb-coursework-q1/finance-data/IBM_Monthly_Stock_Data.csv
