In [2]:
from google.colab import userdata

# Read the AWS settings through Colab's userdata API so that no credential
# is written into the notebook itself. Lookups happen in the listed order.
AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_REGION = (
    userdata.get(secret_name)
    for secret_name in ('AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY', 'AWS_REGION')
)

In [3]:
import json
from datetime import datetime
import yfinance as yf
from io import BytesIO

In [4]:
def data_extraction(ticker):
  """Fetch the ``info`` mapping of ``ticker`` from yfinance and stamp it.

  Args:
    ticker: Symbol handed to ``yf.Ticker``.

  Returns:
    The ``info`` mapping with an extra ``download_timestamp`` key (ISO-8601
    string taken from ``datetime.now()``), or ``None`` when the mapping is
    empty or any exception was raised. Both failure cases print a message.
  """
  try:
    print(f"Extracting data for {ticker} ...")
    info = yf.Ticker(ticker).info

    if info:
      # Record when this snapshot was pulled, next to the payload itself.
      info['download_timestamp'] = datetime.now().isoformat()
      return info

    print(f"No data found for {ticker}")
  except Exception as err:
    print(f"Error at data extraction: {err}")

  # Reached for both "empty payload" and "exception" outcomes.
  return None

In [5]:
!pip install boto3 -q
import boto3

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/140.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m140.6/140.6 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m14.6/14.6 MB[0m [31m73.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.8/86.8 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[?25h

In [6]:
def s3_data_load(data, ticker):
  """Serialize ``data`` to JSON and upload it to the bronze layer on S3.

  The object is written to the ``stock-market-monitoring`` bucket under
  ``bronze/yahoo_finance/ticker=<ticker>/extraction_date=<YYYY-MM-DD>/<HHMMSS>.json``.
  Date and time both come from one ``datetime.now()`` call, i.e. the
  runtime's local clock without timezone information.

  Args:
    data: JSON-serializable object to store (passed to ``json.dumps``).
    ticker: Symbol used for the ``ticker=`` partition of the object key.

  Returns:
    The object key on success, or ``None`` if building the S3 resource,
    serializing ``data`` or uploading raised an exception. The error is
    printed in that case.
  """
  # path configs
  bucket_name = 'stock-market-monitoring'
  now = datetime.now()
  partition_date = now.strftime('%Y-%m-%d')
  timestamp = now.strftime('%H%M%S')

  file_path = f'bronze/yahoo_finance/ticker={ticker}/extraction_date={partition_date}/{timestamp}.json'

  try:
    # The resource is created inside the try so that a failure while setting
    # up the connection is reported like any other upload failure (print and
    # return None) instead of escaping to the caller.
    s3 = boto3.resource(
        service_name = 's3',
        region_name = AWS_REGION,
        aws_access_key_id = AWS_ACCESS_KEY_ID,
        aws_secret_access_key = AWS_SECRET_ACCESS_KEY
    )

    # ensure_ascii=False keeps non-ASCII text readable; bytes are UTF-8.
    json_data = json.dumps(data, ensure_ascii=False).encode('utf-8')

    # Label the object as JSON so downstream readers get the right media type.
    s3.Object(bucket_name, file_path).put(
        Body=json_data,
        ContentType='application/json'
    )

    print(f"Upload Done: s3://{bucket_name}/{file_path}")
    return file_path
  except Exception as e:
    print(f"Error at S3 upload: {e}")
    return None

In [7]:
# Symbols processed by the default ingestion run.
ticker_list = [
  'AAPL', 'MSFT', 'GOOGL',
  'AMZN', 'META', 'TSLA',
  'NVDA', 'NFLX', 'TSM',
]

def run_full_ingestion(ticker_list):
  """Extract and upload every symbol in ``ticker_list``, one at a time.

  A failure on one symbol never stops the run: the problem is printed and
  the loop moves on to the next symbol.

  Args:
    ticker_list: Sized iterable of ticker symbols (``len()`` is used for the
      opening message).

  Returns:
    None. Progress and errors are reported with ``print``.
  """
  print(f'Initializing data ingestion of {len(ticker_list)} assets ...')

  for ticker in ticker_list:
    try:
      data = data_extraction(ticker)
      if not data:
        # data_extraction returns None (after printing why) when it has nothing.
        continue

      # s3_data_load returns the object key on success and None on failure,
      # so success is only reported when a key actually came back.
      if not s3_data_load(data, ticker):
        print(f'{ticker} upload failed')
        continue

      print(f'{ticker} assets loaded')
    except Exception as e:
      print(f'Error at {ticker}: {e}')
  print('Ingestion completed')

In [8]:
# Execution: run the ingestion for every symbol in ticker_list.
# The __main__ guard lets the call run when this cell is executed directly
# and skips it if the code is imported as a module.
if __name__ == '__main__':
  run_full_ingestion(ticker_list)

Initializing data ingestion of 9 assets ...
Extracting data for AAPL ...
Upload Done: s3://stock-market-monitoring/bronze/yahoo_finance/ticker=AAPL/extraction_date=2026-01-16/202729.json
AAPL assets loaded
Extracting data for MSFT ...
Upload Done: s3://stock-market-monitoring/bronze/yahoo_finance/ticker=MSFT/extraction_date=2026-01-16/202730.json
MSFT assets loaded
Extracting data for GOOGL ...
Upload Done: s3://stock-market-monitoring/bronze/yahoo_finance/ticker=GOOGL/extraction_date=2026-01-16/202731.json
GOOGL assets loaded
Extracting data for AMZN ...
Upload Done: s3://stock-market-monitoring/bronze/yahoo_finance/ticker=AMZN/extraction_date=2026-01-16/202732.json
AMZN assets loaded
Extracting data for META ...
Upload Done: s3://stock-market-monitoring/bronze/yahoo_finance/ticker=META/extraction_date=2026-01-16/202734.json
META assets loaded
Extracting data for TSLA ...
Upload Done: s3://stock-market-monitoring/bronze/yahoo_finance/ticker=TSLA/extraction_date=2026-01-16/202735.json
