In [17]:
import requests
import pandas as pd
from dotenv import load_dotenv
import os
import json
import datetime
from io import StringIO
import boto3
import os

# Load variables from a .env file into the process environment; the API and AWS
# credentials used further down are read from os.environ.
load_dotenv()
# Widen pandas' text output and show every column so printed frames are not truncated.
pd.set_option('display.width', 1000)
pd.set_option('display.max_columns', None)

In [10]:
def extractData(symbol, interval, range, timeout=30):
    """Fetch chart data for one symbol from the yh-finance RapidAPI endpoint.

    Args:
        symbol: Ticker symbol sent as the ``symbol`` query parameter (e.g. "V").
        interval: Bar size sent as the ``interval`` query parameter (e.g. "1m").
        range: Look-back window sent as the ``range`` query parameter (e.g. "1d").
            The name shadows the builtin inside this function only; it is kept
            so existing positional and keyword callers continue to work.
        timeout: Seconds to wait for the server before ``requests`` raises a
            timeout error instead of blocking indefinitely.

    Returns:
        The first element of ``chart.result`` from the JSON payload, or ``None``
        when the HTTP status is not OK or the payload carries no result.
    """
    url = "https://yh-finance.p.rapidapi.com/stock/v3/get-chart"
    querystring = {
        "region": "US",
        "lang": "en",
        "symbol": symbol,
        "interval": interval,
        "range": range,
        "events": "capitalGain,div,split",
    }
    # Credentials come from the environment (populated by load_dotenv()).
    headers = {
        "X-RapidAPI-Key": os.environ.get("RAPIDAPI_KEY"),
        "X-RapidAPI-Host": os.environ.get("RAPIDAPI_HOST"),
    }
    response = requests.get(url, headers=headers, params=querystring, timeout=timeout)
    if not response.ok:
        return None

    data = json.loads(response.content)
    # A successful status can still carry an empty or null result; report that
    # the same way as an HTTP failure instead of raising on the subscript.
    chart = data.get("chart") if isinstance(data, dict) else None
    results = chart.get("result") if isinstance(chart, dict) else None
    if not results:
        return None
    return results[0]

# Fetch one day of 1-minute bars for Visa ("V"); `response` is None when the call fails.
response = extractData("V", "1m", "1d")
# Dump the raw payload to inspect its structure (meta / timestamp / indicators).
print(response)

{'meta': {'currency': 'USD', 'symbol': 'V', 'exchangeName': 'NYQ', 'instrumentType': 'EQUITY', 'firstTradeDate': 1205933400, 'regularMarketTime': 1677877202, 'gmtoffset': -18000, 'timezone': 'EST', 'exchangeTimezoneName': 'America/New_York', 'regularMarketPrice': 223.77, 'chartPreviousClose': 219.06, 'previousClose': 219.06, 'scale': 3, 'priceHint': 2, 'currentTradingPeriod': {'pre': {'timezone': 'EST', 'start': 1678093200, 'end': 1678113000, 'gmtoffset': -18000}, 'regular': {'timezone': 'EST', 'start': 1678113000, 'end': 1678136400, 'gmtoffset': -18000}, 'post': {'timezone': 'EST', 'start': 1678136400, 'end': 1678150800, 'gmtoffset': -18000}}, 'tradingPeriods': [[{'timezone': 'EST', 'start': 1677853800, 'end': 1677877200, 'gmtoffset': -18000}]], 'dataGranularity': '1m', 'range': '1d', 'validRanges': ['1d', '5d', '1mo', '3mo', '6mo', '1y', '2y', '5y', '10y', 'ytd', 'max']}, 'timestamp': [1677853800, 1677853860, 1677853920, 1677853980, 1677854040, 1677854100, 1677854160, 1677854220, 167

In [11]:
def round_decimal(numbers):
    """Return a new list with every value rounded to two decimal places.

    Args:
        numbers: Iterable of numeric values. ``None`` entries (for example JSON
            nulls in an API payload) are passed through unchanged instead of
            raising ``TypeError``.

    Returns:
        A new list; the input is left unmodified.
    """
    return [None if value is None else round(value, 2) for value in numbers]

In [12]:
#Functions for creating dataframe from response object
def create_df_metadata(metadata_response):
    """Build a one-row DataFrame summarising the ``meta`` section of a chart response.

    The trading period is rendered as "HH:MM - HH:MM" using the ``gmtoffset``
    found on the first trading period (or, failing that, on the metadata
    itself), so the result does not depend on the timezone of the machine
    running the notebook. When no offset is present the machine's local
    timezone is used.

    Args:
        metadata_response: Mapping with the keys ``tradingPeriods``, ``symbol``,
            ``instrumentType``, ``regularMarketPrice``, ``previousClose``,
            ``timezone``, ``range`` and ``dataGranularity``.

    Returns:
        DataFrame with a single row (index 0) and the columns ``symbol``,
        ``instrumentType``, ``regularMarketPrice``, ``previousClose``,
        ``trade_period``, ``timezone``, ``range`` and ``interval``.
    """
    session = metadata_response["tradingPeriods"][0][0]

    # Prefer the offset attached to the trading period, then the top-level one.
    offset = session.get("gmtoffset", metadata_response.get("gmtoffset"))
    tz = None if offset is None else datetime.timezone(datetime.timedelta(seconds=offset))

    start_time = datetime.datetime.fromtimestamp(session["start"], tz).strftime("%H:%M")
    end_time = datetime.datetime.fromtimestamp(session["end"], tz).strftime("%H:%M")
    period = start_time + " - " + end_time

    important_columns = {
        "symbol": metadata_response["symbol"],
        "instrumentType": metadata_response["instrumentType"],
        "regularMarketPrice": metadata_response["regularMarketPrice"],
        "previousClose": metadata_response["previousClose"],
        "trade_period": period,
        "timezone": metadata_response["timezone"],
        "range": metadata_response["range"],
        "interval": metadata_response["dataGranularity"],
    }
    # All values are scalars, so an explicit single-row index is required.
    return pd.DataFrame(important_columns, index=[0])

def create_df_timestamp(timestamp_response, gmtoffset=None):
    """Convert epoch-second timestamps into a DataFrame of formatted strings.

    Args:
        timestamp_response: Iterable of epoch timestamps in seconds (the
            ``timestamp`` list of a chart response).
        gmtoffset: Optional UTC offset in seconds to render the times in (for
            example the ``gmtoffset`` value from the response metadata). When
            omitted the machine's local timezone is used.

    Returns:
        DataFrame with one ``timestamp`` column holding
        "YYYY-MM-DD HH:MM:SS" strings, in input order.
    """
    tz = None if gmtoffset is None else datetime.timezone(datetime.timedelta(seconds=gmtoffset))
    # Read from the argument, not from a notebook-level variable, so the
    # function works on whatever the caller passes in.
    converted_timestamp = [
        datetime.datetime.fromtimestamp(timestamp, tz).strftime("%Y-%m-%d %H:%M:%S")
        for timestamp in timestamp_response
    ]
    return pd.DataFrame({"timestamp": converted_timestamp})


def create_df_indicators(indicators_response):
    """Build a DataFrame of rounded price/volume series from an ``indicators`` payload.

    Args:
        indicators_response: Mapping whose ``quote`` entry is a list; the first
            element must provide the ``volume``, ``open``, ``close``, ``high``
            and ``low`` sequences.

    Returns:
        DataFrame with the columns ``Volume``, ``Open``, ``Close``, ``High`` and
        ``Low`` (in that order), each passed through ``round_decimal``.
    """
    # Read from the argument, not from a notebook-level variable, so the
    # function works on whatever the caller passes in.
    quote = indicators_response["quote"][0]
    # Output column label -> key in the quote payload; dict order fixes column order.
    column_sources = {
        "Volume": "volume",
        "Open": "open",
        "Close": "close",
        "High": "high",
        "Low": "low",
    }
    indicators_dictionary = {
        label: round_decimal(quote[source]) for label, source in column_sources.items()
    }
    return pd.DataFrame(indicators_dictionary)
# Summarise the "meta" section of the fetched response as a one-row frame and show it.
df_metadata = create_df_metadata(response["meta"])
print(df_metadata)

  symbol instrumentType  regularMarketPrice  previousClose   trade_period timezone range interval
0      V         EQUITY              223.77         219.06  09:30 - 16:00      EST    1d       1m


In [13]:
# Build the indicator and timestamp frames, then place them next to the symbol column.
df_indicator = create_df_indicators(response["indicators"])
df_timestamp = create_df_timestamp(response["timestamp"])
# df_metadata["symbol"] has a single row, so the column-wise concat leaves NaN
# below it; forward-filling repeats the symbol on every row.
df_merged = pd.concat(
    [df_metadata["symbol"], df_indicator, df_timestamp],
    axis=1,
).assign(symbol=lambda frame: frame["symbol"].ffill())
print(df_merged)

    symbol  Volume    Open   Close    High     Low            timestamp
0        V  114079  220.20  220.36  220.54  219.90  2023-03-03 09:30:00
1        V   19257  220.33  220.34  220.45  220.10  2023-03-03 09:31:00
2        V   12650  220.37  220.18  220.37  220.11  2023-03-03 09:32:00
3        V   21735  220.20  220.24  220.38  220.12  2023-03-03 09:33:00
4        V   34754  220.24  220.80  220.84  220.23  2023-03-03 09:34:00
..     ...     ...     ...     ...     ...     ...                  ...
386      V   52870  223.94  223.84  224.01  223.82  2023-03-03 15:56:00
387      V   46600  223.84  223.88  223.94  223.83  2023-03-03 15:57:00
388      V   64575  223.88  223.80  223.89  223.62  2023-03-03 15:58:00
389      V   79526  223.80  223.71  223.88  223.68  2023-03-03 15:59:00
390      V       0  223.77  223.77  223.77  223.77  2023-03-03 16:00:00

[391 rows x 7 columns]


In [84]:
#Transform DF to CSV
def to_csv(df):
    """Serialise ``df`` to CSV text (header row included, index omitted).

    Args:
        df: DataFrame to serialise.

    Returns:
        The CSV content as a single string.
    """
    # With no target given, DataFrame.to_csv returns the CSV text directly.
    return df.to_csv(index=False)
# Serialise both frames to CSV text; these strings are what gets uploaded to S3 below.
metadata_csv = to_csv(df_metadata)
stockPrice_csv = to_csv(df_merged)
print(metadata_csv)


symbol,instrumentType,regularMarketPrice,previousClose,trade_period,timezone,range,interval
V,EQUITY,223.77,219.06,09:30 - 16:00,EST,1d,1m



In [88]:
# Show the full price CSV text for a visual check before uploading.
print(stockPrice_csv)

symbol,Volume,Open,Close,High,Low,timestamp
V,114079,220.2,220.36,220.54,219.9,2023-03-03 09:30:00
V,19257,220.33,220.34,220.45,220.1,2023-03-03 09:31:00
V,12650,220.37,220.18,220.37,220.11,2023-03-03 09:32:00
V,21735,220.2,220.24,220.38,220.12,2023-03-03 09:33:00
V,34754,220.24,220.8,220.84,220.23,2023-03-03 09:34:00
V,22448,220.76,220.26,220.8,220.25,2023-03-03 09:35:00
V,21668,220.21,220.04,220.26,220.04,2023-03-03 09:36:00
V,95501,220.04,220.01,220.1,219.99,2023-03-03 09:37:00
V,15593,220.02,220.0,220.05,219.92,2023-03-03 09:38:00
V,16187,219.99,219.96,220.19,219.94,2023-03-03 09:39:00
V,12569,219.97,219.96,220.06,219.94,2023-03-03 09:40:00
V,13903,219.99,220.11,220.17,219.95,2023-03-03 09:41:00
V,11641,220.15,220.12,220.15,219.94,2023-03-03 09:42:00
V,38044,220.15,219.99,220.16,219.99,2023-03-03 09:43:00
V,10043,219.99,219.96,220.04,219.94,2023-03-03 09:44:00
V,18488,220.02,220.04,220.15,219.98,2023-03-03 09:45:00
V,13002,220.07,220.05,220.21,220.04,2023-03-03 09:46:00
V,33362,220

In [91]:
#Create S3 instance
def create_s3():
    """Create an S3 client for us-east-1 and print the name of every visible bucket.

    Credentials are read from the ``AWS_ACCESS_KEY`` and
    ``AWS_SECRET_ACCESSKEY`` environment variables.

    Returns:
        The boto3 S3 client.
    """
    credentials = {
        "aws_access_key_id": os.environ.get("AWS_ACCESS_KEY"),
        "aws_secret_access_key": os.environ.get("AWS_SECRET_ACCESSKEY"),
    }
    client = boto3.client("s3", region_name="us-east-1", **credentials)
    # Listing the buckets also serves as a check that the credentials work.
    for entry in client.list_buckets()["Buckets"]:
        print(f"{entry['Name']}")
    return client

#Upload file to s3
def upload_to_s3(s3, folder, bucket, body, symbol=None, timestamp=None):
    """Upload ``body`` to S3 under a symbol/date-based key unless the key already exists.

    The object key is ``<symbol>/<folder>/<YYYY>/<MM>/<YYYYMMDD>.csv``.

    Args:
        s3: boto3 S3 client (for example the one returned by ``create_s3``).
        folder: Key segment placed directly after the symbol.
        bucket: Name of the destination bucket.
        body: Content to upload.
        symbol: Ticker used as the first key segment. Defaults to the
            ``symbol`` value in row 0 of the notebook-level ``df_metadata``.
        timestamp: String starting with "YYYY-MM-DD"; its date parts name the
            object. Defaults to the ``timestamp`` value in row 0 of the
            notebook-level ``df_merged``.

    Returns:
        None. The outcome (already exists / uploaded / error) is printed.
    """
    if symbol is None:
        symbol = df_metadata.loc[0, "symbol"]
    if timestamp is None:
        timestamp = df_merged.loc[0, "timestamp"]

    year = timestamp[:4]
    month = timestamp[5:7]
    date = timestamp[8:10]

    # S3 keys always use "/" regardless of the local OS path separator.
    key = "/".join([symbol, folder, year, month, f"{year}{month}{date}.csv"])

    try:
        s3.head_object(Bucket=bucket, Key=key)
    except s3.exceptions.ClientError as err:
        code = str(err.response.get("Error", {}).get("Code", ""))
        # Only a "not found" answer means it is safe to upload; anything else
        # (permissions, throttling, ...) is reported rather than treated as
        # "missing", which could overwrite an existing object.
        if code not in ("404", "NoSuchKey", "NotFound"):
            print(f"Error checking file {key} in bucket {bucket}: {err}")
            return
    else:
        print(f"File {key} already exists in bucket {bucket}")
        return

    try:
        s3.put_object(Bucket=bucket, Body=body, Key=key)
        print(f"Uploaded file {key} to bucket {bucket}")
    except Exception as e:
        # Best-effort upload: report the failure instead of aborting the notebook.
        print(f"Error uploading file {key} to bucket {bucket}: {e}")

In [92]:
# Create the client (this also prints the visible buckets), then upload both CSV
# payloads to the "stockds" bucket under their respective folders.
s3 = create_s3()
upload_to_s3(s3, "metadata", "stockds", metadata_csv)
upload_to_s3(s3, "stock_price", "stockds", stockPrice_csv) 

ibmsalesproject
stockds
File V/metadata/2023/03/20230303.csv already exists in bucket stockds
File V/stock_price/2023/03/20230303.csv already exists in bucket stockds
