In [1]:
import import_ipynb
import pandas as pd
from extract_data import extract_data_from_api
import datetime
from io import StringIO
import boto3
import os

importing Jupyter notebook from extract_data.ipynb


In [2]:
# Fetch the chart payload via the project-local extract_data notebook.
# NOTE(review): the three arguments presumably mean symbol, interval and range —
# the "meta" section printed below reports symbol 'V', dataGranularity '1m' and
# range '1d'; confirm against extract_data_from_api's signature.
response = extract_data_from_api("V", "1m", "1d")
print(response)

{'meta': {'currency': 'USD', 'symbol': 'V', 'exchangeName': 'NYQ', 'instrumentType': 'EQUITY', 'firstTradeDate': 1205933400, 'regularMarketTime': 1678136402, 'gmtoffset': -18000, 'timezone': 'EST', 'exchangeTimezoneName': 'America/New_York', 'regularMarketPrice': 226.75, 'chartPreviousClose': 223.77, 'previousClose': 223.77, 'scale': 3, 'priceHint': 2, 'currentTradingPeriod': {'pre': {'timezone': 'EST', 'start': 1678179600, 'end': 1678199400, 'gmtoffset': -18000}, 'regular': {'timezone': 'EST', 'start': 1678199400, 'end': 1678222800, 'gmtoffset': -18000}, 'post': {'timezone': 'EST', 'start': 1678222800, 'end': 1678237200, 'gmtoffset': -18000}}, 'tradingPeriods': [[{'timezone': 'EST', 'start': 1678113000, 'end': 1678136400, 'gmtoffset': -18000}]], 'dataGranularity': '1m', 'range': '1d', 'validRanges': ['1d', '5d', '1mo', '3mo', '6mo', '1y', '2y', '5y', '10y', 'ytd', 'max']}, 'timestamp': [1678113000, 1678113060, 1678113120, 1678113180, 1678113240, 1678113300, 1678113360, 1678113420, 167

In [3]:
def round_decimal(numbers):
    """Round every non-None entry of ``numbers`` to two decimal places.

    The sequence is modified in place and the very same object is returned,
    so callers may use either the return value or the original reference.
    ``None`` entries are left untouched.
    """
    for position, value in enumerate(numbers):
        if value is None:
            continue
        numbers[position] = round(value, 2)
    return numbers

In [4]:
#Functions for creating dataframe from response object
def create_df_metadata(metadata_response):
    """Build a one-row DataFrame summarising the ``meta`` section of a response.

    Args:
        metadata_response: Mapping that provides the keys ``tradingPeriods``,
            ``symbol``, ``instrumentType``, ``regularMarketPrice``,
            ``previousClose``, ``timezone``, ``range`` and ``dataGranularity``.
            ``tradingPeriods[0][0]`` must hold epoch-second ``start`` and
            ``end`` values and may hold a ``gmtoffset`` (seconds east of UTC).

    Returns:
        pandas.DataFrame with a single row (index 0) and the columns
        ``symbol``, ``instrumentType``, ``regularMarketPrice``,
        ``previousClose``, ``trade_period``, ``timezone``, ``range`` and
        ``interval``. ``trade_period`` is formatted as ``"HH:MM - HH:MM"``.
    """
    session = metadata_response["tradingPeriods"][0][0]

    # Render the session bounds in the trading period's own UTC offset so the
    # result does not depend on the timezone of the machine running the
    # notebook. Without a gmtoffset we fall back to local time (tz=None).
    offset_seconds = session.get("gmtoffset")
    session_tz = None
    if offset_seconds is not None:
        session_tz = datetime.timezone(datetime.timedelta(seconds=offset_seconds))

    def _clock(epoch_seconds):
        # Wall-clock "HH:MM" of an epoch timestamp in the session timezone.
        return datetime.datetime.fromtimestamp(epoch_seconds, tz=session_tz).strftime("%H:%M")

    period = _clock(session["start"]) + " - " + _clock(session["end"])

    important_columns = {
        "symbol": metadata_response["symbol"],
        "instrumentType": metadata_response["instrumentType"],
        "regularMarketPrice": metadata_response["regularMarketPrice"],
        "previousClose": metadata_response["previousClose"],
        "trade_period": period,
        "timezone": metadata_response["timezone"],
        "range": metadata_response["range"],
        "interval": metadata_response["dataGranularity"],
    }
    # index=[0] is required because every value is a scalar.
    df_metadata = pd.DataFrame(important_columns, index=[0])

    return df_metadata

def create_df_timestamp(timestamp_response):
    """Convert epoch-second timestamps into a one-column DataFrame of strings.

    Args:
        timestamp_response: Iterable of epoch timestamps in seconds (the
            ``timestamp`` list of the API response).

    Returns:
        pandas.DataFrame with a single ``timestamp`` column holding
        ``"YYYY-MM-DD HH:MM:SS"`` strings, one row per input value. The
        conversion uses the local timezone of the machine running the code.
    """
    # Use the argument that was passed in; the previous version silently read
    # the notebook-global ``response`` and ignored its parameter.
    converted_timestamp = [
        datetime.datetime.fromtimestamp(timestamp).strftime("%Y-%m-%d %H:%M:%S")
        for timestamp in timestamp_response
    ]
    df_timestamp = pd.DataFrame({"timestamp": converted_timestamp})
    return df_timestamp


def create_df_indicators(indicators_response):
    """Build a DataFrame of rounded price/volume series from the indicators.

    Args:
        indicators_response: Mapping with a ``quote`` list whose first element
            holds the ``volume``, ``open``, ``close``, ``high`` and ``low``
            lists (the ``indicators`` section of the API response).

    Returns:
        pandas.DataFrame with the columns ``Volume``, ``Open``, ``Close``,
        ``High`` and ``Low``. Values are rounded to two decimals by
        ``round_decimal``, which also rounds the source lists in place.
    """
    # Use the argument that was passed in; the previous version silently read
    # the notebook-global ``response`` and ignored its parameter.
    quote = indicators_response["quote"][0]

    # Output column name -> key of the series inside the quote mapping.
    column_sources = {
        "Volume": "volume",
        "Open": "open",
        "Close": "close",
        "High": "high",
        "Low": "low",
    }
    indicators_dictionary = {
        column: round_decimal(quote[field])
        for column, field in column_sources.items()
    }

    df_indicators = pd.DataFrame(indicators_dictionary)

    return df_indicators
# Build the one-row metadata frame from the "meta" section of the response and
# show it; df_metadata is reused by later cells.
df_metadata = create_df_metadata(response["meta"])
print(df_metadata)

  symbol instrumentType  regularMarketPrice  previousClose   trade_period  \
0      V         EQUITY              226.75         223.77  09:30 - 16:00   

  timezone range interval  
0      EST    1d       1m  


In [5]:
# Create the indicator and timestamp dataframes and merge them with the symbol column.
df_indicator = create_df_indicators(response["indicators"])
df_timestamp = create_df_timestamp(response["timestamp"])
# df_metadata has a single row (index 0), so after the column-wise concat the
# "symbol" value is only present on row 0 ...
df_merged = pd.concat([df_metadata["symbol"], df_indicator,df_timestamp], axis=1)
# ... and is forward-filled so every row carries the symbol.
df_merged["symbol"] = df_merged["symbol"].ffill()
print(df_merged)

    symbol    Volume    Open   Close    High     Low            timestamp
0        V  141711.0  223.93  223.77  223.94  223.69  2023-03-06 09:30:00
1        V   41588.0  223.74  223.98  224.04  223.71  2023-03-06 09:31:00
2        V   47438.0  224.00  224.41  224.45  223.96  2023-03-06 09:32:00
3        V   18519.0  224.50  224.66  224.70  224.42  2023-03-06 09:33:00
4        V   20462.0  224.72  224.57  224.72  224.46  2023-03-06 09:34:00
..     ...       ...     ...     ...     ...     ...                  ...
386      V   33665.0  226.92  226.80  226.93  226.78  2023-03-06 15:56:00
387      V   45339.0  226.80  226.71  226.80  226.70  2023-03-06 15:57:00
388      V   52680.0  226.72  226.85  226.87  226.69  2023-03-06 15:58:00
389      V   99239.0  226.86  226.77  226.89  226.69  2023-03-06 15:59:00
390      V       0.0  226.75  226.75  226.75  226.75  2023-03-06 16:00:00

[391 rows x 7 columns]


In [6]:
#Transform DF to CSV
def to_csv(df):
    """Serialise ``df`` to CSV text (header included, index omitted).

    The frame is written into an in-memory text buffer and the buffer's
    content is returned as a ``str``.
    """
    with StringIO() as csv_buffer:
        df.to_csv(csv_buffer, index=False)
        return csv_buffer.getvalue()
# Serialise both frames to CSV text; only the metadata CSV is printed in this cell.
metadata_csv = to_csv(df_metadata)
stockPrice_csv = to_csv(df_merged)
print(metadata_csv)

symbol,instrumentType,regularMarketPrice,previousClose,trade_period,timezone,range,interval
V,EQUITY,226.75,223.77,09:30 - 16:00,EST,1d,1m



In [7]:
# Show the CSV text of the merged per-timestamp frame (one line per row).
print(stockPrice_csv)

symbol,Volume,Open,Close,High,Low,timestamp
V,141711.0,223.93,223.77,223.94,223.69,2023-03-06 09:30:00
V,41588.0,223.74,223.98,224.04,223.71,2023-03-06 09:31:00
V,47438.0,224.0,224.41,224.45,223.96,2023-03-06 09:32:00
V,18519.0,224.5,224.66,224.7,224.42,2023-03-06 09:33:00
V,20462.0,224.72,224.57,224.72,224.46,2023-03-06 09:34:00
V,23054.0,224.59,224.74,224.76,224.55,2023-03-06 09:35:00
V,12740.0,224.77,224.84,224.87,224.71,2023-03-06 09:36:00
V,19061.0,224.85,224.59,224.88,224.59,2023-03-06 09:37:00
V,12634.0,224.64,224.84,224.88,224.53,2023-03-06 09:38:00
V,12747.0,224.88,224.86,224.95,224.84,2023-03-06 09:39:00
V,25848.0,224.89,225.06,225.17,224.83,2023-03-06 09:40:00
V,9744.0,225.09,225.24,225.25,225.05,2023-03-06 09:41:00
V,13012.0,225.29,225.2,225.37,225.15,2023-03-06 09:42:00
V,24874.0,225.19,225.53,225.58,225.14,2023-03-06 09:43:00
V,23352.0,225.53,225.23,225.55,225.14,2023-03-06 09:44:00
V,12104.0,225.26,225.16,225.36,225.14,2023-03-06 09:45:00
V,15230.0,225.1,225.01,225.1,224

In [8]:
#Create S3 instance
def create_s3():
    """Create an S3 client for ``us-east-1`` and print the visible bucket names.

    Credentials are read from the ``AWS_ACCESS_KEY`` and
    ``AWS_SECRET_ACCESSKEY`` environment variables; ``os.environ.get`` yields
    ``None`` for a variable that is not set. Every bucket name returned by
    ``list_buckets`` is printed on its own line.

    Returns:
        The client object created by ``boto3.client``.
    """
    credentials = {
        "aws_access_key_id": os.environ.get("AWS_ACCESS_KEY"),
        "aws_secret_access_key": os.environ.get("AWS_SECRET_ACCESSKEY"),
    }
    client = boto3.client("s3", region_name="us-east-1", **credentials)

    # Listing the buckets doubles as a quick connectivity/credentials check.
    listing = client.list_buckets()
    for entry in listing["Buckets"]:
        print(f"{entry['Name']}")
    return client

#Upload file to s3
def _is_missing_object_error(error):
    """Return True when ``error`` carries an S3 "object not found" error code."""
    details = getattr(error, "response", None)
    if not isinstance(details, dict):
        return False
    code = str(details.get("Error", {}).get("Code", ""))
    # A HEAD request has no response body, so a missing key is reported as "404".
    return code in ("404", "NoSuchKey", "NotFound")


def upload_to_s3(s3, folder, bucket, body, symbol=None, timestamp=None):
    """Upload ``body`` to ``bucket`` unless an object already exists at the key.

    The object key is ``<symbol>/<folder>/<YYYY>/<MM>/<YYYYMMDD>.csv``, where
    the date parts are sliced out of ``timestamp``.

    Args:
        s3: Client object providing ``head_object`` and ``put_object``.
        folder: Key segment placed directly after the symbol.
        bucket: Name of the target bucket.
        body: Content to upload.
        symbol: Optional symbol for the key. Defaults to the ``symbol`` value
            in row 0 of the notebook-level ``df_metadata``.
        timestamp: Optional string starting with ``YYYY-MM-DD``. Defaults to
            the ``timestamp`` value in row 0 of the notebook-level ``df_merged``.

    Returns:
        None. The outcome (already exists / uploaded / error) is printed.
    """
    if symbol is None:
        symbol = df_metadata.loc[0, "symbol"]
    if timestamp is None:
        timestamp = df_merged.loc[0, "timestamp"]

    year = timestamp[:4]
    month = timestamp[5:7]
    date = timestamp[8:10]

    # S3 keys always use "/" as the delimiter; os.path.join would produce
    # backslashes on Windows and therefore a different key.
    key = "/".join([symbol, folder, year, month, f"{year}{month}{date}.csv"])

    try:
        s3.head_object(Bucket=bucket, Key=key)
    except Exception as error:
        # Only a "not found" answer means it is safe to upload. Any other
        # failure (permissions, network, ...) leaves the object's existence
        # unknown, so report it instead of risking an overwrite.
        if not _is_missing_object_error(error):
            print(f"Could not check whether {key} exists in bucket {bucket}: {error}")
            return
    else:
        print(f"File {key} already exists in bucket {bucket}")
        return

    try:
        s3.put_object(Bucket=bucket, Body=body, Key=key)
        print(f"Uploaded file {key} to bucket {bucket}")
    except Exception as e:
        print(f"Error uploading file {key} to bucket {bucket}: {e}")

In [9]:
# Create the S3 client (this also prints the visible bucket names), then upload
# the metadata CSV and the stock-price CSV to the "stockds" bucket.
s3 = create_s3()
upload_to_s3(s3, "metadata", "stockds", metadata_csv)
upload_to_s3(s3, "stock_price", "stockds", stockPrice_csv) 

ibmsalesproject
stockds
File V/metadata/2023/03/20230306.csv already exists in bucket stockds
File V/stock_price/2023/03/20230306.csv already exists in bucket stockds
