In [63]:
import import_ipynb
import pandas as pd
from extract_data import extract_data_from_api
import datetime
import io
from io import StringIO
import boto3
import codecs
import os

In [64]:
# Fetch the chart payload from the project-local API helper.
# NOTE(review): the arguments are presumably (symbol, interval, range) -- the
# printed meta below reports symbol 'V', dataGranularity '1m', range '1d';
# confirm against extract_data_from_api's signature.
response = extract_data_from_api("V", "1m", "1d")
print(response)

{'meta': {'currency': 'USD', 'symbol': 'V', 'exchangeName': 'NYQ', 'instrumentType': 'EQUITY', 'firstTradeDate': 1205933400, 'regularMarketTime': 1678737602, 'gmtoffset': -14400, 'timezone': 'EDT', 'exchangeTimezoneName': 'America/New_York', 'regularMarketPrice': 214.47, 'chartPreviousClose': 216.14, 'previousClose': 216.14, 'scale': 3, 'priceHint': 2, 'currentTradingPeriod': {'pre': {'timezone': 'EDT', 'end': 1678800600, 'start': 1678780800, 'gmtoffset': -14400}, 'regular': {'timezone': 'EDT', 'end': 1678824000, 'start': 1678800600, 'gmtoffset': -14400}, 'post': {'timezone': 'EDT', 'end': 1678838400, 'start': 1678824000, 'gmtoffset': -14400}}, 'tradingPeriods': [[{'timezone': 'EDT', 'end': 1678737600, 'start': 1678714200, 'gmtoffset': -14400}]], 'dataGranularity': '1m', 'range': '1d', 'validRanges': ['1d', '5d', '1mo', '3mo', '6mo', '1y', '2y', '5y', '10y', 'ytd', 'max']}, 'timestamp': [1678714200, 1678714260, 1678714320, 1678714380, 1678714440, 1678714500, 1678714560, 1678714620, 167

In [65]:
def round_decimal(numbers):
    """Round every non-``None`` entry of ``numbers`` to two decimal places.

    The sequence is modified in place and the same object is returned, so the
    caller's list is updated as well. ``None`` entries are left untouched.
    """
    for position, value in enumerate(numbers):
        if value is None:
            continue
        numbers[position] = round(value, 2)
    return numbers

In [66]:
#Functions for creating dataframe from response object
def create_df_metadata(metadata_response):
    """Build a one-row DataFrame summarising the chart metadata.

    Parameters
    ----------
    metadata_response : dict
        The ``"meta"`` section of the API response. Must contain ``symbol``,
        ``instrumentType``, ``timezone``, ``range``, ``dataGranularity`` and
        ``tradingPeriods`` (a nested list whose ``[0][0]`` entry is a dict with
        ``start`` and ``end`` epoch seconds).

    Returns
    -------
    pandas.DataFrame
        A single row (index 0) with the columns ``symbol``, ``instrumentType``,
        ``trade_period`` (``"HH:MM - HH:MM"``), ``timezone``, ``range`` and
        ``interval``.

    Raises
    ------
    KeyError, IndexError
        If a required key or the first trading period is missing.
    """
    first_period = metadata_response["tradingPeriods"][0][0]

    # Render the hours with the UTC offset carried in the response, so that
    # trade_period agrees with the `timezone` column on any machine. Without an
    # offset, fall back to the machine's local timezone (tz=None).
    gmtoffset = first_period.get("gmtoffset", metadata_response.get("gmtoffset"))
    if gmtoffset is None:
        exchange_tz = None
    else:
        exchange_tz = datetime.timezone(datetime.timedelta(seconds=gmtoffset))

    def _clock(epoch_seconds):
        # Format epoch seconds as a wall-clock "HH:MM" string.
        return datetime.datetime.fromtimestamp(epoch_seconds, tz=exchange_tz).strftime("%H:%M")

    period = _clock(first_period["start"]) + " - " + _clock(first_period["end"])

    important_columns = {
        "symbol": metadata_response["symbol"],
        "instrumentType": metadata_response["instrumentType"],
        "trade_period": period,
        "timezone": metadata_response["timezone"],
        "range": metadata_response["range"],
        "interval": metadata_response["dataGranularity"],
    }
    # All values are scalars, so an explicit index is required to get one row.
    df_metadata = pd.DataFrame(important_columns, index=[0])

    return df_metadata

def create_df_timestamp(timestamp_response, gmtoffset=None):
    """Convert epoch-second timestamps into a DataFrame of formatted strings.

    Parameters
    ----------
    timestamp_response : iterable of int or float
        Epoch seconds, e.g. the ``"timestamp"`` list of the API response.
    gmtoffset : int, optional
        UTC offset in seconds to render the timestamps in. When omitted, the
        machine's local timezone is used, so the output depends on the host.

    Returns
    -------
    pandas.DataFrame
        One column, ``timestamp``, holding ``"YYYY-MM-DD HH:MM:SS"`` strings in
        input order.
    """
    if gmtoffset is None:
        target_tz = None
    else:
        target_tz = datetime.timezone(datetime.timedelta(seconds=gmtoffset))

    # Read from the argument, not from a notebook-level `response` variable, so
    # the function works for any payload passed in.
    converted_timestamp = [
        datetime.datetime.fromtimestamp(timestamp, tz=target_tz).strftime("%Y-%m-%d %H:%M:%S")
        for timestamp in timestamp_response
    ]
    df_timestamp = pd.DataFrame({"timestamp": converted_timestamp})
    return df_timestamp


def create_df_indicators(indicators_response):
    """Build the volume/price DataFrame from the indicators section.

    Parameters
    ----------
    indicators_response : dict
        The ``"indicators"`` section of the API response. Its ``"quote"`` entry
        must be a list whose first element holds the parallel lists
        ``volume``, ``open``, ``close``, ``high`` and ``low``.

    Returns
    -------
    pandas.DataFrame
        Columns ``Volume``, ``Open``, ``Close``, ``High``, ``Low``; the price
        columns are rounded to two decimals, ``Volume`` is passed through.

    Raises
    ------
    KeyError, IndexError
        If ``"quote"`` or one of the expected series is missing.
    """
    # Read from the argument, not from a notebook-level `response` variable, so
    # the function works for any payload passed in.
    quote = indicators_response["quote"][0]

    # round_decimal rounds in place, so the price lists inside the response
    # object are rounded as a side effect.
    indicators_dictionary = {
        "Volume": quote["volume"],
        "Open": round_decimal(quote["open"]),
        "Close": round_decimal(quote["close"]),
        "High": round_decimal(quote["high"]),
        "Low": round_decimal(quote["low"]),
    }
    df_indicators = pd.DataFrame(indicators_dictionary)

    return df_indicators

# Build the one-row metadata frame from the "meta" section of the response and
# show it. df_metadata is reused by later cells (merge, CSV export, upload key).
df_metadata = create_df_metadata(response["meta"])
print(df_metadata)

  symbol instrumentType   trade_period timezone range interval
0      V         EQUITY  09:30 - 16:00      EDT    1d       1m


In [67]:
#Create dataframe for indicator and timestamp and merge all dataframe together
df_indicator = create_df_indicators(response["indicators"])
df_timestamp = create_df_timestamp(response["timestamp"])

# df_metadata has a single row, so after the column-wise concat "symbol" is only
# set on row 0; forward-fill it down, then replace remaining gaps with 0 and
# store Volume as integers.
df_merged = (
    pd.concat([df_metadata["symbol"], df_indicator, df_timestamp], axis=1)
    .assign(symbol=lambda frame: frame["symbol"].ffill())
    .fillna(0)
    .astype({"Volume": int})
)
print(df_merged.head())

<class 'int'>
  symbol  Volume    Open   Close    High     Low            timestamp
0      V  145640  214.63  215.13  215.17  214.12  2023-03-13 09:30:00
1      V   29271  215.07  214.68  215.19  214.45  2023-03-13 09:31:00
2      V    9716  214.58  214.26  214.91  214.19  2023-03-13 09:32:00
3      V   16146  214.24  214.19  214.38  213.78  2023-03-13 09:33:00
4      V   58253  214.04  213.04  214.04  213.02  2023-03-13 09:34:00


In [68]:
def to_csv(df):
    """Serialise ``df`` to CSV text (header included, index omitted).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to serialise.

    Returns
    -------
    str
        The CSV content as a single string.
    """
    with io.StringIO() as text_buffer:
        df.to_csv(text_buffer, index=False)
        return text_buffer.getvalue()

# Serialise both frames to CSV text; these strings are the upload bodies passed
# to upload_to_s3 further down.
metadata_csv = to_csv(df_metadata)
stockPrice_csv = to_csv(df_merged)

In [69]:
#Create S3 instance
def create_s3():
    """Create an S3 client for ``us-east-1`` and print every bucket it can list.

    Credentials are read from the ``AWS_ACCESS_KEY`` and
    ``AWS_SECRET_ACCESSKEY`` environment variables; a missing variable is
    passed to boto3 as ``None``.

    Returns
    -------
    The boto3 S3 client.
    """
    credentials = {
        "aws_access_key_id": os.environ.get("AWS_ACCESS_KEY"),
        "aws_secret_access_key": os.environ.get("AWS_SECRET_ACCESSKEY"),
    }
    client = boto3.client("s3", region_name="us-east-1", **credentials)

    # One bucket name per output line.
    for bucket_entry in client.list_buckets()["Buckets"]:
        print(bucket_entry["Name"])
    return client

#Upload file to s3
def upload_to_s3(s3, folder, bucket, body, symbol=None, timestamp=None):
    """Upload ``body`` under a date-partitioned key unless the key already exists.

    The key has the form ``<symbol>/<folder>/<YYYY>/<MM>/<YYYYMMDD>.csv``.

    Parameters
    ----------
    s3 : S3 client
        Object exposing ``head_object`` and ``put_object`` (e.g. a boto3 client).
    folder : str
        Second path component of the key (e.g. ``"metadata"``).
    bucket : str
        Target bucket name.
    body : str or bytes
        Content to upload.
    symbol : str, optional
        First path component. Defaults to the ``symbol`` value in row 0 of the
        notebook-level ``df_metadata``.
    timestamp : str, optional
        A string starting with ``YYYY-MM-DD``. Defaults to the ``timestamp``
        value in row 0 of the notebook-level ``df_merged``.

    Returns
    -------
    str
        The object key, returned whether or not an upload took place.
    """
    # Fall back to the notebook-level frames only when the caller gave no value.
    if symbol is None:
        symbol = df_metadata.loc[0, "symbol"]
    if timestamp is None:
        timestamp = df_merged.loc[0, "timestamp"]

    year = timestamp[:4]
    month = timestamp[5:7]
    date = timestamp[8:10]

    # S3 keys always use "/" as separator; os.path.join would produce
    # backslashes on Windows.
    key = "/".join((symbol, folder, year, month, f"{year}{month}{date}.csv"))

    try:
        s3.head_object(Bucket=bucket, Key=key)
    except Exception as head_error:
        # `except Exception` (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate. The upload is still attempted after any failed
        # lookup, but a failure other than "not found" is reported, not hidden.
        error_info = getattr(head_error, "response", None) or {}
        error_code = str(error_info.get("Error", {}).get("Code", ""))
        if error_code not in ("404", "NoSuchKey", "NotFound"):
            print(f"Could not confirm whether file {key} exists in bucket {bucket}: {head_error}")
    else:
        print(f"File {key} already exists in bucket {bucket}")
        return key

    try:
        s3.put_object(Bucket=bucket, Body=body, Key=key)
        print(f"Uploaded file {key} to bucket {bucket}")
    except Exception as e:
        print(f"Error uploading file {key} to bucket {bucket}: {e}")
    return key

In [70]:
# Create the client (this also prints the visible bucket names), then upload
# both CSV strings to the "stockde" bucket.
s3 = create_s3()
print(s3.meta.region_name)
# The returned object keys are kept for the cleanup and %store cells below.
metadata_key = upload_to_s3(s3, "metadata", "stockde", metadata_csv)
stockprice_key = upload_to_s3(s3, "stock_price", "stockde", stockPrice_csv) 

ibmsalesproject
stockde
us-east-1
Uploaded file V/metadata/2023/03/20230313.csv to bucket stockde
Uploaded file V/stock_price/2023/03/20230313.csv to bucket stockde


In [71]:
# Optional cleanup (disabled): uncomment to delete the two uploaded objects from the bucket.
# s3.delete_object(Bucket="stockde", Key= metadata_key)
# s3.delete_object(Bucket="stockde", Key= stockprice_key)

In [72]:
# Persist the two object keys with IPython's %store magic so that they can be
# restored later with `%store -r`.
%store metadata_key
%store stockprice_key

Stored 'metadata_key' (str)
Stored 'stockprice_key' (str)
