# Obtain and Append Data

This notebook contains code to obtain and append financial data to avoid going over API request limits.

In [1]:
import pandas as pd
from datetime import date
from datetime import timedelta
import plotly.graph_objects as go

import requests
import json
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.ticker as ticker

import numpy as np
from IPython.display import clear_output
import time
# Set plot space as inline for inline plots and qt for external plots
%matplotlib inline

In [2]:
import platform
print(platform.python_version())

3.8.3


In [3]:
# Obtain the API credentials from an external hidden file

from passwords import api_key

In [4]:
# A function to obtain price history

#Utilizing "UniBit API" by Vincent Jiang
#Site: https://unibit.ai/product

#Errors may occur if credits run out, add an error code check to this function

def retrieve_ticker_price_info(ticker, start_date, end_date = False):
    
    # Obtain today's date
    today = date.today()
    
    # A condition to set the end date to today if there is no end date given
    if not end_date:
        end_date = today
    
    
    url = 'https://api.unibit.ai/v2/stock/historical/?tickers={}&interval=1&startDate={}&endDate={}&selectedFields=all&dataType=json&accessKey={}'.format(ticker, start_date, end_date, api_key)
    
    response = requests.get(url)
    
    if response.status_code != 200:
        print("The response status code is: " + str(response.status_code))
        return None
    
    return response.json()
    

In [5]:
def retrieve_price_history_dataframe(ticker, ticker_price_history):
    
    data_frame = pd.DataFrame(ticker_price_history['result_data'][ticker])
    
    # Set the index to the 'date' column
    data_frame.set_index('date', inplace = True)
    
    data_frame.sort_index(inplace=True)
    
    return data_frame

In [6]:
today = date.today()
a_year_ago = str(date.today() - timedelta(days=1*365))
some_five_years_ago = str(date.today() - timedelta(days=5*365))

## Obtain Original Data from the API

In [7]:
# The S&P 500 company ticker is ^GSPC

# Obtain the price info
inx_info = retrieve_ticker_price_info('^GSPC', some_five_years_ago)

# Create and view the dataframe
inx_df = retrieve_price_history_dataframe('^GSPC', inx_info)
inx_df.head()

Unnamed: 0_level_0,volume,high,low,adj_close,close,open
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2015-08-31,3915100000,1986.73,1965.98,1972.18,1972.18,1986.73
2015-09-01,4371850000,1970.09,1903.07,1913.85,1913.85,1970.09
2015-09-02,3742620000,1948.91,1916.52,1948.86,1948.86,1916.52
2015-09-03,3520700000,1975.01,1944.72,1951.13,1951.13,1950.79
2015-09-04,3167090000,1947.76,1911.21,1921.22,1921.22,1947.76


In [8]:
# Save the data as a csv
# Keep the index as it contains the dates
#inx_df.to_csv('financial_data/GSPC.csv')
# five years
inx_df.to_csv('financial_data/august_31_2015_to_now_GSPC.csv')

## Import a CSV of Time Series Data

This code assumes the data is in ascending order, if this is not the case then make the relevant adjustments. 

In [9]:
inx_df = pd.read_csv('financial_data/august_31_2015_to_now_GSPC.csv', index_col = 'date')

# Convert index to date
# Comment this out if the index is already date type
inx_df.index = pd.to_datetime(inx_df.index).date

inx_df.head()

Unnamed: 0,volume,high,low,adj_close,close,open
2015-08-31,3915100000,1986.73,1965.98,1972.18,1972.18,1986.73
2015-09-01,4371850000,1970.09,1903.07,1913.85,1913.85,1970.09
2015-09-02,3742620000,1948.91,1916.52,1948.86,1948.86,1916.52
2015-09-03,3520700000,1975.01,1944.72,1951.13,1951.13,1950.79
2015-09-04,3167090000,1947.76,1911.21,1921.22,1921.22,1947.76


In [10]:
# Obtain the most recent date
inx_df.index.max()

datetime.date(2020, 8, 27)

## Obtain Most Recent Data From The API

In [11]:
# Obtain the next day after the series ends, as a string
next_day_series_ends = str(inx_df.index.max() + timedelta(days=1))
next_day_series_ends

'2020-08-28'

In [12]:
# The S&P 500 company ticker is ^GSPC

# Obtain the price info
additional_info = retrieve_ticker_price_info('^GSPC', next_day_series_ends)

# Create and view the dataframe
additional_info_df = retrieve_price_history_dataframe('^GSPC', additional_info)
additional_info_df.head()

KeyError: "None of ['date'] are in the columns"

In [14]:
additional_info_df.tail()

Unnamed: 0_level_0,volume,high,low,adj_close,close,open
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-08-21,1994452682,3399.96,3379.31,3397.16,3397.16,3386.01
2020-08-24,2271749784,3432.09,3413.13,3431.28,3431.28,3418.09
2020-08-25,2040869584,3444.21,3425.84,3443.62,3443.62,3435.95
2020-08-26,2003087362,3481.07,3444.15,3478.73,3478.73,3449.97
2020-08-27,2317902466,3501.38,3468.35,3484.55,3484.55,3485.14


## Append The Original Dataframe

You can either add to the old file or create a new one

In [15]:
result = pd.concat([inx_df, additional_info_df])

In [16]:
result.tail(15)

Unnamed: 0,volume,high,low,adj_close,close,open
2020-08-07,2282910037,3352.54,3328.72,3351.28,3351.28,3340.05
2020-08-10,2566548632,3363.29,3335.44,3360.47,3360.47,3356.04
2020-08-11,2900493747,3381.01,3326.44,3333.69,3333.69,3370.34
2020-08-12,2245003448,3387.89,3355.46,3380.35,3380.35,3355.46
2020-08-13,1964512010,3387.24,3363.35,3373.43,3373.43,3372.95
2020-08-14,1711020614,3378.51,3361.64,3372.85,3372.85,3368.66
2020-08-17,1951691959,3387.59,3379.22,3381.99,3381.99,3380.86
2020-08-18,1794617543,3395.06,3370.15,3389.78,3389.78,3387.04
2020-08-19,2018139580,3399.54,3369.66,3374.85,3374.85,3392.51
2020-08-20,1832094273,3390.8,3354.69,3385.51,3385.51,3360.48


## Save The New Dataframe

You can overwrite the old file as well

In [17]:
# Save the data as a csv
result.to_csv('financial_data/GSPC_newdata.csv', index_label = 'date')

In [13]:
# Read in the file
pd.read_csv('financial_data/GSPC.csv', index_col = 'date').tail(15)

Unnamed: 0_level_0,volume,high,low,adj_close,close,open
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-08-07,2282910037,3352.54,3328.72,3351.28,3351.28,3340.05
2020-08-10,2566548632,3363.29,3335.44,3360.47,3360.47,3356.04
2020-08-11,2900493747,3381.01,3326.44,3333.69,3333.69,3370.34
2020-08-12,2245003448,3387.89,3355.46,3380.35,3380.35,3355.46
2020-08-13,1964512010,3387.24,3363.35,3373.43,3373.43,3372.95
2020-08-14,1711020614,3378.51,3361.64,3372.85,3372.85,3368.66
2020-08-17,1951691959,3387.59,3379.22,3381.99,3381.99,3380.86
2020-08-18,1794617543,3395.06,3370.15,3389.78,3389.78,3387.04
2020-08-19,2018139580,3399.54,3369.66,3374.85,3374.85,3392.51
2020-08-20,1832094273,3390.8,3354.69,3385.51,3385.51,3360.48
