# Obtain and Append Data

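This notebook downloads historical price data from the API once, saves it to a CSV file, and on later runs requests only the days that are missing and appends them, so that repeated use stays within the API's request limits.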

In [1]:
import pandas as pd
from datetime import date
from datetime import timedelta
import plotly.graph_objects as go

import requests
import json
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.ticker as ticker

import numpy as np
from IPython.display import clear_output
import time
# Set plot space as inline for inline plots and qt for external plots
%matplotlib inline

In [2]:
# Show the Python version of the running kernel
import platform
print(platform.python_version())

3.8.3


In [3]:
# Obtain the API credentials from an external hidden file

from passwords import api_key

In [4]:
# A function to obtain price history

#Utilizing "UniBit API" by Vincent Jiang
#Site: https://unibit.ai/product

# Requests may fail once the API credits run out; the function below reports any non-200 status code and returns None in that case.

def retrieve_ticker_price_info(ticker, start_date, end_date = False):
    """Request historical price data for one ticker from the UniBit API.

    Parameters
    ----------
    ticker : str
        Ticker symbol to query, e.g. '^GSPC'.
    start_date : str or datetime.date
        First day of the requested range. It is sent as ``str(start_date)``.
    end_date : str or datetime.date, optional
        Last day of the requested range. Any falsy value (the default) means
        "today".

    Returns
    -------
    dict or list or None
        The decoded JSON body when the server answers with status 200.
        For any other status the code is printed and ``None`` is returned,
        so callers must check for ``None`` before using the result.

    Raises
    ------
    requests.exceptions.RequestException
        If the request cannot be completed, including when the server does
        not respond within the timeout.
    """
    # Fall back to today's date when no end date is given
    if not end_date:
        end_date = date.today()

    url = 'https://api.unibit.ai/v2/stock/historical/'

    # Let requests build the query string so every value is URL-encoded
    # (tickers such as '^GSPC' contain characters that are not URL-safe).
    query = {
        'tickers': ticker,
        'interval': 1,
        'startDate': str(start_date),
        'endDate': str(end_date),
        'selectedFields': 'all',
        'dataType': 'json',
        'accessKey': api_key,
    }

    # A timeout keeps a stalled connection from hanging the notebook forever
    response = requests.get(url, params=query, timeout=30)

    if response.status_code != 200:
        print("The response status code is: " + str(response.status_code))
        return None

    return response.json()
    

In [5]:
def retrieve_price_history_dataframe(ticker, ticker_price_history):
    """Build a date-indexed DataFrame from a price-history response.

    The records are read from ``ticker_price_history['result_data'][ticker]``.
    The 'date' column becomes the index and the rows are returned sorted by
    that index in ascending order.
    """
    records = ticker_price_history['result_data'][ticker]

    # Chain the steps instead of mutating the frame in place
    return pd.DataFrame(records).set_index('date').sort_index()

In [16]:
# Read the clock once and derive every start date from that single value, so
# the dates cannot disagree if the cell happens to run across midnight.
today = date.today()

# Start dates as 'YYYY-MM-DD' strings. A year is approximated as 365 days,
# so leap days are ignored and the five-year start is a day or two late.
a_year_ago = str(today - timedelta(days=1*365))
some_five_years_ago = str(today - timedelta(days=5*365))

## Obtain Original Data from the API

In [17]:
# '^GSPC' is the ticker symbol for the S&P 500 index

# Obtain the price info from roughly five years ago onwards.
# No end date is passed, so the requested range ends today.
inx_info = retrieve_ticker_price_info('^GSPC', some_five_years_ago)

# Create and view the dataframe.
# retrieve_ticker_price_info returns None on a non-200 response; if that
# happened, the call below fails because None cannot be indexed.
inx_df = retrieve_price_history_dataframe('^GSPC', inx_info)
inx_df.head()

Unnamed: 0_level_0,volume,high,low,adj_close,close,open
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2015-08-04,3546710000,2102.51,2088.6,2093.32,2093.32,2097.68
2015-08-05,3968680000,2112.66,2095.27,2099.84,2099.84,2095.27
2015-08-06,4246570000,2103.32,2075.53,2083.56,2083.56,2100.75
2015-08-07,3602320000,2082.61,2067.91,2077.57,2077.57,2082.61
2015-08-10,3514460000,2105.35,2080.98,2104.18,2104.18,2080.98


In [11]:
# Save the data as a CSV, keeping the index because it contains the dates.
# This writes the five-year download to its own file. The import section
# further down reads 'financial_data/GSPC.csv', so to use this download there,
# save it under that name instead:
#inx_df.to_csv('financial_data/GSPC.csv')
inx_df.to_csv('financial_data/august_5_2015_to_now_GSPC.csv')

## Import a CSV of Time Series Data

This code assumes the data is sorted in ascending date order. If it is not, sort it by date (or make the equivalent adjustments) before continuing.

In [8]:
# Load the previously saved price history, using the 'date' column as the index
inx_df = pd.read_csv('financial_data/GSPC.csv', index_col = 'date')

# Convert the index values to datetime.date objects so that date arithmetic
# (e.g. adding a timedelta to the latest date) works further down.
# Comment this out if the index is already date type
inx_df.index = pd.to_datetime(inx_df.index).date

inx_df.head()

Unnamed: 0,volume,high,low,adj_close,close,open
2019-07-22,3003720000,2990.71,2976.65,2985.03,2985.03,2981.93
2019-07-23,3313660000,3005.9,2988.56,3005.47,3005.47,2994.74
2019-07-24,3428980000,3019.59,2996.82,3019.56,3019.56,2998.77
2019-07-25,3645270000,3016.31,2997.24,3003.67,3003.67,3016.26
2019-07-26,3257590000,3027.98,3012.59,3025.86,3025.86,3013.25


In [9]:
# Obtain the most recent date in the saved series; new data is requested
# starting from the day after this one
inx_df.index.max()

datetime.date(2020, 6, 30)

## Obtain Most Recent Data From The API

In [10]:
# Obtain the day after the saved series ends, as a string, to use as the
# start date of the next API request (this relies on the index holding
# datetime.date values so a timedelta can be added)
next_day_series_ends = str(inx_df.index.max() + timedelta(days=1))

In [11]:
# '^GSPC' is the ticker symbol for the S&P 500 index

# Obtain only the price info that is missing from the saved series.
# No end date is passed, so the requested range ends today.
additional_info = retrieve_ticker_price_info('^GSPC', next_day_series_ends)

# Create and view the dataframe.
# retrieve_ticker_price_info returns None on a non-200 response; if that
# happened, the call below fails because None cannot be indexed.
additional_info_df = retrieve_price_history_dataframe('^GSPC', additional_info)
additional_info_df.head()

Unnamed: 0_level_0,volume,high,low,adj_close,close,open
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-07-01,2697727407,3128.44,3101.17,3115.86,3115.86,3105.92
2020-07-02,2326767585,3165.81,3124.52,3130.01,3130.01,3143.64
2020-07-06,2380431984,3182.59,3155.29,3179.72,3179.72,3155.29
2020-07-07,2287837089,3184.15,3142.93,3145.32,3145.32,3166.44
2020-07-08,2390189825,3171.8,3136.53,3169.94,3169.94,3153.07


## Append The Original Dataframe

You can either append the new rows to the old file or write the combined data to a new one.

In [12]:
# inx_df is indexed by datetime.date objects, while the freshly downloaded
# frame is indexed by the API's date strings. Convert the new index first,
# otherwise the combined index mixes str and date values and cannot be sorted
# or compared. A copy is converted so additional_info_df itself is unchanged.
additional_info_dated = additional_info_df.copy()
additional_info_dated.index = pd.to_datetime(additional_info_dated.index).date

# Stack the new rows underneath the saved history
result = pd.concat([inx_df, additional_info_dated])

In [13]:
# Check the end of the combined frame to confirm the new rows were appended
result.tail(15)

Unnamed: 0,volume,high,low,adj_close,close,open
2020-07-13,2695331634,3235.32,3149.43,3155.22,3155.22,3205.08
2020-07-14,2628539056,3200.95,3127.66,3197.52,3197.52,3141.11
2020-07-15,2841976156,3238.28,3200.76,3226.56,3226.56,3225.98
2020-07-16,1718211640,3217.51,3198.59,3215.57,3217.5,3208.36
2020-07-17,2219971191,3233.52,3205.65,3224.73,3224.73,3224.21
2020-07-20,2171757450,3258.61,3215.16,3251.84,3251.84,3224.29
2020-07-21,2472394555,3277.29,3247.77,3257.3,3257.3,3268.52
2020-07-22,2412594515,3279.32,3253.1,3276.02,3276.02,3254.86
2020-07-23,2737810512,3279.99,3222.66,3235.66,3235.66,3271.64
2020-07-24,2460013758,3227.26,3200.05,3215.63,3215.63,3218.58


## Save The New Dataframe

You can overwrite the old file as well

In [14]:
# Save the combined data as a CSV.
# index_label names the index column 'date' in the file so it can be read
# back with index_col = 'date'.
result.to_csv('financial_data/GSPC_newdata.csv', index_label = 'date')

In [15]:
# Read the file back in and view its last rows to verify what was written
pd.read_csv('financial_data/GSPC_newdata.csv', index_col = 'date').tail(15)

Unnamed: 0_level_0,volume,high,low,adj_close,close,open
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-07-13,2695331634,3235.32,3149.43,3155.22,3155.22,3205.08
2020-07-14,2628539056,3200.95,3127.66,3197.52,3197.52,3141.11
2020-07-15,2841976156,3238.28,3200.76,3226.56,3226.56,3225.98
2020-07-16,1718211640,3217.51,3198.59,3215.57,3217.5,3208.36
2020-07-17,2219971191,3233.52,3205.65,3224.73,3224.73,3224.21
2020-07-20,2171757450,3258.61,3215.16,3251.84,3251.84,3224.29
2020-07-21,2472394555,3277.29,3247.77,3257.3,3257.3,3268.52
2020-07-22,2412594515,3279.32,3253.1,3276.02,3276.02,3254.86
2020-07-23,2737810512,3279.99,3222.66,3235.66,3235.66,3271.64
2020-07-24,2460013758,3227.26,3200.05,3215.63,3215.63,3218.58
