In [1]:
import requests
import pandas as pd
import numpy as np
from datetime import datetime
# documentation: https://www.alphavantage.co/documentation/#time-series-data
# VOO started 2010-09. API call returns trailing 30 days from current date. If month is specified is pulls in that month. Will have to pull the full data over several days since I'm capped at 25 calls a day. 
#url = 'https://www.alphavantage.co/query?function=TIME_SERIES_INTRADAY&symbol=VOO&interval=1min&month=2010-09&outputsize=full&apikey=TGOB3EBQFPDYLP2Z'
# not specifying month returns current 30 days
#url = 'https://www.alphavantage.co/query?function=TIME_SERIES_INTRADAY&symbol=VOO&interval=1min&outputsize=full&apikey=TGOB3EBQFPDYLP2Z'

## Pull VOO daily 1 minute open, close, high, low, and volume

In [2]:
# get list of files already loaded
from os import walk

filenames = next(walk(r'C:\Users\connor\PycharmProjects\trading\data\base\voo_intraday'), (None, None, []))[2][1:]

vantage_api_key = 'TGOB3EBQFPDYLP2Z'

In [3]:
min_date = np.datetime64(filenames[0][4:11].replace('_','-'))
# daily api call limit is 25
for dummy in range(25):
    # grab api data for the most recent month that has no data
    min_date = min_date + np.timedelta64(-1, 'M')
    url = f'https://www.alphavantage.co/query?function=TIME_SERIES_INTRADAY&symbol=VOO&interval=1min&month={min_date}&outputsize=full&apikey={vantage_api_key}'
    r = requests.get(url)
    data = r.json()
    print(data)

    # convert to dataframe
    df = pd.DataFrame.from_dict(data)
    # drop meta data rows
    df_no_meta = df.iloc[6:]
    df_no_meta.reset_index(inplace=True)
    date_time = df_no_meta['index']
    # split dict column out into 5 columns
    df_no_meta[['open', 'high', 'low', 'close', 'volume']] = df_no_meta['Time Series (1min)'].apply(pd.Series)
    df_no_meta = df_no_meta.drop(columns=['Meta Data','Time Series (1min)'])
    
    # save as csv file
    min_date_to_save_format = str(min_date).replace('-', '_')
    df_no_meta.to_csv(fr'C:\Users\connor\PycharmProjects\trading\data\base\voo_intraday\voo_{min_date_to_save_format}.csv', index=False)
    
    # confirm names made it this far
    print(fr'C:\Users\connor\PycharmProjects\trading\data\base\voo_intraday\voo_{min_date_to_save_format}.csv')

{'Meta Data': {'1. Information': 'Intraday (1min) open, high, low, close prices and volume', '2. Symbol': 'VOO', '3. Last Refreshed': '2010-09-30 15:58:00', '4. Interval': '1min', '5. Output Size': 'Full size', '6. Time Zone': 'US/Eastern'}, 'Time Series (1min)': {'2010-09-30 15:58:00': {'1. open': '81.1210', '2. high': '81.2250', '3. low': '81.0100', '4. close': '81.1520', '5. volume': '733'}, '2010-09-30 15:54:00': {'1. open': '81.1680', '2. high': '81.2710', '3. low': '81.0570', '4. close': '81.1990', '5. volume': '250'}, '2010-09-30 15:52:00': {'1. open': '81.2140', '2. high': '81.3180', '3. low': '81.1030', '4. close': '81.2450', '5. volume': '383'}, '2010-09-30 15:51:00': {'1. open': '81.2300', '2. high': '81.3340', '3. low': '81.1190', '4. close': '81.2610', '5. volume': '600'}, '2010-09-30 15:49:00': {'1. open': '81.1990', '2. high': '81.3020', '3. low': '81.0880', '4. close': '81.2300', '5. volume': '250'}, '2010-09-30 15:46:00': {'1. open': '81.1210', '2. high': '81.2250', '3

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_no_meta[['open', 'high', 'low', 'close', 'volume']] = df_no_meta['Time Series (1min)'].apply(pd.Series)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_no_meta[['open', 'high', 'low', 'close', 'volume']] = df_no_meta['Time Series (1min)'].apply(pd.Series)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-

{'Error Message': 'Invalid API call. Please retry or visit the documentation (https://www.alphavantage.co/documentation/) for TIME_SERIES_INTRADAY.'}


ValueError: If using all scalar values, you must pass an index

## Get technical indicators
### Exponential Moving Average
### on balance volume
### moving average convergence / divergence controllable moving average type

In [None]:
# 7 day EMA for VOO
url = f'https://www.alphavantage.co/query?function=EMA&symbol=VOO&interval=daily&time_period=30&series_type=open&apikey={vantage_api_key}'
r = requests.get(url)
ema_data = r.json()
print(ema_data['Technical Analysis: EMA'])

In [None]:
# convert to dataframe

df = pd.DataFrame.from_dict(ema_data['Technical Analysis: EMA'], orient='index')
df.reset_index(inplace=True)
df.rename(columns={'index':'date', 'EMA':'seven_day_ema'}, inplace=True)
df.to_csv(fr'C:\Users\connor\PycharmProjects\trading\data\base\ema\voo_ema.csv', index=False)
    
# confirm names made it this far
print(fr'C:\Users\connor\PycharmProjects\trading\data\base\ema\voo_ema.csv')

In [None]:
# on balance volume
url = f'https://www.alphavantage.co/query?function=OBV&symbol=VOO&interval=daily&apikey={vantage_api_key}'
r = requests.get(url)
obv_data = r.json()

print(obv_data['Technical Analysis: OBV'])

In [None]:
# convert to dataframe

df = pd.DataFrame.from_dict(obv_data['Technical Analysis: OBV'], orient='index')
df.reset_index(inplace=True)
df.rename(columns={'index':'date', 'OBV':'daily_obv'}, inplace=True)
df.to_csv(fr'C:\Users\connor\PycharmProjects\trading\data\base\obv\voo_obv.csv', index=False)
    
# confirm names made it this far
print(fr'C:\Users\connor\PycharmProjects\trading\data\base\obv\voo_obv.csv')

In [None]:
# MACDEXT
url = f'https://www.alphavantage.co/query?function=MACDEXT&symbol=VOO&interval=daily&series_type=open&apikey={vantage_api_key}'
r = requests.get(url)
macdext_data = r.json()

print(macdext_data)

In [None]:
# convert to dataframe

df = pd.DataFrame.from_dict(macdext_data['Technical Analysis: MACDEXT'], orient='index')
df.reset_index(inplace=True)
df.rename(columns={'index':'date', 'MACD':'macd', 'MACD_Signal':'macd_signal', 'MACD_Hist':'macd_hist'}, inplace=True)
df.to_csv(fr'C:\Users\connor\PycharmProjects\trading\data\base\macdext\voo_macdext.csv', index=False)
    
# confirm names made it this far
print(fr'C:\Users\connor\PycharmProjects\trading\data\base\macdext\voo_macdext.csv')

## Pull FRED GDP Data
### GDP Per Capita
### Treasury Yield
### Federal Funds Rate
### CPI
### Inflation
### Retail Sales
### Unemployment

In [None]:
# API key 
fred_key = '8573006c071842b6ad8002cffce7cc30'

# real GDP, 10-Year Treasury Constant Maturity Minus Federal Funds Rate,  Federal Funds Effective Rate, Consumer Price Index: All Items: Total for United States, 10-Year Expected Inflation, consumer prices for the United States, Advance Retail Sales: Retail Trade,  Unemployment Rate
fred_table_list = ['GDPC1', 'T10YFF', 'DFF', 'CPALTT01USM657N', 'EXPINF10YR', 'RSXFS', 'UNRATE']

In [None]:
# Pull each table from FRED API and save as CSV
for table in fred_table_list:
    fred_url = fred_url = f'https://api.stlouisfed.org/fred/series/observations?series_id={table}&api_key={fred_key}&file_type=json'
    fred_request = requests.get(fred_url)
    data = fred_request.json()
    # convert to dataframe, keep only the observations data
    df_data = pd.DataFrame(data['observations'])
    # These two fields are useful for keeping track of type 2 data. We only need current as-is data so these fields aren't needed
    df_data.drop(columns=['realtime_start', 'realtime_end'], inplace=True)
    df_data.sort_values(by=['date'], inplace=True)
    df_data.rename(columns={'date':'date', 'value': table}, inplace=True)
    df_data.to_csv(fr'C:\Users\connor\PycharmProjects\trading\data\base\fred_data\{table}.csv', index=False)
    print(df_data.head())
    print(f"Successfully saved Fred {table} data")