# Index Data

Sheet designed to aggregate all the useful data and store it in a database.

In [104]:
import pandas as pd
import hvplot.pandas
import sqlalchemy
from dotenv import load_dotenv
from pathlib import Path
import requests
import json
import os

load_dotenv('data/.env')
AVAPI = os.getenv('Alpha_Vantage_API')

database_connection_string = 'sqlite:///SP500.db'
engine = sqlalchemy.create_engine(database_connection_string)

### Import SP500  Closing Data

from https://www.spglobal.com/spdji/en/indices/equity/sp-500/#overview

In [105]:
SP500_Data = pd.read_csv(Path("Data/SP500to2012.csv"), index_col='Date', parse_dates=True, infer_datetime_format=True)
SP500_Data.head()

Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
2012-01-31,1312.41
2012-02-01,1324.09
2012-02-02,1325.54
2012-02-03,1344.9
2012-02-06,1344.33


### Import Constituent Info

From: https://datahub.io/core/s-and-p-500-companies-financials#resource-constituents-financials

In [106]:
SP500_Constituents_details = pd.read_csv(Path("Data/constituents_financials.csv"), index_col='Symbol')
SP500_Constituents_details = SP500_Constituents_details.sort_values(by=['Market Cap'], ascending=False)
SP500_Constituents_details.head()

Unnamed: 0_level_0,Name,Sector,Price,Price/Earnings,Dividend Yield,Earnings/Share,52 Week Low,52 Week High,Market Cap,EBITDA,Price/Sales,Price/Book,SEC Filings
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
AAPL,Apple Inc.,Information Technology,155.15,16.86,1.579541,9.2,180.1,131.12,809508000000.0,79386000000.0,3.458609,5.66,http://www.sec.gov/cgi-bin/browse-edgar?action...
GOOGL,Alphabet Inc Class A,Information Technology,1007.71,31.48,0.0,22.27,1198.0,824.3,733824000000.0,34217000000.0,6.801692,4.7,http://www.sec.gov/cgi-bin/browse-edgar?action...
GOOG,Alphabet Inc Class C,Information Technology,1001.52,40.29,0.0,22.27,1186.89,803.1903,728536000000.0,32714000000.0,6.772653,4.67,http://www.sec.gov/cgi-bin/browse-edgar?action...
MSFT,Microsoft Corp.,Information Technology,85.01,25.76,1.874791,2.97,96.07,63.22,689978000000.0,41079000000.0,7.113097,9.49,http://www.sec.gov/cgi-bin/browse-edgar?action...
AMZN,Amazon.com Inc,Consumer Discretionary,1350.5,296.16,0.0,6.16,1498.0,812.5,685873000000.0,16132000000.0,3.927053,24.28,http://www.sec.gov/cgi-bin/browse-edgar?action...


In [77]:
SP500_List = SP500_Constituents_details.index.tolist()

### Import an Individual Stock Ticker

from: Alpha Advantage API https://www.alphavantage.co/documentation/#dailyadj

In [40]:
url = f'https://www.alphavantage.co/query?function=TIME_SERIES_DAILY&symbol=IBM&outputsize=full&apikey={AVAPI}'
r = requests.get(url)
data = r.json()
print(json.dumps(data, indent=4, sort_keys=True))

{
    "Meta Data": {
        "1. Information": "Daily Prices (open, high, low, close) and Volumes",
        "2. Symbol": "IBM",
        "3. Last Refreshed": "2022-02-04",
        "4. Output Size": "Full size",
        "5. Time Zone": "US/Eastern"
    },
    "Time Series (Daily)": {
        "1999-11-01": {
            "1. open": "98.5000",
            "2. high": "98.8100",
            "3. low": "96.3700",
            "4. close": "96.7500",
            "5. volume": "9551800"
        },
        "1999-11-02": {
            "1. open": "96.7500",
            "2. high": "96.8100",
            "3. low": "93.6900",
            "4. close": "94.8100",
            "5. volume": "11105400"
        },
        "1999-11-03": {
            "1. open": "95.8700",
            "2. high": "95.9400",
            "3. low": "93.5000",
            "4. close": "94.3700",
            "5. volume": "10369100"
        },
        "1999-11-04": {
            "1. open": "94.4400",
            "2. high": "94.4400",
     

In [47]:
df=pd.DataFrame.from_dict(data['Time Series (Daily)'], orient="index")
df.head()

Unnamed: 0,1. open,2. high,3. low,4. close,5. volume
2022-02-04,137.86,138.82,136.215,137.15,4142045
2022-02-03,137.0,138.76,135.831,137.78,6100777
2022-02-02,135.7,137.555,135.26,137.25,5357237
2022-02-01,133.76,135.96,132.5,135.53,6206448
2022-01-31,134.09,134.09,132.3,133.57,5859043


### Aggregate Constituent DF

Create the table, with AAPL, then append the rest of the table with a for loop

In [117]:
# COMMENTED OUT, no longer needed, used to initilize the database.


# Initial_url = f'https://www.alphavantage.co/query?function=TIME_SERIES_DAILY&symbol=AAPL&outputsize=full&apikey={AVAPI}'
# response = requests.get(Initial_url)
# data = response.json()
# AAPL_df=pd.DataFrame.from_dict(data['Time Series (Daily)'], orient="index")
# AAPL_df = AAPL_df.rename(columns={'4. close':'AAPL'})
# AAPL_df = AAPL_df.drop(columns={'1. open','2. high','3. low','5. volume'})
# df.to_sql('close', engine)
# AAPL_df.head()

In [118]:
return_table = pd.read_sql_table('close', engine)
return_table.head()

Unnamed: 0,index,AAPL
0,2022-02-04,172.39
1,2022-02-03,172.9
2,2022-02-02,175.84
3,2022-02-01,174.61
4,2022-01-31,174.78


  """Entry point for launching an IPython kernel.


['close']

In [116]:
return_table = pd.read_sql_table('close', engine)
return_table.head()

Unnamed: 0,index,AAPL
0,2022-02-04,172.39
1,2022-02-03,172.9
2,2022-02-02,175.84
3,2022-02-01,174.61
4,2022-01-31,174.78
