# Investigate Price Correlations w/ Fundamentals

### For each metric of interest, X:

- create by-year metric data, X per share
- calculate the average ratio for price-to-X
- rate the price correlations in the following ways:
    - calculate the correlation between price and X
    - plot the price against the price-per-average-price-to-X-ratio

In [1]:
import json
import requests
import numpy as np
import yfinance as yf
from datetime import datetime

## Determine the earliest dates of price and financials data to include in analysis

In [2]:
# number of years of history covered by the analysis
analysis_interval_years = 20

# derive the earliest calendar year from the current UTC year, then:
#   - quote history starts on January 1st of that year
#   - financials history starts one year before that
start_year = datetime.utcnow().year - analysis_interval_years
start_year_financials_history = start_year - 1
start_date_quote_history = datetime(year=start_year, month=1, day=1)

print("Start date of quote history data:", start_date_quote_history)
print("Start year of financials history data:", start_year_financials_history)

Start date of quote history data: 2001-01-01 00:00:00
Start year of financials history data: 2000


## Pull and prep stock price data

- Use Apple as an example for the code below

### <font color=blue>Helper functions</font>

In [5]:
def get_payload_json_quote_history_from_yfinance_df(df_quote_history, header='Close'):
    """
    This helper function gets a cleaned-up payload json/dict from the dataframe of
    historical quotes, which is returned by yfinance api (yfinance.Ticker('<symbol>').history)

    Parameters:
        df_quote_history: dataframe of historical quotes; its index entries must
            provide `to_pydatetime()` (e.g. pandas Timestamps).
        header: name of the column to extract (default 'Close').

    Returns:
        A dictionary of "<datetime>: <value>" for the selected column, where the
        keys are the index entries converted with `to_pydatetime()`. Entries whose
        value is NaN are left out. If the same timestamp appears more than once
        in the index, the value of the last such row is kept.
    """

    _series = df_quote_history[header]

    # Walk the index and the values side by side instead of building a temporary
    # dict and looking every label up again; this also keeps working when the
    # index contains a repeated timestamp (a label lookup would then return
    # several rows instead of a single value).
    return {
        timestamp.to_pydatetime(): price
        for timestamp, price in zip(_series.index, _series.to_numpy())
        if not np.isnan(price)
    }


def get_quote_history(symbol, interval='1mo', header='Close',
                      start=None, end=None):
    """
    This helper function pulls raw historical quote data using the YFinance API,
    and returns the cleaned up data in a dictionary of "<timestamp>: <price>".

    Parameters:
        symbol: ticker symbol passed to yfinance.Ticker (e.g. 'AAPL').
        interval: quote interval passed to the history call (default '1mo').
        header: column of the quote dataframe to extract (default 'Close').
        start: start date as a 'YYYY-MM-DD' string; when omitted, the module-level
            start_date_quote_history is used.
        end: end date as a 'YYYY-MM-DD' string; when omitted, the current UTC
            date is used.

    Returns:
        The dictionary produced by get_payload_json_quote_history_from_yfinance_df
        for the requested column.
    """

    # Resolve the date window at call time. Default argument values are evaluated
    # only once, when the function is defined, so computing the dates in the
    # signature would freeze "today" (and the start date) at definition time.
    if start is None:
        start = start_date_quote_history.strftime('%Y-%m-%d')
    if end is None:
        end = datetime.utcnow().strftime('%Y-%m-%d')

    # initialize an yfinance object, and get the historical quote data
    # from the API
    yf_object = yf.Ticker(symbol)
    df_quote_history = yf_object.history(interval=interval, start=start, end=end)

    # return a "timestamp": "price" dictionary from the dataframe of historical quotes
    return get_payload_json_quote_history_from_yfinance_df(df_quote_history,
                                                           header=header)


# testing

# aapl_quote_history = get_quote_history('AAPL')
# aapl_quote_history

# testing successful