In [None]:
import re
import json
import time
import requests
import pandas as pd
from datetime import datetime as dt

In [None]:
# Base URL of the Yahoo Finance quoteSummary endpoint (API version v10).
QUOTE_SUMMARY_BASE_URL = "https://query1.finance.yahoo.com/v10/finance/quoteSummary/"
# Base URL of the Yahoo Finance chart endpoint (API version v8); the ticker
# symbol is appended to it when historical data is requested.
CHART_BASE_URL = "https://query1.finance.yahoo.com/v8/finance/chart/"

In [None]:
# Helper functions

def get_response(url, parameters=None, timeout=30):
    """
    Description
    ----
    Sends a GET request with a browser-like User-Agent header and returns
    the decoded JSON body.

    Inputs
    ----
    url (string)
        The address to request.
    parameters (dict, optional)
        Query-string parameters, forwarded to requests as ``params``.
    timeout (float, optional)
        Seconds to wait for the server before giving up (default = 30).
        Without a timeout a stalled connection would block forever.

    Output
    ----
    data (dict or list)
        The response body as parsed by ``response.json()``.
    """
    # NOTE: the backslash continuations sit inside the string literal, so the
    # header value contains the leading whitespace of the continuation lines.
    # The literal is kept exactly as it was.
    user_agent_header = {
              "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) \
                             AppleWebKit/605.1.15 (KHTML, like Gecko) \
                             Chrome/100.0.4896.127 Safari/605.1.15 Firefox/100.0"
             }

    # The context manager closes the session (and its pooled connections)
    # even when the request or the JSON decoding raises.
    with requests.Session() as session:
        response = session.get(
            url=url,
            params=parameters,
            headers=user_agent_header,
            timeout=timeout,
        )
        # The HTTP status is deliberately not checked: callers inspect the
        # decoded payload themselves.
        return response.json()


def get_companies():
    """
    Description
    ----
    Downloads the Wikipedia page listing the S&P 500 companies.

    Output
    ----
    data (dataframe)
        The first table that pandas.read_html finds on the page.
    """
    tables = pd.read_html("https://en.wikipedia.org/wiki/List_of_S&P_500_companies")
    return tables[0]


def datetime_to_timestamp(val):
    """
    Description
    ----
    Converts a "%Y-%m-%d %H:%M:%S" string, interpreted as local time by
    time.mktime, into a Unix timestamp in whole seconds.
    """
    parsed = time.strptime(val, "%Y-%m-%d %H:%M:%S")
    seconds = time.mktime(parsed)
    return int(seconds)


def timestamp_to_datetime(val):
    """
    Description
    ----
    Converts a Unix timestamp into a naive local datetime
    (via datetime.fromtimestamp).
    """
    converted = dt.fromtimestamp(val)
    return converted


def camelcase_to_title(o):
    """
    Description
    ----
    Turns camelCase names into title-cased words,
    e.g. "totalRevenue" -> "Total Revenue".

    Inputs
    ----
    o (iterable of strings)
        The names to convert.

    Output
    ----
    titles (list)
        One converted string per input element, in the same order.
    """
    titles = []
    for name in o:
        # Insert a space at every lowercase-to-uppercase boundary.
        spaced = re.sub("([a-z])([A-Z])", r"\g<1> \g<2>", name)
        titles.append(spaced.title())
    return titles


In [None]:
# Sanity check: date-time string -> Unix timestamp (result depends on the local time zone).
datetime_to_timestamp("2022-02-19 12:45:23")

1645274723

In [None]:
# Sanity check: the reverse conversion gives back the original date-time string.
str(timestamp_to_datetime(1645274723))

'2022-02-19 12:45:23'

In [None]:
# Ticker symbols of interest, in alphabetical order.
symbol_list = [
    "AAPL", "ABBV", "ABT", "AMGN", "AMZN",
    "ASML", "BBY", "BMY", "C", "CAT",
    "CL", "COST", "CVX", "DE", "GOOGL",
    "GS", "HD", "HON", "HPQ", "IBM",
    "INTC", "JNJ", "JPM", "KO", "LLY",
    "LOW", "MA", "MCD", "MMM", "MRK",
    "MSFT", "MU", "NKE", "PEP", "PFE",
    "PG", "TGT", "TJX", "TMO", "TSM",
    "UNH", "V", "WM", "WMT", "XOM",
]

In [None]:
# Keep only the constituents named in symbol_list, ordered by ticker symbol.
df = (
    get_companies()
    .loc[lambda frame: frame['Symbol'].isin(symbol_list)]
    .sort_values('Symbol')
    .reset_index(drop=True)
)
df[['Symbol', 'GICS Sector']]

Unnamed: 0,Symbol,GICS Sector
0,AAPL,Information Technology
1,ABBV,Health Care
2,ABT,Health Care
3,AMGN,Health Care
4,AMZN,Consumer Discretionary
5,BBY,Consumer Discretionary
6,BMY,Health Care
7,C,Financials
8,CAT,Industrials
9,CL,Consumer Staples


In [None]:
# Display the unfiltered table returned by get_companies() (downloads the page again).
get_companies()

Unnamed: 0,Symbol,Security,SEC filings,GICS Sector,GICS Sub-Industry,Headquarters Location,Date first added,CIK,Founded
0,MMM,3M,reports,Industrials,Industrial Conglomerates,"Saint Paul, Minnesota",1976-08-09,66740,1902
1,AOS,A. O. Smith,reports,Industrials,Building Products,"Milwaukee, Wisconsin",2017-07-26,91142,1916
2,ABT,Abbott,reports,Health Care,Health Care Equipment,"North Chicago, Illinois",1964-03-31,1800,1888
3,ABBV,AbbVie,reports,Health Care,Pharmaceuticals,"North Chicago, Illinois",2012-12-31,1551152,2013 (1888)
4,ABMD,Abiomed,reports,Health Care,Health Care Equipment,"Danvers, Massachusetts",2018-05-31,815094,1981
...,...,...,...,...,...,...,...,...,...
498,YUM,Yum! Brands,reports,Consumer Discretionary,Restaurants,"Louisville, Kentucky",1997-10-06,1041061,1997
499,ZBRA,Zebra Technologies,reports,Information Technology,Electronic Equipment & Instruments,"Lincolnshire, Illinois",2019-12-23,877212,1969
500,ZBH,Zimmer Biomet,reports,Health Care,Health Care Equipment,"Warsaw, Indiana",2001-08-07,1136869,1927
501,ZION,Zions Bancorporation,reports,Financials,Regional Banks,"Salt Lake City, Utah",2001-06-22,109380,1873


In [None]:
# Valid ranges:     [1d, 5d, 1mo, 3mo, 6mo, 1y, 2y, 5y, 10y, ytd, max]
# Valid intervals:  [1m, 2m, 5m, 15m, 30m, 60m, 90m, 1h, 1d, 5d, 1wk, 1mo, 3mo]

In [None]:
# Current Unix time in whole seconds, obtained two ways: time module vs. datetime.
t = int(time.time())
d = int(dt.now().timestamp())
t, d

(1656507011, 1656507011)

In [None]:
from pandas._libs.tslibs.timedeltas import delta_to_nanoseconds
from pandas.core.indexes.interval import interval_range
class SingleBase:
    """
    Description
    ----
    Base class holding the download logic for a single ticker symbol: it
    queries the chart endpoint and converts the payload into dataframes.
    """

    # Intervals whose rows keep their time of day. For every other interval
    # rows are labelled by calendar date and the "adjclose" series is merged in.
    _INTRADAY_INTERVALS = ("1m", "2m", "5m", "15m", "30m", "60m", "90m", "1h")
    # Look-back applied when no start is given for the "1m" interval
    # (7 days expressed in seconds).
    _ONE_MINUTE_LOOKBACK = 604800
    # Default start for all other intervals: a negative Unix time, i.e. a
    # moment well before 1970.
    _EARLIEST_START = -631159200
    _NO_DATA_MESSAGE = "No data available. Please make sure that the range and/or interval is valid."

    def __init__(self, ticker):
        """Store the ticker symbol in upper case."""
        self._ticker = ticker.upper()

    @staticmethod
    def _to_timestamp(value):
        """
        Convert a date-like value into a Unix timestamp (whole seconds,
        local time as interpreted by time.mktime).

        Accepts a datetime instance, or anything whose str() is formatted
        as %Y-%m-%d (a string or a datetime.date, for example).
        """
        # `dt` is the datetime class itself (imported as
        # `from datetime import datetime as dt`), so it is the right target
        # for isinstance.
        if isinstance(value, dt):
            return int(time.mktime(value.timetuple()))
        return int(time.mktime(time.strptime(str(value), "%Y-%m-%d")))

    def _fetch_historical_data(self, start, end, range, interval, events=None):
        """
        Description
        ----
        Requests the chart data of the ticker.

        Inputs
        ----
        start, end (string, datetime or None)
            Bounds of the requested period. Strings use the format %Y-%m-%d.
            A missing end means "now"; a missing start means one week before
            the end for the "1m" interval and a fixed early date otherwise.
        range (string or None)
            Range code. It is only sent when no start is given and the range
            is neither None nor "max"; otherwise start/end are sent.
        interval (string)
            Interval code (case-insensitive).
        events (string, optional)
            Value of the "events" query parameter (default = "div,splits").

        Output
        ----
        data (dict)
            The first element of response["chart"]["result"].
        """
        url = f"{CHART_BASE_URL}{self._ticker}"
        interval = interval.lower()

        if start or range is None or range.lower() == "max":
            end = int(time.time()) if end is None else self._to_timestamp(end)
            if start is None:
                if interval == "1m":
                    start = end - self._ONE_MINUTE_LOOKBACK
                else:
                    start = self._EARLIEST_START
            else:
                start = self._to_timestamp(start)
            params = {"period1": start, "period2": end}
        else:
            params = {"range": range.lower()}

        params["interval"] = interval
        # Honour the caller's choice; fall back to the previous fixed value.
        params["events"] = events if events is not None else "div,splits"

        response = get_response(url, params)

        # When the service has no data, "result" is not a list and the
        # subscript raises TypeError, which the callers translate.
        return response["chart"]["result"][0]

    def _get_price_history(self, start, end, range, interval):
        """
        Description
        ----
        Returns the quotes of the ticker as a dataframe.

        Output
        ----
        data (dataframe)
            One row per timestamp and one column per quote series. Intraday
            intervals are indexed by datetime; all other intervals are
            indexed by date and include the "adjclose" series.

        Raises
        ----
        TypeError
            When the response contains no result for the request.
        """
        try:
            data = self._fetch_historical_data(start, end, range, interval)
        except TypeError:
            raise TypeError(self._NO_DATA_MESSAGE)

        quotes = data["indicators"]["quote"][0]

        # Compare in lower case, matching what was sent in the request.
        if interval.lower() in self._INTRADAY_INTERVALS:
            dates = [dt.fromtimestamp(int(ts)) for ts in data["timestamp"]]
        else:
            dates = [dt.fromtimestamp(int(ts)).date() for ts in data["timestamp"]]
            quotes.update(data["indicators"]["adjclose"][0])

        return pd.DataFrame(quotes, index=dates)

    def _get_dividends(self, start, end, range):
        """
        Description
        ----
        Returns the dividends of the ticker as a dataframe.

        Output
        ----
        data (dataframe)
            Payment dates in rows and a single "dividends" column.

        Raises
        ----
        TypeError
            When the response contains no result for the request.
        KeyError
            When the result carries no dividend events.
        """
        try:
            data = self._fetch_historical_data(start, end, range, interval="1d", events="div")
        except TypeError:
            raise TypeError(self._NO_DATA_MESSAGE)

        try:
            dividends = data["events"]["dividends"]
        except KeyError:
            raise KeyError("No data available. Please make sure that the required range is more than 3 months.")

        payments = list(dividends.values())
        dates = [dt.fromtimestamp(int(payment["date"])).date() for payment in payments]
        amounts = [payment["amount"] for payment in payments]

        return pd.DataFrame(amounts, index=dates, columns=["dividends"])

In [None]:
class Ticker(SingleBase):
    """Public interface for the historical data of a single ticker symbol."""

    def __repr__(self):
        return "fa.Ticker object <%s>" % self._ticker

    def price_history(self, start=None, end=None, range="max", interval="1d"):
        """
        Description
        ----
        Returns the historical prices of a company.

        Inputs
        ----
        start (string)
            First day of the requested period, formatted as %Y-%m-%d.
            Choose either start/end or range.
        end (string)
            Last day of the requested period, formatted as %Y-%m-%d.
            Choose either start/end or range.
        range (string)
            The range of the data (default = "max").
            Valid ranges: "1d", "5d", "1mo", "3mo", "6mo", "1y", "2y", "5y",
                          "10y", "ytd", "max"
        interval (string)
            The interval of the data (default = "1d").
            Valid intervals: "1m", "2m", "5m", "15m", "30m", "60m", "90m",
                             "1h", "1d", "5d", "1wk", "1mo", "3mo".
            Note that an interval less than "1h" can only hold a range "1mo"
            or less.

        Output
        ----
        data (dataframe)
            Data with dates in rows and the quotes in columns.
        """
        return self._get_price_history(
            start=start, end=end, range=range, interval=interval
        )

    def dividends(self, start=None, end=None, range="max"):
        """
        Description
        ----
        Returns the historical dividends of a company.

        Inputs
        ----
        start (string)
            First day of the requested period, formatted as %Y-%m-%d.
            Choose either start/end or range.
        end (string)
            Last day of the requested period, formatted as %Y-%m-%d.
            Choose either start/end or range.
        range (string)
            The range of the data (default = "max").
            Valid ranges: "3mo", "6mo", "1y", "2y", "5y", "10y", "ytd", "max"

        Output
        ----
        data (dataframe)
            Data with dates in rows and a single "dividends" column.
        """
        return self._get_dividends(start=start, end=end, range=range)


In [None]:
# Request only the dividend events of AAPL for a fixed period and tabulate them.
response = get_response("https://query1.finance.yahoo.com/v8/finance/chart/AAPL?period1=1643673600&period2=1645315200&interval=&events=div")
data = response["chart"]["result"][0]["events"]["dividends"]
dates = [dt.fromtimestamp(int(event["date"])).date() for event in data.values()]
values = [event["amount"] for event in data.values()]
pd.DataFrame(values, index=dates, columns=["dividends"])

Unnamed: 0,dividends
2022-02-04,0.22


In [None]:
# Interval "1m" available for the ranges from "1m" to "7d" | "1wk"
# Intervals "2m", "5m", "15m", "30m", "90m" available for the ranges from "1m" to "60d" | "8wk" | "1mo"
# Intervals "60m", "1h" available for the ranges from "1m" to "730d" | "104wk" | "23mo" | "2y"
# Intervals "1d", "1wk", "1mo", "3mo" available for all ranges

In [None]:
# Price history for AAPL with the defaults: range "max", interval "1d".
company = Ticker("aapl")
company.price_history()

Unnamed: 0,low,high,open,volume,close,adjclose
1980-12-12,0.128348,0.128906,0.128348,469033600,0.128348,0.100178
1980-12-15,0.121652,0.122210,0.122210,175884800,0.121652,0.094952
1980-12-16,0.112723,0.113281,0.113281,105728000,0.112723,0.087983
1980-12-17,0.115513,0.116071,0.115513,86441600,0.115513,0.090160
1980-12-18,0.118862,0.119420,0.118862,73449600,0.118862,0.092774
...,...,...,...,...,...,...
2022-06-22,133.910004,137.759995,134.789993,73409200,135.350006,135.350006
2022-06-23,135.630005,138.589996,136.820007,72433800,138.270004,138.270004
2022-06-24,139.770004,141.910004,139.899994,89047400,141.660004,141.660004
2022-06-27,140.970001,143.490005,142.699997,70207900,141.660004,141.660004


In [None]:
# Quarterly income statements of AAPL, shown with one column per quarter end.
url = "https://query1.finance.yahoo.com/v10/finance/quoteSummary/aapl?modules=incomeStatementHistoryQuarterly"

r = get_response(url)
data = r["quoteSummary"]["result"][0]["incomeStatementHistoryQuarterly"]["incomeStatementHistory"]

# Figures arrive wrapped in a dict that holds the number under "raw". Unwrap
# them on the parsed structure instead of regex-editing the serialized JSON:
# the pattern needed a comma after the raw value and stopped at the first "}",
# so an unexpected wrapper could corrupt the document. Wrappers without a
# "raw" entry (e.g. empty dicts) become None and end up as NaN below, exactly
# as they did before.
new_data = [
    {
        field: (value.get("raw") if isinstance(value, dict) else value)
        for field, value in statement.items()
    }
    for statement in data
]
labels = [timestamp_to_datetime(val["endDate"]).date() for val in new_data]
df = pd.DataFrame(new_data, index=labels).drop(columns=['maxAge', 'endDate'])
df = df.T
df = df.apply(lambda x: pd.to_numeric(x, errors='coerce'))
df.index = camelcase_to_title(df.index)
df

Unnamed: 0,2022-03-26,2021-12-25,2021-09-25,2021-06-26
Total Revenue,97278000000.0,123945000000.0,83360000000.0,81434000000.0
Cost Of Revenue,54719000000.0,69702000000.0,48186000000.0,46179000000.0
Gross Profit,42559000000.0,54243000000.0,35174000000.0,35255000000.0
Research Development,6387000000.0,6306000000.0,5772000000.0,5717000000.0
Selling General Administrative,6193000000.0,6449000000.0,5616000000.0,5412000000.0
Non Recurring,,,,
Other Operating Expenses,,,,
Total Operating Expenses,67299000000.0,82457000000.0,59574000000.0,57308000000.0
Operating Income,29979000000.0,41488000000.0,23786000000.0,24126000000.0
Total Other Income Expense Net,160000000.0,-247000000.0,-538000000.0,243000000.0
