In [4]:
import psycopg2 as pg
import pandas as pd
import matplotlib.pyplot as plt

from config import DATABASE_URI

# Short alias for pandas' IndexSlice helper, which builds label slices over a
# MultiIndex for use inside `.loc[...]` (e.g. `df.loc[idx[:, "AAPL"], :]`).
idx = pd.IndexSlice

In [2]:
# Month-end adjusted closes.
# The sub-query picks, for every (year, month), the latest date that has a
# row with frequency = 'DAILY'; the outer query then returns date, ticker and
# closeadj for every row in `prices` that falls on one of those dates.
# NOTE(review): the outer query does not filter on `frequency` itself -- if the
# table stores several frequencies for the same date this could yield duplicate
# (date, ticker) rows; confirm against the schema.
sql = """
SELECT
    date,
    ticker,
    closeadj
FROM
    prices
WHERE
    date IN (
        SELECT
            MAX(date)
        FROM
            prices
        WHERE
            frequency = 'DAILY'
        GROUP BY
            EXTRACT(
                MONTH
                FROM
                    date
            ),
            EXTRACT(
                YEAR
                FROM
                    date
            )
    )
ORDER BY
    date ASC;
"""

# psycopg2's `with connection:` block only wraps a transaction (commit on
# success, rollback on error); it does NOT close the connection. Close it
# explicitly so re-running this cell does not leak database connections.
conn = pg.connect(DATABASE_URI)
try:
    with conn:
        with conn.cursor() as cur:
            cur.execute(sql)
            results = cur.fetchall()
finally:
    conn.close()

prices = pd.DataFrame(results, columns=["date", "ticker", "closeadj"])
# Duplicates are not expected here, but drop them (keeping the first row) so
# that building the unique (date, ticker) index below cannot fail.
prices = prices.drop_duplicates(subset=["date", "ticker"])
# verify_integrity=True raises if any (date, ticker) pair is still duplicated;
# rows without an adjusted close are discarded.
prices = prices.set_index(["date", "ticker"], verify_integrity=True).dropna()

In [35]:
def do_reindex(df: pd.DataFrame, prices: pd.DataFrame, limit: int = 12) -> pd.DataFrame:
    """
    Align a (filing date, ticker)-indexed frame to the dates of ``prices``.

    For every ticker, the rows of ``df`` are reindexed onto the dates present
    in the first index level of ``prices``; each target date receives the
    values of the most recent row at or before it (forward fill).

    Parameters
    ----------
    df : DataFrame
        Indexed by a two-level MultiIndex whose second level is named
        ``"ticker"`` (the first level holds the filing dates).
    prices : DataFrame
        Indexed by a MultiIndex whose first level is named ``"date"``; only
        its index is used.
    limit : int, default 12
        Maximum number of consecutive target dates that may be filled from a
        single source row. With month-end target dates this caps the forward
        fill at ``limit`` months, so tickers that stop filing do not carry
        stale values forever.

    Returns
    -------
    DataFrame
        The columns of ``df`` on a sorted ``("date", "ticker")`` MultiIndex.
        Dates with no sufficiently recent source row hold NaN.
    """
    # Use only the dates that are actually present in `prices`:
    # MultiIndex.levels keeps values whose rows were removed (e.g. by dropna),
    # so prune the unused ones before taking the level.
    target_dates = prices.index.remove_unused_levels().levels[0]

    # Move the ticker level into a column so the remaining index is the filing
    # date, and sort it: reindex(method="ffill") requires a monotonic index.
    # groupby preserves this order within each group.
    by_filing_date = df.reset_index(level="ticker").sort_index()
    value_columns = [col for col in by_filing_date.columns if col != "ticker"]

    def _align(group: pd.DataFrame) -> pd.DataFrame:
        # Reindex one ticker's rows onto the shared target dates.
        return group.reindex(index=target_dates, method="ffill", limit=limit)

    # Selecting the value columns explicitly keeps the grouping column out of
    # the applied frames on every pandas version (operating on grouping
    # columns inside apply is deprecated and later removed), so there is no
    # duplicated "ticker" column to drop afterwards. group_keys=True makes the
    # ticker the outer index level of the result.
    aligned = by_filing_date.groupby("ticker", group_keys=True)[value_columns].apply(_align)

    # The result is indexed (ticker, date); restore (date, ticker) and sort.
    return aligned.reorder_levels(["date", "ticker"]).sort_index()

In [38]:
# Price/earnings ratio from every row of `fundamentals` with
# dimension = 'ARQ', ordered by datekey.
sql = """
SELECT
    datekey,
    ticker,
    pe
FROM
    fundamentals
WHERE
    dimension = 'ARQ'
ORDER BY
    datekey ASC;
"""

# psycopg2's `with connection:` block only wraps a transaction (commit on
# success, rollback on error); it does NOT close the connection. Close it
# explicitly so re-running this cell does not leak database connections.
conn = pg.connect(DATABASE_URI)
try:
    with conn:
        with conn.cursor() as cur:
            cur.execute(sql)
            results = cur.fetchall()
finally:
    conn.close()

fundamentals = pd.DataFrame(results, columns=["datekey", "ticker", "pe"])
# Unlike the price dates, `datekey` does not arrive as a datetime column,
# so convert it explicitly.
fundamentals["datekey"] = pd.to_datetime(fundamentals["datekey"])
# The table contains some duplicated (datekey, ticker) pairs; keep the first
# occurrence of each so the index built below is unique.
fundamentals = fundamentals.drop_duplicates(subset=["datekey", "ticker"])
# verify_integrity=True raises if any (datekey, ticker) pair is still duplicated.
fundamentals = fundamentals.set_index(["datekey", "ticker"], verify_integrity=True)
# Align the filings to the price dates (forward filled per ticker) and drop
# the (date, ticker) rows that end up without a value.
fundamentals = do_reindex(fundamentals, prices).dropna()

In [1]:
import data

In [2]:
# Column names handed to the project-local `data` module.
# NOTE(review): presumably these are fields of the fundamentals table and
# `get_data` returns them aligned to the price dates -- confirm in data.py.
columns = [
    "pe",
    "divyield",
    "marketcap",
]
test = data.get_data(columns)