In [1]:
from data import get_data
import pandas as pd
from scipy import stats
import numpy as np
import matplotlib.pyplot as plt

idx = pd.IndexSlice

- Goals:
    - Balance sheet health (net cash per share growth rate)
    - Earnings growth
    - Value for earnings (earnings + div yield / PE)
    - leverage (debt equity)
    - Free cash flow per share / price
- Filters:
    - No infs
    - Share factor == 1
    - 10,000,000 < market cap < 5,000,000,000
    - 0 < EPS Growth < 1
    - 0 < Debt Equity < 10
    - 0 <= Dividend Yield < 0.25
    - 0 < PE < 50

In [2]:
columns = ["eps", "cashneq", "investments", "debt", "sharesbas", "pe", "divyield", "de", "fcfps", "sharefactor"]

In [3]:
data = get_data(columns)

data["marketcap"] = data["close"] * data["sharesbas"]
# Cash coverage ratio
data["cash_cov"] = (data["cashneq"] + data["investments"]) / data["debt"]
# Free cash flow per share / price
data["cfpr"] = data["fcfps"] / data["close"]

data["forward_ret"] = data["close"].groupby("ticker").pct_change()
data["forward_ret"] = data["forward_ret"].groupby("ticker").shift(-1)

data = data[data["pe"] > 0]
data = data[data["sharefactor"] == 1]

data = data.dropna()

In [26]:
def growth(df):
    """
    Calculates CAGR over past 3 years given monthly data points
    """
    df = df.droplevel("ticker")
    diff = df.iloc[36:] / df.iloc[:-36].values
    return np.power(diff, 1/3) - 1

In [62]:
data = get_data(columns)

data["marketcap"] = data["close"] * data["sharesbas"]
# Net cash per share
data["net_cash"] = ((data["cashneq"] + data["investments"]) - data["debt"]) / data["sharesbas"]
# Apply returns dataframe with indexes reversed so we have to un-reverse them
data["net_cash_change"] = data["net_cash"].groupby("ticker").apply(growth).reorder_levels([1, 0])
data["eps_growth"] = data["eps"].groupby("ticker").apply(growth).reorder_levels([1, 0])
# Free cash flow per share / price
data["cfpr"] = data["fcfps"] / data["close"]
data["eps_pe"] = (data["eps_growth"] + data["divyield"]) / data["pe"]

data["forward_ret"] = data["close"].groupby("ticker").pct_change()
data["forward_ret"] = data["forward_ret"].groupby("ticker").shift(-1)

# Filters
data = data[data["sharefactor"] == 1]
data = data[data["marketcap"].between(1e7, 5e9)]
data = data[data["eps_growth"].between(0, 1)]
data = data[data["de"].between(0, 10)]
data = data[data["divyield"].between(0, 0.25)]
data = data[data["pe"].between(0, 50)]
data = data.replace([np.inf, -np.inf], np.nan)
data = data.dropna()

In [127]:
features = data[["pe", "divyield", "de", "marketcap", "net_cash", "net_cash_change", "eps_growth", "cfpr", "eps_pe"]]

In [184]:
ranks = features.groupby("date").rank()

In [186]:
test = ranks["pe"]

In [193]:
def form_deciles(df):
    df = df.droplevel("date")
    

date        ticker
2000-12-29  AIND        4.0
            BSNX        7.0
            CMRN       11.0
            DS1        12.0
            HDLM        2.0
                      ...  
2021-07-30  WTM        34.0
            XOMA      525.0
            XRX       403.0
            YORW      541.0
            ZEUS      447.0
Name: pe, Length: 181195, dtype: float64