In [1]:
import sqlite3

# Connect to the SQLite database
conn = sqlite3.connect('data/tidy_finance_r.sqlite')

try:
    # Create a cursor object to execute SQL queries
    cursor = conn.cursor()

    # sqlite_master is SQLite's schema catalogue; the rows with type='table'
    # describe the tables, and the selected column is the table name.
    cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
    tables = cursor.fetchall()
finally:
    # Close the connection even when the query above raises, so the database
    # handle is never leaked. (Using the connection as a context manager
    # would only manage the transaction, not close the handle.)
    conn.close()

# Each fetched row is a 1-tuple; unpack it to the bare table name
table_names = [table[0] for table in tables]

# Print the list of table names
print("Tables in the database:")
for table_name in table_names:
    print(table_name)

Tables in the database:
factors_ff3_monthly
factors_ff5_monthly
factors_ff3_daily
industries_ff_monthly
factors_q_monthly
macro_predictors
cpi_monthly
compustat
crsp_monthly


In [2]:
import pandas as pd
import numpy as np
import sqlite3
import statsmodels.formula.api as smf

from regtabletotext import prettify_result
from statsmodels.regression.rolling import RollingOLS
from plotnine import *
from mizani.breaks import date_breaks
from mizani.formatters import percent_format, date_format
from joblib import Parallel, delayed, cpu_count
from itertools import product


# Shared database handle; it is left open at the end of this cell.
tidy_finance = sqlite3.connect(database="data/tidy_finance_r.sqlite")

# Market excess return per month, with `month` parsed into datetimes.
factors_ff3_monthly = pd.read_sql_query(
    sql="SELECT month, mkt_excess FROM factors_ff3_monthly",
    con=tidy_finance,
    parse_dates={"month"},
)

# Stock-level monthly excess returns. Rows with a missing value in any of the
# four selected columns are dropped first; the left merge then attaches
# `mkt_excess` by month while keeping every remaining stock-month row
# (months absent from the factor table end up with NaN in `mkt_excess`).
crsp_monthly = (
    pd.read_sql_query(
        sql="SELECT permno, month, industry, ret_excess FROM crsp_monthly",
        con=tidy_finance,
        parse_dates={"month"},
    )
    .dropna()
    .merge(factors_ff3_monthly, how="left", on="month")
)


In [3]:
# Restrict the panel to a single security (permno 14593) and regress its
# excess return on the market excess return; the coefficient on `mkt_excess`
# is the slope of that regression.
single_stock_returns = crsp_monthly.query("permno == 14593")

model_beta = smf.ols(
    formula="ret_excess ~ mkt_excess",
    data=single_stock_returns,
).fit()

# Last expression of the cell: render the fitted model as a text table.
prettify_result(model_beta)

OLS Model:
ret_excess ~ mkt_excess

Coefficients:
            Estimate  Std. Error  t-Statistic  p-Value
Intercept      0.010       0.005        2.003    0.046
mkt_excess     1.389       0.111       12.467    0.000

Summary statistics:
- Number of observations: 504
- R-squared: 0.236, Adjusted R-squared: 0.235
- F-statistic: 155.423 on 1 and 502 DF, p-value: 0.000



In [4]:
# Estimation parameters. `window_size` sets the threshold used in the filter
# below; `min_obs` is not used in this statement.
# NOTE(review): presumably both are consumed by a later rolling estimation,
# measured in monthly observations — confirm.
window_size = 60
min_obs = 48

# Number of complete rows (no missing value in any column) per permno.
obs_per_permno = (
    crsp_monthly.dropna()
    .groupby("permno")["permno"]
    .count()
    .reset_index(name="counts")
)

# Keep only the permnos with strictly more than window_size + 1 complete rows.
valid_permnos = obs_per_permno.query(f"counts > {window_size}+1")