# Build Fundamentals

In [None]:
import wrds
import numpy as np
import pandas as pd
from pandas.tseries.offsets import DateOffset

In [None]:
# Daily price query: for every (gvkey, iid) pair that appears in g_funda on
# the listed exchanges with EUR as currency, pull shares outstanding (cshoc)
# and close price (prccd) from g_sec_dprc, keeping EUR-quoted rows with a
# non-null traded volume dated 1999-01-01 or later.
#
# Fixes relative to the previous text:
#   * the statement terminator sat in the middle of the WHERE clause, leaving
#     a dangling "AND gvkey = ..." after the ";";
#   * that gvkey filter is now qualified as prc.gvkey, because both joined
#     relations expose a gvkey column.
#
# NOTE(review): the single-gvkey filter looks like a debugging restriction --
# confirm before running on the full universe.
# NOTE(review): this exchange list contains 172 and 228, whereas fund_stmt
# uses 107 and 276 instead -- confirm which set is intended.
prc_stmt = """
SELECT
    prc.gvkey,
    prc.datadate AS date,
    cshoc,
    prccd
FROM ( SELECT DISTINCT
        gvkey,
        iid
    FROM
        comp_global_daily.g_funda
    WHERE
        exchg = ANY (ARRAY [104, 132, 151, 154, 171, 172, 192, 194, 201, 209, 228, 256, 257, 273, 286])
        AND curcd = 'EUR') AS fund
    JOIN comp_global_daily.g_sec_dprc AS prc ON fund.gvkey = prc.gvkey
        AND fund.iid = prc.iid
WHERE
    curcdd = 'EUR'
    AND cshtrd IS NOT NULL
    AND datadate >= '1999-01-01'
    AND prc.gvkey = '221102';
"""

In [None]:
# Fundamentals query: selects the raw accounting items used by
# build_fundamental from comp.g_funda, joined to comp.r_siccd on the SIC code
# to pick up the textual description (sicdesc -> classification).
#
# * `at` and `lt` are aliased to `att` / `ltt`; presumably this avoids
#   clashing with the pandas attributes DataFrame.at / DataFrame.lt, since the
#   columns are later read with attribute access.
# * txp falls back to txpfs when txp is NULL (COALESCE).
# * The JOIN is an inner join, so rows whose sich has no match in r_siccd are
#   dropped.
# * Rows are limited to the listed exchange codes, EUR currency and one gvkey,
#   and are returned ordered by gvkey, datadate.
#
# NOTE(review): `indfmt` is exposed under the name `industry` -- confirm this
# is the intended column.
# NOTE(review): the single-gvkey filter looks like a debugging restriction --
# confirm before running on the full universe.
fund_stmt = """
SELECT
    gvkey,
    datadate AS date,
    loc AS country,
    indfmt AS industry,
    sicdesc AS classification,
    act,
    at AS att,
    capx,
    ceq,
    che,
    chee, 
    dlc,
    dltis,
    dltr,
    dltt,
    dp,
    dv,
    dvt,
    dlcch,
    ebit,
    ebitda,
    gdwl,
    icapt,
    intan,
    invt,
    ivaeq,
    ivao,
    ivst,
    lct,
    lt AS ltt,
    mib,
    nicon,
    oancf,
    pi,
    ppegt,
    pstk,
    prstkc,
    revt,
    sstk,
    COALESCE(txp, txpfs) AS txp,
    xrd
FROM
    comp.g_funda AS fund
    JOIN comp.r_siccd AS sic ON fund.sich::VARCHAR = sic.siccd
WHERE
    exchg = ANY (ARRAY [104, 107, 132, 151, 154, 171, 192, 194, 201, 209, 256, 257, 273, 276, 286])
    AND curcd = 'EUR'
    AND gvkey = '221102'
ORDER BY
    gvkey, datadate;
"""

In [None]:
def query_wrds(sql_stmt):
    """Execute ``sql_stmt`` on WRDS and return what ``raw_sql`` yields.

    A ``wrds.Connection`` is created with the hard-coded ``wrds_username``
    "USERNAME" and used as a context manager for the duration of the query.
    The column names ``date`` and ``datadate`` are passed as ``date_cols``.
    """
    connection = wrds.Connection(wrds_username="USERNAME")
    with connection as db:
        return db.raw_sql(sql_stmt, date_cols=["date", "datadate"])

In [None]:
def query_fundamental():
    """Fetch the rows selected by ``fund_stmt`` and normalise them.

    Missing values are replaced by 0 across the whole frame, after which
    ``gvkey`` is cast to ``object`` and ``country``, ``industry`` and
    ``classification`` are cast to ``category``.
    """
    column_types = {
        "gvkey": "object",
        "country": "category",
        "industry": "category",
        "classification": "category",
    }
    raw = query_wrds(fund_stmt)
    filled = raw.fillna(0)
    return filled.astype(column_types)

In [None]:
def query_price():
    """Fetch the rows selected by ``prc_stmt`` and return them unchanged."""
    return query_wrds(prc_stmt)

In [None]:
# Run the fundamentals query once and keep the typed, zero-filled result.
fund = query_fundamental()

In [None]:
# Run the price query once and keep the raw result.
prc = query_price()

In [None]:
def fill_year(df):
    """Expand dated rows to a daily, forward-filled frame.

    The result is indexed by every calendar day from the earliest ``date``
    in ``df`` up to one year after the latest ``date``. Each day carries the
    values of the most recent row dated on or before it.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``date`` column. A ``gvkey`` column, if present, is
        dropped. Rows may be in any order.

    Returns
    -------
    pandas.DataFrame
        ``df`` without ``gvkey``, indexed by a daily ``DatetimeIndex`` named
        ``date``. An empty input yields an empty frame indexed by ``date``.
    """
    # errors="ignore": the grouping column is not guaranteed to be present
    # in the frame handed over by groupby.apply.
    frame = df.drop(columns="gvkey", errors="ignore")
    if frame.empty:
        return frame.set_index("date")

    # Use min/max rather than the first/last row so the range is correct
    # even when the rows are not already sorted by date.
    dates = pd.to_datetime(frame["date"])
    date_index = pd.date_range(
        dates.min(), dates.max() + DateOffset(years=1), name="date"
    )
    return (
        frame.set_index("date")
        .sort_index()
        .reindex(date_index, method="ffill")
    )

In [None]:
def build_fundamental(df):
    """Derive change, growth and financing ratios from raw fundamentals.

    Every lag is a positional ``shift`` over the rows of ``df``, so ``df`` is
    expected to hold one company's records in chronological order.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose leading columns run up to and including
        ``classification`` (these are passed through unchanged), followed by
        the raw items read below (``att``, ``act``, ``che``, ...).

    Returns
    -------
    pandas.DataFrame
        The columns up to ``classification`` followed by the derived columns,
        starting at ``acc``. In the derived columns NaN, ``+inf`` and
        ``-inf`` are all replaced by 0.
    """
    lag_at = df.att.shift(1)
    avg_at = (df.att + lag_at) / 2

    # Balance-sheet aggregates shared by several ratios.
    oa = df.att - df.che
    ol = df.att - df.dlc - df.dltt - df.mib - df.pstk - df.ceq
    nca = df.att - df.act - df.ivaeq
    ncl = df.ltt - df.lct - df.dltt
    ncoa = nca - ncl
    coa = df.act - df.che
    col = df.lct - df.dlc
    wc = df.act - df.che - df.lct + df.dlc
    fna = df.ivst + df.ivao
    fnl = df.dltt + df.dlc + df.pstk
    nfna = fna - fnl
    total = ncoa + wc + nfna

    # Row-over-row changes of individual items.
    chact = df.act - df.act.shift(1)
    chchee = df.chee - df.chee.shift(1)
    chlct = df.lct - df.lct.shift(1)
    chdlc = df.dlc - df.dlc.shift(1)
    chtxp = df.txp - df.txp.shift(1)
    chinvt = df.invt - df.invt.shift(1)

    # Building blocks that appear in more than one output column.
    acc = ((chact - chchee) - (chlct - chdlc - chtxp) - df.dp) / avg_at
    noa = (oa - ol) / lag_at
    capx_to_rev = df.capx / df.revt

    out = df.loc[:, :"classification"].assign(
        acc=acc,
        chcoa=(coa - coa.shift(1)) / lag_at,
        chcol=(col - col.shift(1)) / lag_at,
        chnncwc=(wc - wc.shift(1)) / lag_at,
        chnncoa=(ncoa - ncoa.shift(1)) / lag_at,
        chncoa=(nca - nca.shift(1)) / lag_at,
        chncol=(ncl - ncl.shift(1)) / lag_at,
        # The whole difference is scaled by lagged assets, like its siblings;
        # previously only the lagged term was divided (missing parentheses).
        chnfa=(nfna - nfna.shift(1)) / lag_at,
        chlti=(df.ivao - df.ivao.shift(1)) / lag_at,
        chce=(df.ceq - df.ceq.shift(1)) / lag_at,
        chfl=(fnl - fnl.shift(1)) / lag_at,
        grii=chinvt / avg_at,
        ich=chinvt / lag_at,
        igr=chinvt / df.invt.shift(1),
        nwcch=(wc - wc.shift(1)) / lag_at,
        ta=(total - total.shift(1)) / lag_at,
        # NOTE(review): this is the gross ratio att_t / att_{t-1}, not a
        # growth rate (no "- 1") -- confirm that is intended.
        agr=df.att / lag_at,
        # NOTE(review): noa is already scaled by lagged assets and its change
        # is scaled again here -- confirm the double scaling is intended.
        chnoa=(noa - noa.shift(1)) / lag_at,
        chppeia=((df.ppegt - df.ppegt.shift(1)) + chinvt) / lag_at,
        cdi=np.log((df.dltt + df.dlc) / (df.dltt.shift(5) + df.dlc.shift(5))),
        griltnoa=noa - noa.shift(1) - acc,
        # Current capx/revt relative to the mean of the three prior rows.
        inv=capx_to_rev
        / (
            (capx_to_rev.shift(1) + capx_to_rev.shift(2) + capx_to_rev.shift(3))
            / 3
        ),
        ndf=(df.dltis - df.dltr + df.dlcch) / avg_at,
        nef=(df.sstk - df.prstkc - df.dv) / avg_at,
        noa=noa,
        noach=(ncoa - ncoa.shift(1)) / df.att,
        txfin=(df.sstk - df.dv - df.prstkc + df.dltis - df.dltr) / df.att,
        ir=(df.icapt - df.icapt.shift(1)) / (df.ebit * (df.nicon / df.pi)),
        nopat_g=(df.icapt - df.icapt.shift(1)) / df.icapt,
        rev_cagr_3=((df.revt / df.revt.shift(3)) ** (1 / 3)) - 1,
        ebitda_cagr_3=((df.ebitda / df.ebitda.shift(3)) ** (1 / 3)) - 1,
    )

    # Neutralise undefined results. Negative numerators over a zero
    # denominator (and log(0)) give -inf, so both signs must be replaced.
    out.loc[:, "acc":] = (
        out.loc[:, "acc":].fillna(0).replace([np.inf, -np.inf], 0)
    )
    return out

In [None]:
# Build the ratios and expand them to a daily, forward-filled frame in one
# per-company pass. Chaining two groupbys
# (``.apply(build_fundamental).groupby("gvkey")``) fails on pandas >= 2.0:
# the first apply prepends "gvkey" to the index while the column is still
# present, so the second ``groupby("gvkey")`` is ambiguous.
fund_build = fund.groupby("gvkey").apply(
    lambda company: fill_year(build_fundamental(company))
)

In [None]:
# Index prices by (gvkey, date), the keys used for the join with fund_build.
prc_build = prc.set_index(["gvkey", "date"])

In [None]:
# DataFrame.join is a left join on the index by default: every row of
# fund_build is kept and price columns are attached where the index matches.
df = fund_build.join(prc_build)