# [WRDS Dummy Data](https://www.tidy-finance.org/python/wrds-dummy-data.html)

In [2]:
import pandas as pd
import numpy as np
import sqlite3
import string

In [3]:
# Shared SQLite connection that every dummy table below is written to.
# The path is relative to the current working directory.
tidy_finance = sqlite3.connect("data/tidy_finance_python.sqlite")

In [4]:
np.random.seed(1234)

start_date = pd.Timestamp("2003-01-01")
end_date = pd.Timestamp("2022-12-31")

dummy_years = np.arange(start_date.year, end_date.year+1, 1)
dummy_months = pd.date_range(start_date, end_date, freq="ME")
dummy_days = pd.date_range(start_date, end_date, freq="D")

# [Create Stock Dummy Data](https://www.tidy-finance.org/python/wrds-dummy-data.html#create-stock-dummy-data)

In [5]:
# Size of the dummy stock universe.
number_of_stocks = 100

# Industry lookup table; only the `prob` column is used below, as the
# sampling weights when a stock is assigned to an industry.
industries = pd.DataFrame(
    [
        ("Agriculture", 81, 0.00319),
        ("Construction", 287, 0.0113),
        ("Finance", 4682, 0.185),
        ("Manufacturing", 8584, 0.339),
        ("Mining", 1287, 0.0508),
        ("Public", 1974, 0.0779),
        ("Retail", 1571, 0.0620),
        ("Services", 4277, 0.169),
        ("Transportation", 1249, 0.0493),
        ("Utilities", 457, 0.0180),
        ("Wholesale", 904, 0.03451),
    ],
    columns=["industry", "n", "prob"],
)

# Exchange lookup table; `prob` holds the sampling weights used below.
exchanges = pd.DataFrame(
    [
        ("AMEX", 2893, 0.113),
        ("NASDAQ", 17236, 0.671),
        ("NYSE", 5553, 0.216),
    ],
    columns=["exchange", "n", "prob"],
)

# Half-open [low, high) SIC code range per industry. The order of this list
# fixes the order of the random draws inside the loop, so do not reorder it.
siccd_bounds = [
    ("Agriculture", 1, 1000),
    ("Mining", 1000, 1500),
    ("Construction", 1500, 1800),
    ("Manufacturing", 1800, 4000),
    ("Transportation", 4000, 4900),
    ("Utilities", 4900, 5000),
    ("Wholesale", 5000, 5200),
    ("Retail", 5200, 6000),
    ("Finance", 6000, 6800),
    ("Services", 7000, 9000),
    ("Public", 9000, 10000),
]

exchange_labels, exchange_weights = exchanges["exchange"], exchanges["prob"]
industry_labels, industry_weights = industries["industry"], industries["prob"]

stock_identifiers_list = []
for permno in range(1, number_of_stocks + 1):
    exchange = np.random.choice(exchange_labels, p=exchange_weights)
    industry = np.random.choice(industry_labels, p=industry_weights)

    # A candidate code is drawn for every exchange and every industry, and
    # only the one matching this stock is kept. The unused draws still
    # advance the seeded generator, so all of them must stay, in this order.
    exchcd_mapping = dict(
        NYSE=np.random.choice([1, 31]),
        AMEX=np.random.choice([2, 32]),
        NASDAQ=np.random.choice([3, 33]),
    )
    siccd_mapping = {
        name: np.random.randint(low, high) for name, low, high in siccd_bounds
    }

    stock_identifiers_list.append(
        dict(
            permno=permno,
            gvkey=str(permno + 10000),
            exchange=exchange,
            industry=industry,
            exchcd=exchcd_mapping[exchange],
            siccd=siccd_mapping[industry],
        )
    )

# One row per dummy stock with its identifiers and classification codes.
stock_identifiers = pd.DataFrame(stock_identifiers_list)

In [6]:
n_stocks = len(stock_identifiers)
n_years, n_months, n_days = len(dummy_years), len(dummy_months), len(dummy_days)


def _tile_identifiers(columns, n_periods):
    """Return {column: identifier values repeated end-to-end n_periods times}."""
    return {
        column: np.tile(stock_identifiers[column], n_periods)
        for column in columns
    }


# Each panel is the cross product of stocks and periods: identifiers are
# tiled (stock varies fastest) while each period is repeated once per stock.

# Firm-year panel keyed by gvkey.
stock_panel_yearly = pd.DataFrame({
    **_tile_identifiers(["gvkey"], n_years),
    "year": np.repeat(dummy_years, n_stocks),
})

# Firm-month panel carrying all identifier and classification columns.
stock_panel_monthly = pd.DataFrame({
    **_tile_identifiers(["permno", "gvkey"], n_months),
    "month": np.repeat(dummy_months, n_stocks),
    **_tile_identifiers(["siccd", "industry", "exchcd", "exchange"], n_months),
})

# Firm-day panel keyed by permno.
stock_panel_daily = pd.DataFrame({
    **_tile_identifiers(["permno"], n_days),
    "date": np.repeat(dummy_days, n_stocks),
})

## [Dummy beta table](https://www.tidy-finance.org/python/wrds-dummy-data.html#dummy-beta-table)

In [7]:
# Draw order matters for reproducibility: the monthly betas are drawn first,
# then the noise that turns them into daily betas.
n_beta_obs = len(stock_panel_monthly)
beta_monthly_draws = np.random.normal(loc=1, scale=1, size=n_beta_obs)
beta_daily_draws = beta_monthly_draws + np.random.normal(
    scale=0.01, size=n_beta_obs
)

# Monthly panel extended with a N(1, 1) monthly beta and a daily beta that
# equals the monthly one plus small normal noise (standard deviation 0.01).
beta_dummy = stock_panel_monthly.assign(
    beta_monthly=beta_monthly_draws,
    beta_daily=beta_daily_draws,
)

# Overwrite the "beta" table; the cell displays the row count returned.
beta_dummy.to_sql(
    name="beta",
    con=tidy_finance,
    if_exists="replace",
    index=False,
)

24000

## [Dummy compustat table](https://www.tidy-finance.org/python/wrds-dummy-data.html#dummy-compustat-table)

In [8]:
# Accounting columns that are filled with dummy values.
relevant_columns = [
    "seq", "ceq", "at", "lt",
    "txditc", "txdb", "itcb",
    "pstkrv", "pstkl", "pstk",
    "capx", "oancf", "sale", "cogs",
    "xint", "xsga", "be", "op",
    "at_lag", "inv",
]

# One vector of uniform [0, 1) draws per column, generated in list order so
# the seeded random stream is consumed deterministically.
n_firm_years = len(stock_panel_yearly)
commands = {}
for column in relevant_columns:
    commands[column] = np.random.rand(n_firm_years)

# Every firm-year is stamped with December 31 of its year as `datadate`.
year_end_dates = pd.to_datetime(
    stock_panel_yearly["year"].astype(str) + "-12-31"
)
compustat_dummy = stock_panel_yearly.assign(datadate=year_end_dates, **commands)

# Overwrite the "compustat" table; the cell displays the row count returned.
compustat_dummy.to_sql(
    name="compustat",
    con=tidy_finance,
    if_exists="replace",
    index=False,
)

2000

## [Dummy crsp_monthly table](https://www.tidy-finance.org/python/wrds-dummy-data.html#dummy-crsp_monthly-table)

In [9]:
# The random inputs are drawn up front in the order the seeded generator must
# be consumed: shares outstanding, price, return, then the excess-return
# deduction. Reordering these lines changes the generated data.
n_monthly_obs = len(stock_panel_monthly)
monthly_shrout = 1000 * np.random.uniform(1, 50, n_monthly_obs)
monthly_altprc = np.random.uniform(0, 1000, n_monthly_obs)
# Returns are floored at -1 (a stock cannot lose more than 100%).
monthly_ret = np.fmax(np.random.normal(size=n_monthly_obs), -1)
monthly_ret_excess = np.fmax(
    monthly_ret - np.random.uniform(0, 0.0025, n_monthly_obs), -1
)

crsp_monthly_dummy = (
    stock_panel_monthly
    .assign(
        # NOTE(review): `month` already holds month-end stamps, so adding
        # MonthEnd(-1) yields the PREVIOUS month's end. Confirm that `date`
        # is meant to fall one month before `month`.
        date=stock_panel_monthly["month"] + pd.offsets.MonthEnd(-1),
        ret=monthly_ret,
        ret_excess=monthly_ret_excess,
        shrout=monthly_shrout,
        altprc=monthly_altprc,
    )
    .assign(mktcap=lambda panel: panel["shrout"] * panel["altprc"])
    .sort_values(by=["permno", "month"])
    # Lag market cap within each stock; the first month per permno is NaN.
    .assign(mktcap_lag=lambda panel: panel.groupby("permno")["mktcap"].shift())
    .reset_index(drop=True)
)

# Overwrite the "crsp_monthly" table; the cell displays the row count returned.
crsp_monthly_dummy.to_sql(
    name="crsp_monthly",
    con=tidy_finance,
    if_exists="replace",
    index=False,
)

24000

## [Dummy crsp_daily table](https://www.tidy-finance.org/python/wrds-dummy-data.html#dummy-crsp_daily-table)

In [10]:
# Daily dummy panel: each (permno, date) row gets the first day of its own
# calendar month and a random excess return floored at -1.
crsp_daily_dummy = (
    stock_panel_daily
    .assign(
        # Map every date to the first day of ITS month. Subtracting
        # MonthBegin(1) is wrong for dates that already are a month start:
        # it rolls them back a full month (2003-02-01 -> 2003-01-01), which
        # files each month's first day under the previous month.
        month=lambda x: x["date"].dt.to_period("M").dt.start_time,
        ret_excess=lambda x: np.fmax(np.random.normal(size=len(x)), -1),
    )
    .reset_index(drop=True)
)

# Overwrite the "crsp_daily" table; the cell displays the row count returned.
crsp_daily_dummy.to_sql(
    name="crsp_daily",
    con=tidy_finance,
    if_exists="replace",
    index=False,
)

730500