In [None]:
import calcbench as cb
import pandas as pd
import math
import numpy as np
from decimal import Decimal
from tqdm import tqdm, tqdm_notebook
from IPython.core.debugger import set_trace
from tqdm.notebook import tqdm
import random
import qgrid
from sklearn.metrics import confusion_matrix

cb.enable_backoff()

tqdm.pandas(desc="my bar!")

In [None]:
tickers = cb.tickers(index="DJIA")

In [228]:
relevant_columns = [
    "preliminary",
    "revision_number",
    "value",
    "trace_url",
    "XBRL",
    "filing_type",
]

In [229]:
data = pd.DataFrame()
for ticker in tqdm(tickers[:2]):
    d = cb.point_in_time(
        company_identifiers=[ticker],
        all_face=True,
        # metrics=["EBIT"],
        include_preliminary=True,
        include_xbrl=True,
        all_history=True,
        period_type="quarterly",
        include_trace=True,
    )
    data = data.append(d)
data.reset_index(inplace=True, drop=True)

data = data[data.fiscal_period.isin([1, 2, 3, 4])]
data["period"] = pd.PeriodIndex(
    year=data.fiscal_year, quarter=data.fiscal_period, freq="Q"
)
data = data.set_index(
    [
        "ticker",
        "metric",
        "period",
    ]
)
data = data[relevant_columns]

  0%|          | 0/2 [00:00<?, ?it/s]

In [224]:
def preliminary_and_revision(group: pd.DataFrame) -> bool:
    """
    Does this group have a preliminary line and an XBRL line?
    """
    unconfirmed_preliminary = group[group.preliminary & ~group.XBRL].shape[0]
    revisions = group.revision_number > 0
    XBRL_revision = group[group.XBRL & revisions].shape[0]
    return bool(unconfirmed_preliminary and XBRL_revision)

In [None]:
def error_type(g):
    preliminary_row = g[g.preliminary].iloc[0]
    preliminary_value = preliminary_row.value
    XBRL_value = g[~g.preliminary].iloc[0].value
    preliminary_decimal = Decimal(preliminary_value).normalize().as_tuple()
    XBRL_decimal = Decimal(XBRL_value).normalize().as_tuple()
    diff = (preliminary_value - XBRL_value) / XBRL_value
    d = pd.DataFrame(g)
    d["diff"] = diff
    if preliminary_decimal.digits == XBRL_decimal.digits:
        if preliminary_decimal.exponent == XBRL_decimal.exponent:
            break_type = "sign"
        else:
            break_type = "scale"
    elif math.isclose(preliminary_value, XBRL_value, rel_tol=0.05):
        break_type = "close_enough"
    elif math.isclose(preliminary_value * 4, XBRL_value, rel_tol=0.1):
        break_type = "quarter_error"
    else:
        break_type = "unknown"
    d["break_type"] = break_type
    g[g.preliminary]["error"] = break_type
    return d

In [231]:
groups_with_preliminary_and_revision = data.groupby(
    data.index
).filter(preliminary_and_revision)

In [240]:
all_breaks = groups_with_preliminary_and_revision.groupby(
    groups_with_preliminary_and_revision.index
).progress_apply(error_type)

my bar!:   0%|          | 0/741 [00:00<?, ?it/s]

  diff = (preliminary_value - XBRL_value) / XBRL_value


In [257]:
qgrid.show_grid(data[data.preliminary].set_index('revision_number', append=True).merge(
    all_breaks[all_breaks.preliminary].set_index('revision_number', append=True)['break_type'],
    how="left",
    left_index=True,
    right_index=True
))

QgridWidget(grid_options={'fullWidthRows': True, 'syncColumnCellResize': True, 'forceFitColumns': True, 'defau…