# Standardized Data With Timestamps Suitable For Backtesting

### [Data Dictionary](https://www.calcbench.com/home/standardizedmetrics)
### [Documentation](http://calcbench.github.io/python_api_client/html/numeric-data.html#point-in-time)
### `date_reported` is the timestamp when Calcbench published the data and users would have had access to it.
### `revision_number` greater than 0 indicates that the filer reported a different value for this fact than was previously reported.


In [None]:
import calcbench as cb
import pandas as pd
import datetime
import os
from tqdm import tqdm, tqdm_notebook
import logging

# Surface debug-level log records from the "calcbench" logger.
calcbench_logger = logging.getLogger("calcbench")
calcbench_logger.setLevel(logging.DEBUG)


def _is_not_found(error):
    """Return True when the failed request's response status is HTTP 404."""
    return error.response.status_code == 404


# NOTE(review): presumably the client stops retrying a request once `giveup`
# returns True, so 404 responses are not retried - confirm against client docs.
cb.enable_backoff(giveup=_is_not_found)

In [None]:
# Tickers for the "DJIA" index; the historic-data loop below iterates over these.
tickers = cb.tickers(index="DJIA")

## Get Historic Data

In [None]:
# Download point-in-time data one ticker at a time and accumulate it in a
# single CSV file.
output_file_name = r"C:\Users\andre\documents\calcbench_pit_preliminary.csv"
include_preliminary = True  # Include values from earnings press-releases and 8-Ks

# True until the first successful write.  The first write truncates the file
# and emits the header; every later write appends without a header.  Tracking
# this explicitly (rather than testing for the first ticker) keeps the header
# correct when the first ticker fails or returns no data.
first_write = True
for ticker in tqdm_notebook(tickers):
    tqdm.write(f"Getting {ticker}")
    try:
        pit_data = cb.point_in_time(
            all_face=True,
            all_footnotes=False,
            company_identifiers=[ticker],
            all_history=True,
            include_preliminary=include_preliminary,
            include_xbrl=True,
        )
    except Exception as e:
        # Best effort: report the failure and move on to the next ticker.
        # KeyboardInterrupt is not an Exception subclass, so it still
        # propagates and stops the loop.
        tqdm.write(f"Exception getting {ticker} {e}")
        continue

    if pit_data.empty:
        # Nothing to write for this ticker.
        continue

    pit_data.to_csv(
        output_file_name,
        mode="w" if first_write else "a",
        index=False,
        header=first_write,
    )
    first_write = False

## New Filings Queue
### Run the below code in a daemon

The handler function (`get_filing_standardized_data` below) will get called every time a new filing is published.

Calcbench pushes messages onto the queue when data is available, typically a few minutes after the SEC publishes the data.

Messages will remain in the queue for seven days; if the listening process goes down, you will receive the messages when it is started again.

If the handler function throws an error, the message will be pushed back onto the queue to be retried.


In [None]:
def get_filing_standardized_data(filing: cb.Filing):
    """Fetch the point-in-time data for one filing and append it to the CSV.

    Filings without Calcbench-standardized XBRL are ignored.  The rows are
    appended to ``output_file_name``; the header is written only when the
    file does not exist yet.

    Args:
        filing: The newly published filing to process.

    Raises:
        Exception: If no data is returned for the filing.  The notebook text
            states that a handler error puts the message back on the queue,
            so raising here requests a retry.
    """
    # Imported and created locally: no other visible cell imports `Path` or
    # defines `logger`, so relying on globals would raise NameError.
    from pathlib import Path

    logger = logging.getLogger(__name__)

    if not filing.standardized_XBRL:
        # We only want filings which include XBRL Calcbench standardizes.  This excludes S-1s.
        return

    accession_id = filing.calcbench_id
    filing_data = cb.point_in_time(
        accession_id=accession_id, all_face=True, all_footnotes=True
    )

    logger.info("Found %s for %s", filing_data.shape, filing.ticker)
    if filing_data.shape[0] == 0:
        msg = f"Found no data for {filing.ticker} {accession_id}"
        # If we didn't find any data there might be something holding up the process on Calcbench's side.  Throw an exception to try again later.
        raise Exception(msg)

    file_exists = Path(output_file_name).exists()
    # NOTE(review): `columns` is not defined in any visible cell; it must be
    # set to the list of column names to export before this handler runs.
    # The historic-data cell writes every column to the same file - confirm
    # the two writers are meant to agree.
    filing_data[columns].to_csv(
        output_file_name,
        index=False,
        header=not file_exists,
        mode="a" if file_exists else "w",
    )

In [None]:
# Placeholder subscription name; replace it with the one issued by Calcbench.
subscription = "talk to Calcbench to get a subscription"

# Register the handler defined above for filings delivered on this subscription.
cb.handle_filings(subscription_name=subscription, handler=get_filing_standardized_data)