In [48]:
from ycharts_parsers import YchartsDataVar
import polars as pl
from glob import glob
from tqdm import tqdm
from os import listdir

In [49]:
# Base directory for the YCharts inputs and the exported results.
YCH_DF_PATH = "./data/ycharts/"

In [62]:
def merge_parquets(path: str, shift: bool = True, resample: bool = True):
    """Merge every ``*.parquet`` file in ``path`` into one date-indexed pandas frame.

    Each file is read with polars and its "index" column is dropped. The
    frames are then outer-joined one after another on "date", sorted by date,
    converted to pandas, indexed by "date" and restricted to the labels
    2007 through 2022.

    Args:
        path: Directory that contains the parquet files.
        shift: If true, move the index forward by four month-starts
            (``freq="MS"``) after the optional resampling.
        resample: If true, replace the rows by their mean per ``"1Y"`` bin.

    Returns:
        A pandas DataFrame indexed by "date".

    Raises:
        FileNotFoundError: If ``path`` contains no ``*.parquet`` file.
    """
    # glob() returns matches in arbitrary order; sorting keeps the column
    # order of the merged frame reproducible between runs and machines.
    parquet_files = sorted(glob(f"{path}/*.parquet"))
    if not parquet_files:
        raise FileNotFoundError(f"no *.parquet files found in {path!r}")

    frames = [pl.read_parquet(fp).drop("index") for fp in tqdm(parquet_files)]

    merged = frames[0]
    for frame in tqdm(frames[1:]):
        # NOTE(review): newer polars releases spell this how="full" and need
        # coalesce=True to keep a single "date" column - confirm against the
        # polars version this notebook is pinned to.
        merged = merged.join(frame, on="date", how="outer")
    merged = merged.sort("date")

    result = merged.to_pandas().set_index("date").loc["2007":"2022"]
    if resample:
        # NOTE(review): pandas >= 2.2 deprecates the "Y" alias in favour of
        # "YE"; kept as-is so older pandas versions keep working.
        result = result.resample("1Y").mean()
    if shift:
        result.index = result.index.shift(4, freq="MS")  # type: ignore
    return result

In [63]:
# Revenue: point the shared YCH_DATA_TYPE / PARQUETS_PATH globals at the
# revenue parquet directory.
YCH_DATA_TYPE = YchartsDataVar.REVENUE
PARQUETS_PATH = f"./data/ycharts/{YCH_DATA_TYPE.value}_parqs"
# Number of entries in that directory (listdir counts every entry, not only
# *.parquet files).
len(listdir(PARQUETS_PATH))


2139

In [64]:
# Merge the revenue parquets; shift=True moves the index forward by four
# month-starts inside merge_parquets.
rev_df = merge_parquets(PARQUETS_PATH, shift=True, resample=True)
# Optional export of the merged frame:
# rev_df.to_csv(f"{YCH_DF_PATH}/{YCH_DATA_TYPE.value}_final.csv")

100%|██████████| 2139/2139 [00:01<00:00, 1973.74it/s]
100%|██████████| 2138/2138 [00:04<00:00, 440.77it/s]


In [66]:
# Total assets: re-point the shared YCH_DATA_TYPE / PARQUETS_PATH globals at
# the total-assets parquet directory (this overwrites the revenue values).
YCH_DATA_TYPE = YchartsDataVar.TOTAL_ASSETS
PARQUETS_PATH = f"./data/ycharts/{YCH_DATA_TYPE.value}_parqs"
# Number of entries in that directory (listdir counts every entry, not only
# *.parquet files).
len(listdir(PARQUETS_PATH))


2139

In [67]:
# Merge the total-assets parquets; shift=False keeps the unshifted index here
# because the four-month shift is applied after the rolling mean below.
ta_df = merge_parquets(PARQUETS_PATH, shift=False, resample=True)
# Optional export of the merged frame:
# ta_df.to_csv(f"{YCH_DF_PATH}/{YCH_DATA_TYPE.value}_final.csv")

100%|██████████| 2139/2139 [00:01<00:00, 1653.90it/s]
100%|██████████| 2138/2138 [00:04<00:00, 456.45it/s]


In [74]:
# Average total assets: mean of each row and the row before it, per column.
# min_periods=2 leaves the first row without a value, so it is dropped after
# the index has been moved forward by four month-starts.
average_assets = (
    ta_df
    .rolling(window=2, min_periods=2)
    .mean()
    .shift(periods=4, freq="MS")
    .iloc[1:]
)
average_assets.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 15 entries, 2009-04-01 to 2023-04-01
Columns: 2139 entries, A to SEKEY
dtypes: float64(2139)
memory usage: 250.8 KB


In [76]:
# Trim revenue so it starts on the first date that average_assets covers.
# Slicing by label (instead of the positional `.iloc[1:]`) makes the cell
# idempotent: re-running it no longer drops one more year each time.
rev_df = rev_df.loc[average_assets.index[0]:]
rev_df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 15 entries, 2009-04-01 to 2023-04-01
Columns: 2139 entries, A to SEKEY
dtypes: float64(2139)
memory usage: 250.8 KB


In [77]:
# Iterating a DataFrame yields its column labels, so this checks that both
# frames contain the same columns (the index is not compared).
sorted(average_assets) == sorted(rev_df)

True

$$\text{Asset turnover ratio} = \dfrac{\text{Revenue}}{\text{Average total assets}}$$

In [79]:
# Element-wise revenue / average total assets; pandas aligns the two frames
# on index and column labels before dividing.
asset_turnover = rev_df.div(average_assets)

In [80]:
# Number of missing ratios per date (counted across all columns).
asset_turnover.isna().sum(axis=1)

date
2009-04-01    59
2010-04-01    33
2011-04-01    13
2012-04-01     2
2013-04-01     0
2014-04-01     0
2015-04-01     1
2016-04-01     0
2017-04-01     0
2018-04-01     1
2019-04-01     2
2020-04-01     2
2021-04-01     1
2022-04-01     7
2023-04-01    64
dtype: int64

In [86]:
# Mean ratio per column over all dates, lowest first.
asset_turnover.mean(axis=0).sort_values(ascending=True)

DBOEY     0.016985
OSCUF     0.023615
CMPNF     0.038124
HYSNF     0.039543
CKISF     0.045927
           ...    
AE        7.607255
INT       7.779528
SBR       8.069753
FULO      8.616490
DIT      10.200208
Length: 2139, dtype: float64

In [82]:
# YCH_DF_PATH ends with "/", so strip it before joining to avoid writing to
# "./data/ycharts//asset_turnover.csv".
asset_turnover.to_csv(f"{YCH_DF_PATH.rstrip('/')}/asset_turnover.csv")