In [1]:
import numpy as np
import pandas as pd
from itertools import combinations

In [2]:
# Load the raw input table from a CSV file located next to the notebook.
# raw2base (defined in the next cell) expects it to contain a "stock_code" column.
raw_2_df = pd.read_csv("./example_2.csv")

In [3]:
def raw2base(raw_df):
    """Return a copy of ``raw_df`` indexed by zero-padded stock codes.

    Every value in the ``"stock_code"`` column is converted with ``str`` and
    left-padded with zeros to at least six characters; the column then
    becomes the index of the returned frame.  ``raw_df`` itself is left
    untouched.

    Parameters
    ----------
    raw_df : pandas.DataFrame
        Frame containing a ``"stock_code"`` column.

    Returns
    -------
    pandas.DataFrame
        New frame indexed by the padded ``"stock_code"`` strings, holding the
        remaining columns of ``raw_df``.
    """
    padded_codes = raw_df["stock_code"].map(lambda code: str(code).zfill(6))
    # assign() works on a copy, so the caller's frame is never mutated.
    return raw_df.assign(stock_code=padded_codes).set_index("stock_code")

## Build the base frame: the raw table re-indexed by zero-padded stock_code.
base_df = raw2base(raw_2_df)

In [4]:
def base2main(base_df, factors, target):
    """Select the factor columns plus the target column from ``base_df``.

    Parameters
    ----------
    base_df : pandas.DataFrame
        Source frame containing every column named in ``factors`` and the
        ``target`` column.
    factors : list of str
        Names of the factor columns to keep, in output order.
    target : str
        Name of the column to analyse; it appears in the result under the
        fixed name ``"target"``.

    Returns
    -------
    pandas.DataFrame
        The factor columns followed by a single ``"target"`` column, sharing
        the index of ``base_df``.
    """
    pieces = [
        base_df.loc[:, factors],
        # Normalise the target's name so downstream code can always ask for "target".
        base_df.loc[:, target].rename("target"),
    ]
    return pd.concat(pieces, axis=1)


## Choose the factor columns and the target column, then assemble the analysis frame.
factors = ["cpbr", "tpbr", "ctr", "npr", "opr"]
target = "profit"
# base2main stores the chosen target column under the fixed name "target".
main_df = base2main(base_df, factors, target)

In [5]:
class NdimAnalyser:
    """Summarise a target column over combinations of quantile-binned factors.

    Each factor column is cut into quantile bins; for every combination of
    ``n_d`` factors the target's mean, variance and row count are computed per
    bin combination.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the factor columns and the target column.
    factors : list of str
        Names of the factor columns in ``df``.
    target : str
        Name of the target column in ``df``.
    """

    def __init__(self, df, factors, target) -> None:
        self.df = df
        self.factors = factors
        self.target = target

    def get_factor_qct_df(self, qct_n):
        """Return the factor columns replaced by their quantile-bin codes.

        Parameters
        ----------
        qct_n : int
            Number of quantile bins, passed to ``pandas.qcut`` as ``q``.

        Returns
        -------
        pandas.DataFrame
            One column per factor, holding the integer bin code
            (``labels=False``) of each row.
        """
        binned = [
            pd.qcut(x=self.df[col], q=qct_n, labels=False) for col in self.factors
        ]
        return pd.concat(binned, axis=1)

    def calc_combs(self, qct_df, factor_combs, n_d=None):
        """Compute per-group mean/var/count of the target for each factor combination.

        Parameters
        ----------
        qct_df : pandas.DataFrame
            Binned factor columns plus the target column.
        factor_combs : iterable of tuple of str
            Factor-name combinations to group by.
        n_d : int, optional
            Accepted for backward compatibility only.  The factor labels are
            derived from the actual group index, so combinations of any
            length (even mixed lengths) are labelled correctly regardless of
            this value.

        Returns
        -------
        pandas.DataFrame
            Columns ``mean``, ``var``, ``count`` and ``factor``; ``factor`` is
            a dict mapping each grouped factor name to its bin code.  Row
            labels restart at 0 for every combination.

        Raises
        ------
        ValueError
            If ``factor_combs`` yields no combination.
        """
        frames = []
        for factor_comb in factor_combs:
            # Build the grouping once and reuse it for all three reductions.
            grouped = qct_df.groupby(list(factor_comb))[self.target]
            mvc_df = pd.concat(
                [
                    grouped.mean().rename("mean"),
                    grouped.var().rename("var"),
                    grouped.size().rename("count"),
                ],
                axis=1,
            )
            group_index = mvc_df.index
            if isinstance(group_index, pd.MultiIndex):
                # Several grouping keys: each index entry is a tuple of bin codes.
                labels = [dict(zip(group_index.names, key)) for key in group_index]
            else:
                # A single grouping key yields a flat index of scalar bin codes.
                labels = [{group_index.name: key} for key in group_index]
            mvc_df["factor"] = labels
            frames.append(mvc_df.reset_index(drop=True))
        if not frames:
            raise ValueError("factor_combs is empty: no factor combination to analyse")
        return pd.concat(frames, axis=0)

    def __call__(self, n_d, qct_n):
        """Analyse every combination of ``n_d`` factors using ``qct_n`` quantile bins.

        Parameters
        ----------
        n_d : int
            Number of factors per combination; must satisfy
            ``1 <= n_d <= len(factors)``.
        qct_n : int
            Number of quantile bins per factor.

        Returns
        -------
        pandas.DataFrame
            See :meth:`calc_combs`.

        Raises
        ------
        ValueError
            If ``n_d`` is outside ``1..len(factors)``.
        """
        n_factors = len(self.factors)
        if not 1 <= n_d <= n_factors:
            raise ValueError(
                f"n_d must be between 1 and {n_factors} (the number of factors), "
                f"got {n_d!r}"
            )
        qct_df = pd.concat(
            [self.get_factor_qct_df(qct_n), self.df.loc[:, self.target]], axis=1
        )
        return self.calc_combs(qct_df, combinations(self.factors, n_d), n_d)


# The target is passed as 'target' because base2main stored the chosen
# target column under that fixed name in main_df.
ndim_analyser = NdimAnalyser(main_df, factors, 'target')

In [6]:
# Single-factor summary with 10 quantile bins per factor; preview the first rows.
ndim_analyser(n_d=1,qct_n=10).head()

Unnamed: 0,mean,var,count,factor
0,0.145588,0.080563,167,{'cpbr': 0}
1,0.084227,0.05766,167,{'cpbr': 1}
2,0.025713,0.037417,166,{'cpbr': 2}
3,0.088396,0.090076,167,{'cpbr': 3}
4,0.049663,0.052134,167,{'cpbr': 4}


In [7]:
# Factor pairs with 10 quantile bins per factor; preview the first rows.
ndim_analyser(n_d=2, qct_n=10).head()

Unnamed: 0,mean,var,count,factor
0,-0.176508,0.062855,6,"{'cpbr': 0, 'tpbr': 0}"
1,-0.00317,0.009057,4,"{'cpbr': 0, 'tpbr': 1}"
2,0.019961,0.023052,11,"{'cpbr': 0, 'tpbr': 2}"
3,0.02025,0.022746,9,"{'cpbr': 0, 'tpbr': 3}"
4,0.123144,0.08086,13,"{'cpbr': 0, 'tpbr': 4}"


In [8]:
# Factor triples with 5 quantile bins per factor; preview the first rows.
ndim_analyser(n_d=3, qct_n=5).head()

Unnamed: 0,mean,var,count,factor
0,0.032245,0.086469,57,"{'cpbr': 0, 'tpbr': 0, 'ctr': 0}"
1,0.001071,0.003699,5,"{'cpbr': 0, 'tpbr': 0, 'ctr': 4}"
2,0.063677,0.058752,62,"{'cpbr': 0, 'tpbr': 1, 'ctr': 0}"
3,0.105117,0.057328,58,"{'cpbr': 0, 'tpbr': 2, 'ctr': 0}"
4,0.080359,,1,"{'cpbr': 0, 'tpbr': 2, 'ctr': 1}"


In [9]:

# Four-factor combinations with 5 quantile bins per factor; preview the first rows.
ndim_analyser(n_d=4, qct_n=5).head()

Unnamed: 0,mean,var,count,factor
0,-0.000778,0.084291,38,"{'cpbr': 0, 'tpbr': 0, 'ctr': 0, 'npr': 0}"
1,0.084855,0.129249,11,"{'cpbr': 0, 'tpbr': 0, 'ctr': 0, 'npr': 1}"
2,-0.012311,0.008516,5,"{'cpbr': 0, 'tpbr': 0, 'ctr': 0, 'npr': 2}"
3,0.410907,,1,"{'cpbr': 0, 'tpbr': 0, 'ctr': 0, 'npr': 3}"
4,0.292389,0.036408,2,"{'cpbr': 0, 'tpbr': 0, 'ctr': 0, 'npr': 4}"
