## How to Perform Industry Neutralization

### Neutralize inside the `calc` method

In [1]:
from frozen.engine import FrozenBt
from frozen.factor import *
from frozen.utils import Universe
from frozen.utils import IndustryManager
from frozen.data.database import DatabaseTypes
from frozen.utils.calendar import CalendarTypes

class FactorFactory(FrozenBt):
    """Factor pipeline whose ``calc`` returns an industry-neutralized alpha.

    The alpha is evaluated from a formula string and then neutralized against
    an industry time series ("sw_l1" classification) before being returned.
    """

    def __init__(self):
        # NOTE(review): `__vsc_ipynb_file__` is presumably the notebook-path
        # global provided by VS Code's Jupyter integration, so this class is
        # expected to be defined inside such a notebook -- confirm.
        super().__init__(__vsc_ipynb_file__)

    def univ(self):
        """Build a ``Universe`` from the config and keep its pool on ``self.universe``."""
        self.universe = Universe(self.config).pool

    def prepare_data(self):
        """Load the fields referenced by the formula in ``calc``.

        Returns:
            Whatever ``self.dataloader.load_batch`` returns for the single data
            definition below, over the look-back start date to the end date.
        """
        # Presumably (table, source columns, names exposed to the formula);
        # the formula in `calc` refers to `close` and `returns`.
        definition = ("stock_daily_hfq", ("close", "pct_chg"), ("close", "returns"))
        return self.dataloader.load_batch(
            [definition],
            self.universe,
            start_date=self.start_date_lookback,
            end_date=self.end_date,
        )

    def calc(self):
        """Evaluate the formula and return its regression-neutralized result."""
        expression = "SignedPower(where(returns < 0 ? stddev(returns, 5) : close), 2.0)"
        raw_alpha = calc_str(expression, self.prepare_data())

        # Industry membership is fetched for the same universe and date range
        # that the data was loaded for.
        db_type = DatabaseTypes(self.config.database)
        industry_mapping = IndustryManager(database_type=db_type).get_industry_time_series(
            self.universe,
            start_date=self.start_date_lookback,
            end_date=self.end_date,
            classification="sw_l1",
            calendar_type=CalendarTypes.SSE,
        )
        return industry_neutralize(raw_alpha, industry_mapping, method="regression")

  from tqdm.autonotebook import tqdm


### Use the Factor's built-in `industry_neutralize` method

In [2]:
from frozen.engine import FrozenBt
from frozen.factor import *
from frozen.utils import Universe
from frozen.data.database import DatabaseTypes
from frozen.utils.calendar import CalendarTypes

class FactorFactory(FrozenBt):
    """Factor pipeline whose ``calc`` returns the alpha without neutralization.

    Neutralization is left to the caller, who can apply it to the object
    returned by ``calc``.
    """

    def __init__(self):
        # NOTE(review): `__vsc_ipynb_file__` is presumably the notebook-path
        # global provided by VS Code's Jupyter integration -- confirm.
        super().__init__(__vsc_ipynb_file__)

    def univ(self):
        """Build a ``Universe`` from the config and keep its pool on ``self.universe``."""
        self.universe = Universe(self.config).pool

    def prepare_data(self):
        """Load the fields referenced by the formula in ``calc``.

        Returns:
            Whatever ``self.dataloader.load_batch`` returns for the single data
            definition below, over the look-back start date to the end date.
        """
        # Presumably (table, source columns, names exposed to the formula);
        # the formula in `calc` refers to `close` and `returns`.
        definition = ("stock_daily_hfq", ("close", "pct_chg"), ("close", "returns"))
        return self.dataloader.load_batch(
            [definition],
            self.universe,
            start_date=self.start_date_lookback,
            end_date=self.end_date,
        )

    def calc(self):
        """Evaluate the formula string on the prepared data and return the result."""
        expression = "SignedPower(where(returns < 0 ? stddev(returns, 5) : close), 2.0)"
        return calc_str(expression, self.prepare_data())

In [3]:
# Instantiate the factory defined above and compute the factor; this cell's
# `calc` returns the alpha without any industry neutralization applied.
factory = FactorFactory()
factor = factory.calc()



In [4]:
# Neutralize through the factor object's own `industry_neutralize` method,
# passing the classification, database backend and calendar explicitly.
neutralized_factor = factor.industry_neutralize(
    method="demean",
    classification="sw_l1",
    database_type=DatabaseTypes.DUCKDB,
    calendar_type=CalendarTypes.SSE,
)

### Neutralize a Factor in MultiIndex format

In [5]:
from frozen.engine import FrozenBt
from frozen.factor import *
from frozen.utils import Universe
from frozen.utils import IndustryManager
from frozen.data.database import DatabaseTypes
from frozen.utils.calendar import CalendarTypes

class FactorFactory(FrozenBt):
    """Factor pipeline that computes three alphas in one batch and
    industry-neutralizes them with ``multiindex=True``.
    """

    def __init__(self):
        # NOTE(review): `__vsc_ipynb_file__` is presumably the notebook-path
        # global provided by VS Code's Jupyter integration -- confirm.
        super().__init__(__vsc_ipynb_file__)

    def univ(self):
        """Build a ``Universe`` from the config and keep its pool on ``self.universe``."""
        universe = Universe(self.config)
        self.universe = universe.pool

    def prepare_data(self):
        """Load the price and fundamental fields referenced by the formulas in ``calc``.

        Returns:
            Whatever ``self.dataloader.load_batch`` returns for the data
            definitions below, over the look-back start date to the end date.
        """
        # Presumably (table, source column(s), name(s) exposed to the formulas);
        # the formulas in `calc` refer to high, low, close, returns and pe.
        data_definitions = [
            ("stock_daily_hfq", ("high", "low", "close", "pct_chg"), ("high", "low", "close", "returns")),
            ("stock_daily_fundamental", "pe_ttm", "pe"),
        ]

        # Dates are passed by keyword, matching the other examples in this
        # notebook, so the call does not depend on positional parameter order.
        return self.dataloader.load_batch(
            data_definitions,
            self.universe,
            start_date=self.start_date_lookback,
            end_date=self.end_date,
        )

    def calc(self):
        """Compute ``alpha1``..``alpha3`` in one batch and neutralize them.

        Returns:
            The result of ``industry_neutralize`` (regression method,
            ``multiindex=True``) applied to the batch-computed alphas.
        """
        alpha1 = "rank(Ts_ArgMax(SignedPower(where(returns < 0 ? stddev(returns, 5) : close), 2.0), 5))"

        alpha2 = "where(ts_min(delta(close, 1), 5) > 0 ? delta(close, 1) : where((ts_max(delta(close, 1), 5) < 0) ? delta(close, 1) : (mul(delta(close, 1), -1))))"

        # Components of alpha3, each over a 12-period window.
        wms = "(ts_max(high, 12) - close) / (ts_max(high, 12) - ts_min(low, 12)) * 100"
        roc = "(close - delay(close, 12)) / delay(close, 12)"
        # BIAS is the percentage deviation of close from its moving average.
        # The previous form `close * mean(close, 12) / mean(close, 12) * 100`
        # reduced to `close * 100`; the `*` was a typo for `-`.
        bias = "(close - mean(close, 12)) / mean(close, 12) * 100"

        # Every component goes through normalize(..., 30) before being combined.
        pe = "normalize(pe, 30)"
        wms = f"normalize({wms}, 30)"
        roc = f"normalize({roc}, 30)"
        bias = f"normalize({bias}, 30)"

        alpha3 = f"{pe} * 0.0415 - {wms} * 0.1512 + {roc} * 0.1427 + {bias} * 0.6652"

        # Formulas and their names are kept in matching order.
        str_list = [alpha1, alpha2, alpha3]
        name_list = ["alpha1", "alpha2", "alpha3"]

        alpha = batch_calc(str_list, name_list, self.prepare_data(), parallel=True)

        # The industry mapping is requested with multiindex=True to match the
        # multiindex=True passed to industry_neutralize below.
        manager = IndustryManager(database_type=DatabaseTypes(self.config.database))
        industry_mapping = manager.get_industry_time_series(
            self.universe,
            start_date=self.start_date_lookback,
            end_date=self.end_date,
            classification="sw_l1",
            calendar_type=CalendarTypes.SSE,
            multiindex=True,
        )
        neutralized_alpha = industry_neutralize(alpha, industry_mapping, method="regression", multiindex=True)

        return neutralized_alpha