In [5]:
%load_ext autoreload
%autoreload 2
import warnings
warnings.filterwarnings('ignore')
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

import pandas as pd
import numpy as np
import lightgbm as lgbm
import os
import matplotlib.pyplot as plt
import seaborn as sns
import requests
import json
import time
import pickle

from tqdm import tqdm

from utils import load_json
from features import QuarterlyFeatures, BaseCompanyFeatures, FeatureMerger
from targets import QuarterlyTarget
from models import GroupedOOFModel
import pipelines.marketcap
from data import SF1Data
sns.set()

config = load_json("config.json")


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
# Market-cap scale labels used to restrict the ticker universe.
marketcap_scales = ['4 - Mid', '5 - Large', '6 - Mega']

# Loader rooted at the path configured under 'sf1_data_path'.
data_loader = SF1Data(config['sf1_data_path'])

# Ticker table filtered by currency and market-cap scale.
tickers_df = data_loader.load_tickers(currency='USD',
                                      scalemarketcap=marketcap_scales)

# Distinct ticker symbols, in order of first appearance, as a plain list.
ticker_list = tickers_df['ticker'].drop_duplicates().tolist()

In [6]:
# Columns handed to QuarterlyFeatures below.
columns = [
    'revenue', 'netinc', 'ncf', 'assets',
    'ebitda', 'debt', 'fcf', 'gp',
    'workingcapital', 'cashneq', 'rnd', 'sgna',
    'ncfx', 'divyield', 'currentratio', 'netinccmn',
]

# Columns handed to BaseCompanyFeatures as categorical inputs.
cat_columns = ['sector', 'sicindustry']

# Two feature calculators, merged on the 'ticker' key.
fc1 = QuarterlyFeatures(
    columns=columns,
    quarter_counts=[2, 4, 10],
    max_back_quarter=10,
)
fc2 = BaseCompanyFeatures(cat_columns=cat_columns)
feature = FeatureMerger(fc1, fc2, on='ticker')

# Target built from the 'marketcap' column with no quarter shift.
target = QuarterlyTarget(col='marketcap', quarter_shift=0)

# Default-parameter LightGBM regressor wrapped by the project's grouped
# out-of-fold model (fold_cnt=5).
model = GroupedOOFModel(lgbm.LGBMRegressor(), fold_cnt=5)

# Assemble the pipeline, fit it on the selected tickers, and export it under
# the 'models_data' directory.
mc_pipeline = pipelines.marketcap.MarketcapPipeline(feature, target, model)
mc_pipeline.fit(config, ticker_list)
mc_pipeline.export_core('models_data')

3874it [00:30, 126.44it/s]
2400it [00:03, 603.73it/s]


0.38807835112942357


In [None]:
# NOTE(review): unexecuted cell holding a bare float; presumably a score pasted
# from an earlier run of the fit cell. Confirm what produced it, or move it to
# a markdown cell with a description.
0.38879646304551746

In [None]:
# NOTE(review): unexecuted cell holding a bare float; presumably a score pasted
# from an earlier run with a different setup. Confirm what produced it, or move
# it to a markdown cell with a description.
0.3651129822271524

In [7]:
# Pair each model input column with its importance in the first base model
# (base_models[0]) and order the rows from most to least important.
imp_df = pd.DataFrame({
    'column': mc_pipeline.model.columns,
    'importance': mc_pipeline.model.base_models[0].feature_importances_,
}).sort_values('importance', ascending=False)

# Show the ten most important features.
imp_df.head(10)

Unnamed: 0,column,importance
481,sicindustry,178
480,sector,50
361,quarter10_ebitda_median,45
455,quarter10_divyield_diff_mean,38
130,quarter2_divyield_mean,34
401,quarter10_workingcapital_median,34
404,quarter10_workingcapital_std,31
331,quarter10_netinc_median,30
135,quarter2_divyield_diff_mean,28
411,quarter10_cashneq_median,27


In [11]:
# Location of a previously exported pipeline.
# NOTE(review): the timestamped name is hard-coded, so this cell only works if
# that exact export exists under models_data/. Also note the ':' in the file
# name, which is not a valid path character on every filesystem.
pipeline_path = 'models_data/marketcap_pipeline_16.02.21_15:11'
# Rebinds mc_pipeline to whatever pipelines.marketcap.load returns for this
# path, replacing any pipeline object created earlier in the session.
mc_pipeline = pipelines.marketcap.load(pipeline_path)

In [13]:
# Run the current mc_pipeline over ticker_list and keep the result in mc_df.
# NOTE(review): presumably a DataFrame of per-ticker outputs; confirm against
# MarketcapPipeline.execute.
mc_df = mc_pipeline.execute(config, ticker_list)

3874it [00:30, 128.52it/s]


In [9]:
# Scratch cell: adjacent string literals are concatenated at compile time, so
# the cell's value is the single joined string.
("wef"
 "fewf")

'weffewf'