In [305]:
from sklearn.linear_model import Ridge, Lasso, LogisticRegression
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np

# Load the tables 
covid_df = pd.read_csv('../data/covid.csv', index_col=False)
precovid_df = pd.read_csv('../data/precovid.csv', index_col=False)
postcovid_df = pd.read_csv('../data/postcovid.csv', index_col=False)

### Lasso Regularization embedded feature selection

In [306]:
def get_embedded_report(inp_df, estimator, count, regularizer):
    
    input_df = inp_df.drop(columns={'Number of Workers','percent_layoff','Unnamed: 0'})
    X = input_df
    y = inp_df['Number of Workers']
    
    # First normalize our predictors
    X_numeric_scaled = StandardScaler().fit_transform(X)
    y_scaled     = StandardScaler().fit_transform(y.values.reshape(-1,1))

    # Save the columns name into variable named 'features'
    features = X.columns
    if regularizer=='randomforest':
        model = estimator.fit(X, y_scaled.ravel())
    else:
        model = estimator.fit(X, y_scaled)
        
    try:
        c = model.coef_
    except:
        c = model.feature_importances_
    finally:
        if regularizer=='lasso':
            output_df = pd.DataFrame(list(zip(features,c)), columns=['Features','Coefficient'])
            return output_df[output_df['Coefficient']>0].sort_values(by=['Coefficient'], ascending=False).reset_index(drop=True).head(count)
        else:
            output_df = pd.DataFrame(list(zip(features,c.flatten())), columns=['Features','Coefficient'])
            return output_df.sort_values(by=['Coefficient'], ascending=False).reset_index(drop=True).head(count)


In [307]:
count = 20
precovid_features = get_embedded_report(precovid_df, Lasso(alpha=0.1, tol=200), count, 'lasso')
covid_features = get_embedded_report(covid_df, Lasso(alpha=0.1, tol=200), count, 'lasso')
postcovid_features = get_embedded_report(postcovid_df, Lasso(alpha=0.1, tol=200), count, 'lasso')

In [308]:
print(precovid_features)
precovid_df[precovid_features['Features']]

                                           Features   Coefficient
0                                 Number of Layoffs  2.878397e-01
1                                 industry_labelled  2.976583e-04
2                                    employee_count  1.768844e-07
3                                 commonStockIssued  1.059667e-09
4                         deferredRevenueNonCurrent  1.623620e-10
5                            stockBasedCompensation  6.341012e-11
6                     inventory_cash-flow-statement  4.540658e-11
7                       totalOtherIncomeExpensesNet  3.945615e-11
8                             weightedAverageShsOut  3.233158e-11
9                            commonStockRepurchased  2.842469e-11
10                                 incomeTaxExpense  2.621922e-11
11                               capitalExpenditure  2.540272e-11
12  depreciationAndAmortization_cash-flow-statement  2.252727e-11
13                                        taxAssets  1.321505e-11
14        

Unnamed: 0,Number of Layoffs,industry_labelled,employee_count,commonStockIssued,deferredRevenueNonCurrent,stockBasedCompensation,inventory_cash-flow-statement,totalOtherIncomeExpensesNet,weightedAverageShsOut,commonStockRepurchased,incomeTaxExpense,capitalExpenditure,depreciationAndAmortization_cash-flow-statement,taxAssets,incomeBeforeTax,operatingExpenses,dividendsPaid,GeneralAndAdministrativeExpenses,accumulatedOtherComprehensiveIncomeLoss,deferrredTaxLiabilitiesNonCurrent
0,1,2,118033.0,0.0,5.249000e+09,0.000000e+00,0.000000e+00,-271000000.0,4.450080e+08,-1.700000e+07,2.200000e+08,-1.018000e+09,0.000000e+00,792000000.0,8.820000e+08,1.672000e+09,-4.400000e+07,0.0,-5.927000e+09,0.0
1,1,4,132000.0,1000000.0,2.878000e+09,1.351000e+09,1.693000e+09,672000000.0,1.952867e+10,-2.078300e+10,1.765000e+09,-3.267000e+09,2.665000e+09,0.0,1.328400e+10,7.809000e+09,-3.653000e+09,0.0,-3.111000e+09,398000000.0
2,1,4,137000.0,0.0,0.000000e+00,1.514000e+09,7.000000e+07,378000000.0,1.869628e+10,-2.370200e+10,2.232000e+09,-2.363000e+09,3.040000e+09,0.0,1.379300e+10,8.406000e+09,-3.443000e+09,0.0,-1.499000e+09,0.0
3,1,5,3641.0,0.0,0.000000e+00,0.000000e+00,0.000000e+00,0.0,9.541878e+07,0.000000e+00,6.879000e+06,0.000000e+00,0.000000e+00,0.0,6.675900e+07,0.000000e+00,-6.605200e+07,0.0,-3.900000e+07,0.0
4,1,11,6700.0,0.0,0.000000e+00,3.200000e+06,-4.350000e+07,-5600000.0,1.068000e+08,-9.100000e+06,-4.300000e+06,-8.000000e+06,1.830000e+07,132900000.0,6.100000e+06,1.111000e+08,-6.400000e+06,0.0,-4.674000e+08,174800000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
338,1,568,7700.0,0.0,0.000000e+00,5.300000e+06,0.000000e+00,300000.0,7.390000e+07,0.000000e+00,8.000000e+06,-2.650000e+07,2.620000e+07,34500000.0,6.110000e+07,7.230000e+07,-1.030000e+07,64900000.0,-1.538000e+08,11700000.0
339,2,570,11500.0,0.0,0.000000e+00,0.000000e+00,0.000000e+00,-35200000.0,4.233000e+08,-1.422000e+08,2.720000e+07,-1.850000e+07,6.110000e+07,0.0,1.622000e+08,3.409000e+08,-8.420000e+07,0.0,-1.831000e+08,161300000.0
340,1,570,11500.0,0.0,0.000000e+00,0.000000e+00,0.000000e+00,-29000000.0,4.195000e+08,-6.880000e+07,6.210000e+07,-3.420000e+07,6.700000e+07,0.0,1.975000e+08,3.247000e+08,-8.370000e+07,0.0,-2.090000e+08,152100000.0
341,1,571,9000.0,9565000.0,3.329270e+08,0.000000e+00,0.000000e+00,-3534000.0,6.194100e+07,-7.126200e+07,2.620700e+07,-2.356400e+07,0.000000e+00,15336000.0,9.231400e+07,9.364100e+07,-1.007100e+07,52980000.0,-6.418000e+07,173289000.0


In [309]:
print(covid_features)
covid_df[covid_features['Features']]


                                    Features   Coefficient
0                          Number of Layoffs  1.538622e-01
1                       incomeBeforeTaxRatio  3.954957e-03
2                                EBITDARatio  2.040811e-03
3                          industry_labelled  5.672192e-04
4                       operatingIncomeRatio  2.398573e-05
5                                 new_deaths  4.142861e-06
6                                  new_cases  1.046641e-06
7                             employee_count  1.298999e-07
8                     stockBasedCompensation  8.937824e-10
9                            interestExpense  6.841318e-10
10                         deferredIncomeTax  3.902665e-10
11                           acquisitionsNet  1.207244e-10
12                     weightedAverageShsOut  8.679326e-11
13             netIncome_cash-flow-statement  8.660510e-11
14                 deferredRevenueNonCurrent  6.859088e-11
15            ResearchAndDevelopmentExpenses  5.884265e-

Unnamed: 0,Number of Layoffs,incomeBeforeTaxRatio,EBITDARatio,industry_labelled,operatingIncomeRatio,new_deaths,new_cases,employee_count,stockBasedCompensation,interestExpense,deferredIncomeTax,acquisitionsNet,weightedAverageShsOut,netIncome_cash-flow-statement,deferredRevenueNonCurrent,ResearchAndDevelopmentExpenses,shortTermDebt,capitalExpenditure,netCashUsedProvidedByFinancingActivities,netIncome
0,2,-0.975418,-0.868263,2,-0.904822,890.315217,50113.815217,118033.0,0.0,340000000.0,0.0,0.0,5.090490e+08,0.000000e+00,7.043000e+09,0.000000e+00,4.446000e+09,-5.770000e+08,1.511000e+09,-2.399000e+09
1,3,-0.392465,-0.299900,2,-0.328094,2236.066667,116989.855556,118033.0,0.0,371000000.0,0.0,0.0,6.346090e+08,0.000000e+00,7.055000e+09,0.000000e+00,4.039000e+09,0.000000e+00,7.013000e+09,-1.250000e+09
2,1,0.100960,0.128970,3,0.105080,1372.780220,26741.428571,0.0,10029000.0,13422000.0,4435000.0,0.0,6.911800e+07,1.899600e+08,0.000000e+00,0.000000e+00,0.000000e+00,-5.713200e+07,-5.191350e+08,1.899600e+08
3,1,0.257147,0.446939,6,0.300237,1035.130435,105140.771739,47000.0,135000000.0,596000000.0,-34000000.0,-492000000.0,1.767807e+09,3.179000e+09,0.000000e+00,1.673000e+09,6.672000e+09,-2.170000e+08,-3.643000e+09,3.179000e+09
4,2,0.045879,0.063594,8,0.056882,1372.780220,26741.428571,7600.0,3100000.0,15900000.0,0.0,0.0,1.920000e+07,4.960000e+07,0.000000e+00,0.000000e+00,5.940000e+08,-9.100000e+06,-1.932000e+08,4.960000e+07
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
264,1,0.085937,0.163907,571,0.069550,1372.780220,26741.428571,7100.0,6613000.0,8737000.0,-1891000.0,0.0,6.230900e+07,3.846500e+07,3.561750e+08,3.452200e+07,2.002820e+08,-9.711000e+06,-1.478450e+08,3.846500e+07
265,1,-0.284075,-0.253467,573,-0.193209,1372.780220,26741.428571,22514.0,0.0,64000000.0,-18000000.0,-3000000.0,1.753270e+08,-5.890000e+08,4.970000e+08,0.000000e+00,1.520000e+08,-1.730000e+08,1.323000e+09,-5.890000e+08
266,1,-0.581858,0.442221,574,-0.583956,1541.793478,134353.271739,0.0,0.0,313000000.0,0.0,770000000.0,4.270213e+09,-2.007000e+10,0.000000e+00,0.000000e+00,2.045800e+10,-3.629000e+09,-5.283000e+09,-2.007000e+10
267,1,-0.244395,-0.057481,576,-0.212597,1372.780220,26741.428571,5100.0,10200000.0,12300000.0,-24300000.0,0.0,2.187000e+08,-9.540000e+07,0.000000e+00,0.000000e+00,3.000000e+05,-1.320000e+07,6.581000e+08,-9.540000e+07


In [310]:
print(postcovid_features)
postcovid_df[postcovid_features['Features']]


                          Features   Coefficient
0                Number of Layoffs  8.126087e-02
1                          Quarter  7.895946e-02
2             operatingIncomeRatio  3.714804e-03
3                      EBITDARatio  2.950605e-03
4                industry_labelled  6.224978e-04
5                       new_deaths  1.991088e-05
6                   employee_count  7.044029e-08
7                commonStockIssued  2.676901e-09
8        deferredRevenueNonCurrent  2.305451e-10
9                 incomeTaxExpense  1.827654e-10
10     SellingAndMarketingExpenses  1.502206e-10
11                     taxPayables  1.270351e-10
12                accountsPayables  9.795497e-11
13          stockBasedCompensation  9.419168e-11
14                       taxAssets  7.962175e-11
15          commonStockRepurchased  5.938446e-11
16   inventory_cash-flow-statement  4.675649e-11
17                   dividendsPaid  4.293735e-11
18           weightedAverageShsOut  3.308481e-11
19  ResearchAndDevel

Unnamed: 0,Number of Layoffs,Quarter,operatingIncomeRatio,EBITDARatio,industry_labelled,new_deaths,employee_count,commonStockIssued,deferredRevenueNonCurrent,incomeTaxExpense,SellingAndMarketingExpenses,taxPayables,accountsPayables,stockBasedCompensation,taxAssets,commonStockRepurchased,inventory_cash-flow-statement,dividendsPaid,weightedAverageShsOut,ResearchAndDevelopmentExpenses
0,1,4,0.000000,0.000000,2,373.391304,118033.0,0.0,0.0,0.000000e+00,0.0,0.0,0.000000e+00,0.0,3.099000e+09,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00
1,1,1,0.348427,0.555991,6,1783.786517,47000.0,0.0,0.0,4.360000e+08,0.0,0.0,-2.580000e+08,306000000.0,0.000000e+00,-1.470000e+09,-3.850000e+08,-2.526000e+09,1.771000e+09,1.497000e+09
2,1,3,0.310762,0.483527,6,430.739130,47000.0,0.0,0.0,4.480000e+08,0.0,0.0,7.780000e+08,126000000.0,0.000000e+00,-4.000000e+06,-1.700000e+07,-2.504000e+09,1.771000e+09,1.614000e+09
3,1,4,0.073506,0.088850,9,1335.913043,114000.0,0.0,0.0,2.720000e+07,0.0,146000000.0,-3.000000e+06,8500000.0,0.000000e+00,0.000000e+00,0.000000e+00,-1.270000e+07,6.720000e+07,0.000000e+00
4,1,3,0.039332,0.054597,9,430.739130,114000.0,0.0,0.0,1.490000e+07,0.0,124700000.0,0.000000e+00,6100000.0,0.000000e+00,-2.300000e+07,0.000000e+00,-1.290000e+07,6.640000e+07,0.000000e+00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99,2,2,0.177044,0.273921,562,363.747253,48250.0,0.0,0.0,1.890000e+08,0.0,0.0,0.000000e+00,26000000.0,0.000000e+00,-2.700000e+08,0.000000e+00,-2.690000e+08,4.144000e+08,0.000000e+00
100,1,1,0.037565,0.039076,563,1783.786517,2200000.0,0.0,0.0,7.980000e+08,0.0,904000000.0,-1.640000e+09,0.0,0.000000e+00,-2.408000e+09,-4.699000e+09,-1.543000e+09,2.769000e+09,0.000000e+00
101,2,2,0.044839,0.064268,563,363.747253,2200000.0,0.0,0.0,1.497000e+09,0.0,868000000.0,1.187000e+09,0.0,0.000000e+00,-3.339000e+09,9.690000e+08,-1.538000e+09,2.736000e+09,0.000000e+00
102,1,1,0.010516,0.092938,566,1783.786517,49300.0,0.0,0.0,1.800000e+06,0.0,0.0,5.890000e+07,24500000.0,0.000000e+00,-2.193000e+08,-1.590000e+07,-6.580000e+07,2.634000e+08,0.000000e+00


### Ridge Regularization Embedded Feature Seelection

In [311]:
# Using L2-regularization
precovid_ridge_features = get_embedded_report(precovid_df, Ridge(), count, 'ridge')
covid_ridge_features = get_embedded_report(covid_df, Ridge(), count, 'ridge')
postcovid_ridge_features = get_embedded_report(postcovid_df, Ridge(), count, 'ridge')



In [312]:
print(precovid_ridge_features)
precovid_df[precovid_ridge_features['Features']]

                                    Features   Coefficient
0                             netIncomeRatio  5.656870e-01
1                       operatingIncomeRatio  4.750188e-01
2                          Number of Layoffs  3.785652e-01
3                                       Year  1.446932e-01
4                                    Quarter  3.857612e-02
5                                        EPS  3.582766e-02
6        netCashProvidedByOperatingActivites  1.022301e-03
7                                    netDebt  9.259626e-04
8   netCashUsedProvidedByFinancingActivities  8.965106e-04
9      totalLiabilitiesAndStockholdersEquity  6.004911e-04
10                         industry_labelled  5.720195e-04
11                    cashAndCashEquivalents  5.634364e-04
12               cashAndShortTermInvestments  3.625263e-04
13                         cashAtEndOfPeriod  2.463377e-04
14          netCashUsedForInvestingActivites  2.090666e-05
15                            employee_count  2.157378e-

Unnamed: 0,netIncomeRatio,operatingIncomeRatio,Number of Layoffs,Year,Quarter,EPS,netCashProvidedByOperatingActivites,netDebt,netCashUsedProvidedByFinancingActivities,totalLiabilitiesAndStockholdersEquity,industry_labelled,cashAndCashEquivalents,cashAndShortTermInvestments,cashAtEndOfPeriod,netCashUsedForInvestingActivites,employee_count,EBITDA,goodwillAndIntangibleAssets,weightedAverageShsOut,freeCashFlow
0,0.055351,0.096405,1,2019,2,1.490000,7.360000e+08,3.442900e+10,1.179000e+09,6.196700e+10,2,3.190000e+08,5.407000e+09,3.300000e+08,-1.933000e+09,118033.0,1.157000e+09,6.196000e+09,4.450080e+08,-2.820000e+08
1,0.216258,0.236778,1,2018,2,0.590000,1.448800e+10,8.262900e+10,-3.152300e+10,3.491970e+11,4,3.197100e+10,7.097000e+10,3.197100e+10,3.947000e+09,132000.0,1.679500e+10,0.000000e+00,1.952867e+10,1.122100e+10
2,0.199276,0.231233,1,2019,1,0.617500,1.115500e+10,7.464200e+10,-2.945700e+10,3.419980e+11,4,3.798800e+10,8.009200e+10,3.981700e+10,1.334800e+10,137000.0,1.784300e+10,0.000000e+00,1.869628e+10,8.792000e+09
3,0.896958,0.000000,1,2018,4,0.627539,6.597500e+07,-9.393900e+07,-6.018800e+07,1.490701e+09,5,6.402060e+08,6.402060e+08,0.000000e+00,-5.787000e+06,3641.0,6.675900e+07,0.000000e+00,9.541878e+07,6.597500e+07
4,0.025628,0.028832,1,2018,1,0.100000,6.040000e+07,8.284000e+08,-7.000000e+06,2.754700e+09,11,1.227000e+08,1.227000e+08,1.227000e+08,-8.000000e+06,6700.0,3.380000e+07,1.506000e+09,1.068000e+08,5.240000e+07
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
338,0.127867,0.140839,1,2018,3,0.750000,8.840000e+07,-1.011000e+08,1.010000e+07,1.946100e+09,568,2.973000e+08,2.973000e+08,2.973000e+08,-2.650000e+07,7700.0,9.140000e+07,1.273000e+08,7.390000e+07,6.190000e+07
339,0.103298,0.151044,2,2019,3,0.320000,2.627000e+08,1.366700e+09,-5.200000e+07,8.803700e+09,570,1.881300e+09,3.485600e+09,1.398200e+09,-3.000000e+07,11500.0,2.595000e+08,3.081100e+09,4.233000e+08,2.442000e+08
340,0.103541,0.173205,1,2019,4,0.320000,2.493000e+08,1.652900e+09,-1.660000e+08,8.758500e+09,570,1.818700e+09,3.517100e+09,1.456800e+09,-2.470000e+07,11500.0,3.020000e+08,3.061500e+09,4.195000e+08,2.151000e+08
341,0.087908,0.127457,1,2019,2,1.070000,7.824800e+07,1.127845e+09,-4.628500e+07,4.015603e+09,571,6.330200e+07,6.330200e+07,6.330200e+07,-3.504500e+07,9000.0,1.031120e+08,1.449349e+09,6.194100e+07,5.468400e+07


In [313]:
print(covid_ridge_features)
covid_df[covid_ridge_features['Features']]

                                    Features  Coefficient
0                       incomeBeforeTaxRatio     0.165152
1                                        EPS     0.142897
2                                    Quarter     0.131268
3                          Number of Layoffs     0.074990
4                                 new_deaths     0.014892
5                                  totalDebt     0.000318
6                         new_cases_smoothed     0.000244
7                    totalStockholdersEquity     0.000236
8                     new_deaths_per_million     0.000202
9                 totalNonCurrentLiabilities     0.000190
10                          otherLiabilities     0.000190
11                   totalCurrentLiabilities     0.000189
12                   cashAtBeginningOfPeriod     0.000107
13                           netChangeInCash     0.000107
14               cashAndShortTermInvestments     0.000099
15                         operatingCashFlow     0.000093
16  netCashUse

Unnamed: 0,incomeBeforeTaxRatio,EPS,Quarter,Number of Layoffs,new_deaths,totalDebt,new_cases_smoothed,totalStockholdersEquity,new_deaths_per_million,totalNonCurrentLiabilities,otherLiabilities,totalCurrentLiabilities,cashAtBeginningOfPeriod,netChangeInCash,cashAndShortTermInvestments,operatingCashFlow,netCashUsedProvidedByFinancingActivities,new_cases_per_million,totalLiabilities,netCashProvidedByOperatingActivites
0,-0.975418,-4.710000,3,2,890.315217,4.120500e+10,50152.321380,-5.528000e+09,2.631826,5.171700e+10,0.0,1.658400e+10,4.670000e+08,-1.700000e+08,8.284000e+09,-2.604000e+09,1.511000e+09,148.138696,6.830100e+10,-2.604000e+09
1,-0.392465,-1.970000,1,3,2236.066667,4.802400e+10,120848.299989,-7.945000e+09,6.609944,5.926100e+10,0.0,1.733300e+10,3.990000e+08,3.500000e+07,1.403900e+10,1.740000e+08,7.013000e+09,345.827267,7.659400e+10,1.740000e+08
2,0.100960,2.750000,2,1,1372.780220,3.281740e+09,26039.981154,3.726519e+09,4.058055,3.766587e+09,0.0,4.437559e+09,1.279838e+09,-1.383360e+08,1.141502e+09,4.373020e+08,-5.191350e+08,79.048879,8.204146e+09,4.373020e+08
3,0.257147,1.798273,3,1,1035.130435,8.072100e+10,102092.024870,1.355000e+10,3.059891,1.067480e+11,0.0,2.853300e+10,8.546000e+09,3.636000e+09,1.224900e+10,7.935000e+09,-3.643000e+09,310.800826,1.352810e+11,7.935000e+09
4,0.045879,2.580000,2,2,1372.780220,1.853700e+09,26039.981154,7.131000e+08,4.058055,1.328100e+09,0.0,8.932000e+08,3.886000e+08,2.246000e+08,6.132000e+08,4.269000e+08,-1.932000e+08,79.048879,2.221300e+09,4.269000e+08
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
264,0.085937,0.620000,2,1,1372.780220,9.294470e+08,26039.981154,1.909766e+09,4.058055,1.461275e+09,0.0,5.356280e+08,1.028490e+08,-1.486000e+06,9.848300e+07,1.602370e+08,-1.478450e+08,79.048879,1.996903e+09,1.602370e+08
265,-0.284075,-3.360000,2,1,1372.780220,5.657000e+09,26039.981154,3.617000e+09,4.058055,6.756000e+09,0.0,2.141000e+09,1.497000e+09,9.320000e+08,2.300000e+09,-2.200000e+08,1.323000e+09,79.048879,8.897000e+09,-2.200000e+08
266,-0.581858,-4.700000,4,1,1541.793478,6.596000e+10,129908.336978,1.571500e+11,4.557598,1.122570e+11,0.0,5.636300e+10,8.832000e+09,-4.468000e+09,4.787900e+10,4.005000e+09,-5.283000e+09,397.154261,1.686200e+11,4.005000e+09
267,-0.244395,-0.440000,2,1,1372.780220,2.183000e+09,26039.981154,4.643100e+09,4.058055,3.195800e+09,0.0,7.630000e+08,2.359000e+08,8.732000e+08,1.109100e+09,1.751000e+08,6.581000e+08,79.048879,3.958800e+09,1.751000e+08


In [314]:
print(postcovid_ridge_features)
postcovid_df[postcovid_ridge_features['Features']]

                                           Features   Coefficient
0                             weightedAverageShsOut  1.381479e-07
1                                         totalDebt  1.381093e-07
2                                   interestExpense  1.186155e-07
3                  netCashUsedForInvestingActivites  1.017495e-07
4                     netIncome_cash-flow-statement  8.595588e-08
5                            stockBasedCompensation  8.460799e-08
6                                     shortTermDebt  7.152236e-08
7                                  incomeTaxExpense  7.060667e-08
8                       depreciationAndAmortization  6.785777e-08
9   depreciationAndAmortization_cash-flow-statement  6.678758e-08
10                                     longTermDebt  6.658689e-08
11                               capitalExpenditure  6.034217e-08
12                      totalOtherIncomeExpensesNet  5.854689e-08
13                                otherNonCashItems  4.341635e-08
14        

Unnamed: 0,weightedAverageShsOut,totalDebt,interestExpense,netCashUsedForInvestingActivites,netIncome_cash-flow-statement,stockBasedCompensation,shortTermDebt,incomeTaxExpense,depreciationAndAmortization,depreciationAndAmortization_cash-flow-statement,longTermDebt,capitalExpenditure,totalOtherIncomeExpensesNet,otherNonCashItems,operatingIncome,changeInWorkingCapital,deferredIncomeTax,netIncome,revenue,netCashProvidedByOperatingActivites
0,0.000000e+00,4.368700e+10,0.0,0.000000e+00,0.000000e+00,0.0,4.739000e+09,0.000000e+00,0.000000e+00,0.000000e+00,3.894800e+10,0.000000e+00,0.000000e+00,0.0,0.000000e+00,0.000000e+00,0.0,0.000000e+00,0.000000e+00,0.000000e+00
1,1.771000e+09,7.347400e+10,548000000.0,-1.591000e+09,4.490000e+09,306000000.0,9.952000e+09,4.360000e+08,2.053000e+09,2.053000e+09,6.352200e+10,-1.620000e+08,2.120000e+08,-472000000.0,4.717000e+09,-1.275000e+09,-194000000.0,4.490000e+09,1.353800e+10,4.908000e+09
2,1.771000e+09,6.960600e+10,560000000.0,1.286000e+09,3.952000e+09,126000000.0,9.207000e+09,4.480000e+08,2.205000e+09,2.205000e+09,6.039900e+10,-1.770000e+08,-2.030000e+08,602000000.0,4.603000e+09,1.338000e+09,-621000000.0,3.949000e+09,1.481200e+10,7.602000e+09
3,6.720000e+07,8.499000e+08,8500000.0,-5.200000e+06,7.460000e+07,8500000.0,1.508000e+08,2.720000e+07,2.230000e+07,2.230000e+07,6.991000e+08,-6.600000e+06,-7.800000e+06,1700000.0,1.097000e+08,-5.440000e+07,-7400000.0,7.460000e+07,1.492400e+09,4.530000e+07
4,6.640000e+07,1.402600e+09,16000000.0,-1.300000e+07,4.880000e+07,6100000.0,2.118000e+08,1.490000e+07,3.000000e+07,3.000000e+07,1.190800e+09,-1.300000e+07,-1.540000e+07,-112900000.0,7.910000e+07,1.171000e+08,28000000.0,4.890000e+07,2.011100e+09,1.171000e+08
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99,4.144000e+08,1.427700e+10,93000000.0,-5.280000e+08,5.870000e+08,26000000.0,2.310000e+08,1.890000e+08,5.080000e+08,5.080000e+08,1.404600e+10,-5.500000e+08,-1.140000e+08,52000000.0,8.900000e+08,-7.300000e+07,-53000000.0,5.870000e+08,5.027000e+09,1.047000e+09
100,2.769000e+09,6.681700e+10,0.0,-4.558000e+09,2.054000e+09,0.0,1.700800e+10,7.980000e+08,2.680000e+09,2.680000e+09,4.980900e+10,-3.539000e+09,-2.417000e+09,49000000.0,5.318000e+09,-8.472000e+09,-69000000.0,2.054000e+09,1.415690e+11,-3.758000e+09
101,2.736000e+09,6.530900e+10,479000000.0,-4.026000e+09,5.147000e+09,0.0,1.794800e+10,1.497000e+09,2.699000e+09,2.699000e+09,4.736100e+10,-3.953000e+09,-2.100000e+08,302000000.0,6.854000e+09,4.670000e+09,180000000.0,5.149000e+09,1.528590e+11,1.299800e+10
102,2.634000e+08,8.374000e+09,84900000.0,-1.518000e+08,3.990000e+07,24500000.0,4.196000e+08,1.800000e+06,3.736000e+08,0.000000e+00,7.954400e+09,-1.810000e+08,-1.410000e+07,643400000.0,5.660000e+07,-2.319000e+08,-86000000.0,3.990000e+07,5.382100e+09,3.899000e+08


### ElasticNet Feature Selection

In [315]:
from sklearn.linear_model import ElasticNet
from sklearn.datasets import make_regression

precovid_elasticnet = get_embedded_report(precovid_df, ElasticNet(tol=200), count, 'elasticnet')
covid_elasticnet = get_embedded_report(covid_df, ElasticNet(tol=200), count, 'elasticnet')
postcovid_elasticnet = get_embedded_report(postcovid_df, ElasticNet(tol=200), count, 'elasticnet')

In [316]:
print(precovid_elasticnet)
precovid_df[precovid_elasticnet['Features']]

                                           Features   Coefficient
0                                 industry_labelled  3.214523e-04
1                                    employee_count  2.229371e-07
2                                 commonStockIssued  1.056612e-09
3                            stockBasedCompensation  2.324120e-10
4                         deferredRevenueNonCurrent  1.361223e-10
5                                capitalExpenditure  3.973741e-11
6   depreciationAndAmortization_cash-flow-statement  3.869900e-11
7                       totalOtherIncomeExpensesNet  3.792827e-11
8                     inventory_cash-flow-statement  3.530636e-11
9                        effectOfForexChangesOnCash  3.128049e-11
10                            weightedAverageShsOut  3.090602e-11
11                           commonStockRepurchased  3.028648e-11
12                                 incomeTaxExpense  2.527534e-11
13                 GeneralAndAdministrativeExpenses  2.186544e-11
14        

Unnamed: 0,industry_labelled,employee_count,commonStockIssued,stockBasedCompensation,deferredRevenueNonCurrent,capitalExpenditure,depreciationAndAmortization_cash-flow-statement,totalOtherIncomeExpensesNet,inventory_cash-flow-statement,effectOfForexChangesOnCash,weightedAverageShsOut,commonStockRepurchased,incomeTaxExpense,GeneralAndAdministrativeExpenses,taxAssets,incomeBeforeTax,operatingExpenses,investmentsInPropertyPlantAndEquipment,ResearchAndDevelopmentExpenses,revenue
0,2,118033.0,0.0,0.000000e+00,5.249000e+09,-1.018000e+09,0.000000e+00,-271000000.0,0.000000e+00,0.0,4.450080e+08,-1.700000e+07,2.200000e+08,0.0,792000000.0,8.820000e+08,1.672000e+09,0.000000e+00,0.000000e+00,1.196000e+10
1,4,132000.0,1000000.0,1.351000e+09,2.878000e+09,-3.267000e+09,2.665000e+09,672000000.0,1.693000e+09,0.0,1.952867e+10,-2.078300e+10,1.765000e+09,0.0,0.0,1.328400e+10,7.809000e+09,-3.267000e+09,3.701000e+09,5.326500e+10
2,4,137000.0,0.0,1.514000e+09,0.000000e+00,-2.363000e+09,3.040000e+09,378000000.0,7.000000e+07,0.0,1.869628e+10,-2.370200e+10,2.232000e+09,0.0,0.0,1.379300e+10,8.406000e+09,-2.363000e+09,3.948000e+09,5.801500e+10
3,5,3641.0,0.0,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.0,0.000000e+00,0.0,9.541878e+07,0.000000e+00,6.879000e+06,0.0,0.0,6.675900e+07,0.000000e+00,0.000000e+00,0.000000e+00,6.675900e+07
4,11,6700.0,0.0,3.200000e+06,0.000000e+00,-8.000000e+06,1.830000e+07,-5600000.0,-4.350000e+07,400000.0,1.068000e+08,-9.100000e+06,-4.300000e+06,0.0,132900000.0,6.100000e+06,1.111000e+08,-8.000000e+06,0.000000e+00,4.058000e+08
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
338,568,7700.0,0.0,5.300000e+06,0.000000e+00,-2.650000e+07,2.620000e+07,300000.0,0.000000e+00,-200000.0,7.390000e+07,0.000000e+00,8.000000e+06,64900000.0,34500000.0,6.110000e+07,7.230000e+07,0.000000e+00,1.010000e+07,4.317000e+08
339,570,11500.0,0.0,0.000000e+00,0.000000e+00,-1.850000e+07,6.110000e+07,-35200000.0,0.000000e+00,0.0,4.233000e+08,-1.422000e+08,2.720000e+07,0.0,0.0,1.622000e+08,3.409000e+08,-7.500000e+06,0.000000e+00,1.306900e+09
340,570,11500.0,0.0,0.000000e+00,0.000000e+00,-3.420000e+07,6.700000e+07,-29000000.0,0.000000e+00,0.0,4.195000e+08,-6.880000e+07,6.210000e+07,0.0,0.0,1.975000e+08,3.247000e+08,-9.200000e+06,0.000000e+00,1.307700e+09
341,571,9000.0,9565000.0,0.000000e+00,3.329270e+08,-2.356400e+07,0.000000e+00,-3534000.0,0.000000e+00,1081000.0,6.194100e+07,-7.126200e+07,2.620700e+07,52980000.0,15336000.0,9.231400e+07,9.364100e+07,-2.356400e+07,4.066100e+07,7.520050e+08


In [317]:
print(covid_elasticnet)
covid_df[covid_elasticnet['Features']]

                                    Features   Coefficient
0                          industry_labelled  5.128034e-04
1                                 new_deaths  1.282834e-06
2                                  new_cases  5.573375e-07
3                             employee_count  1.278124e-07
4                     stockBasedCompensation  8.954347e-10
5                            interestExpense  7.615649e-10
6                          deferredIncomeTax  3.957582e-10
7                            acquisitionsNet  8.716635e-11
8                      weightedAverageShsOut  8.642324e-11
9              netIncome_cash-flow-statement  8.148100e-11
10                 deferredRevenueNonCurrent  7.750541e-11
11            ResearchAndDevelopmentExpenses  7.225176e-11
12                             shortTermDebt  4.422956e-11
13                        capitalExpenditure  3.381594e-11
14                                 netIncome  2.572890e-11
15  netCashUsedProvidedByFinancingActivities  2.515285e-

Unnamed: 0,industry_labelled,new_deaths,new_cases,employee_count,stockBasedCompensation,interestExpense,deferredIncomeTax,acquisitionsNet,weightedAverageShsOut,netIncome_cash-flow-statement,deferredRevenueNonCurrent,ResearchAndDevelopmentExpenses,shortTermDebt,capitalExpenditure,netIncome,netCashUsedProvidedByFinancingActivities,otherFinancingActivites,inventory_cash-flow-statement,otherNonCashItems,deferredRevenue
0,2,890.315217,50113.815217,118033.0,0.0,340000000.0,0.0,0.0,5.090490e+08,0.000000e+00,7.043000e+09,0.000000e+00,4.446000e+09,-5.770000e+08,-2.399000e+09,1.511000e+09,2.052000e+09,0.0,-2.604000e+09,6.954000e+09
1,2,2236.066667,116989.855556,118033.0,0.0,371000000.0,0.0,0.0,6.346090e+08,0.000000e+00,7.055000e+09,0.000000e+00,4.039000e+09,0.000000e+00,-1.250000e+09,7.013000e+09,1.075100e+10,0.0,1.740000e+08,7.921000e+09
2,3,1372.780220,26741.428571,0.0,10029000.0,13422000.0,4435000.0,0.0,6.911800e+07,1.899600e+08,0.000000e+00,0.000000e+00,0.000000e+00,-5.713200e+07,1.899600e+08,-5.191350e+08,-1.052000e+06,172793000.0,1.326000e+06,0.000000e+00
3,6,1035.130435,105140.771739,47000.0,135000000.0,596000000.0,-34000000.0,-492000000.0,1.767807e+09,3.179000e+09,0.000000e+00,1.673000e+09,6.672000e+09,-2.170000e+08,3.179000e+09,-3.643000e+09,8.790000e+08,219000000.0,8.200000e+08,0.000000e+00
4,8,1372.780220,26741.428571,7600.0,3100000.0,15900000.0,0.0,0.0,1.920000e+07,4.960000e+07,0.000000e+00,0.000000e+00,5.940000e+08,-9.100000e+06,4.960000e+07,-1.932000e+08,5.791000e+08,455600000.0,9.300000e+06,0.000000e+00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
264,571,1372.780220,26741.428571,7100.0,6613000.0,8737000.0,-1891000.0,0.0,6.230900e+07,3.846500e+07,3.561750e+08,3.452200e+07,2.002820e+08,-9.711000e+06,3.846500e+07,-1.478450e+08,2.390360e+08,22580000.0,-2.793600e+07,3.022200e+07
265,573,1372.780220,26741.428571,22514.0,0.0,64000000.0,-18000000.0,-3000000.0,1.753270e+08,-5.890000e+08,4.970000e+08,0.000000e+00,1.520000e+08,-1.730000e+08,-5.890000e+08,1.323000e+09,1.691000e+09,448000000.0,2.600000e+08,0.000000e+00
266,574,1541.793478,134353.271739,0.0,0.0,313000000.0,0.0,770000000.0,4.270213e+09,-2.007000e+10,0.000000e+00,0.000000e+00,2.045800e+10,-3.629000e+09,-2.007000e+10,-5.283000e+09,-7.800000e+07,0.0,2.418900e+10,1.816500e+10
267,576,1372.780220,26741.428571,5100.0,10200000.0,12300000.0,-24300000.0,0.0,2.187000e+08,-9.540000e+07,0.000000e+00,0.000000e+00,3.000000e+05,-1.320000e+07,-9.540000e+07,6.581000e+08,1.381300e+09,56300000.0,-8.200000e+06,0.000000e+00


In [318]:
print(postcovid_elasticnet)
postcovid_df[postcovid_elasticnet['Features']]

                            Features   Coefficient
0                  industry_labelled  5.731278e-04
1                         new_deaths  4.185718e-05
2                     employee_count  7.021197e-08
3                  commonStockIssued  2.756742e-09
4          deferredRevenueNonCurrent  2.430035e-10
5                   incomeTaxExpense  1.999491e-10
6                        taxPayables  1.526744e-10
7        SellingAndMarketingExpenses  1.086385e-10
8                   accountsPayables  1.076244e-10
9                          taxAssets  8.131360e-11
10            commonStockRepurchased  6.594586e-11
11     inventory_cash-flow-statement  4.591402e-11
12             weightedAverageShsOut  4.305136e-11
13                     dividendsPaid  4.135900e-11
14            changeInWorkingCapital  2.864619e-11
15                 otherNonCashItems  1.824800e-11
16       totalOtherIncomeExpensesNet  1.325871e-11
17                   incomeBeforeTax  8.563283e-12
18                otherCurrentA

Unnamed: 0,industry_labelled,new_deaths,employee_count,commonStockIssued,deferredRevenueNonCurrent,incomeTaxExpense,taxPayables,SellingAndMarketingExpenses,accountsPayables,taxAssets,commonStockRepurchased,inventory_cash-flow-statement,weightedAverageShsOut,dividendsPaid,changeInWorkingCapital,otherNonCashItems,totalOtherIncomeExpensesNet,incomeBeforeTax,otherCurrentAssets,netCashUsedForInvestingActivites
0,2,373.391304,118033.0,0.0,0.0,0.000000e+00,0.0,0.0,0.000000e+00,3.099000e+09,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.0,0.000000e+00,0.000000e+00,1.213600e+10,0.000000e+00
1,6,1783.786517,47000.0,0.0,0.0,4.360000e+08,0.0,0.0,-2.580000e+08,0.000000e+00,-1.470000e+09,-3.850000e+08,1.771000e+09,-2.526000e+09,-1.275000e+09,-472000000.0,2.120000e+08,4.929000e+09,4.721000e+09,-1.591000e+09
2,6,430.739130,47000.0,0.0,0.0,4.480000e+08,0.0,0.0,7.780000e+08,0.000000e+00,-4.000000e+06,-1.700000e+07,1.771000e+09,-2.504000e+09,1.338000e+09,602000000.0,-2.030000e+08,4.400000e+09,4.570000e+09,1.286000e+09
3,9,1335.913043,114000.0,0.0,0.0,2.720000e+07,146000000.0,0.0,-3.000000e+06,0.000000e+00,0.000000e+00,0.000000e+00,6.720000e+07,-1.270000e+07,-5.440000e+07,1700000.0,-7.800000e+06,1.019000e+08,1.405000e+08,-5.200000e+06
4,9,430.739130,114000.0,0.0,0.0,1.490000e+07,124700000.0,0.0,0.000000e+00,0.000000e+00,-2.300000e+07,0.000000e+00,6.640000e+07,-1.290000e+07,1.171000e+08,-112900000.0,-1.540000e+07,6.370000e+07,2.095000e+08,-1.300000e+07
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99,562,363.747253,48250.0,0.0,0.0,1.890000e+08,0.0,0.0,0.000000e+00,0.000000e+00,-2.700000e+08,0.000000e+00,4.144000e+08,-2.690000e+08,-7.300000e+07,52000000.0,-1.140000e+08,7.760000e+08,2.690000e+08,-5.280000e+08
100,563,1783.786517,2200000.0,0.0,0.0,7.980000e+08,904000000.0,0.0,-1.640000e+09,0.000000e+00,-2.408000e+09,-4.699000e+09,2.769000e+09,-1.543000e+09,-8.472000e+09,49000000.0,-2.417000e+09,2.901000e+09,2.500000e+09,-4.558000e+09
101,563,363.747253,2200000.0,0.0,0.0,1.497000e+09,868000000.0,0.0,1.187000e+09,0.000000e+00,-3.339000e+09,9.690000e+08,2.736000e+09,-1.538000e+09,4.670000e+09,302000000.0,-2.100000e+08,6.644000e+09,2.798000e+09,-4.026000e+09
102,566,1783.786517,49300.0,0.0,0.0,1.800000e+06,0.0,0.0,5.890000e+07,0.000000e+00,-2.193000e+08,-1.590000e+07,2.634000e+08,-6.580000e+07,-2.319000e+08,643400000.0,-1.410000e+07,4.250000e+07,5.503000e+08,-1.518000e+08


### Decision Tree Feature Selection

In [319]:
from sklearn.tree import DecisionTreeRegressor

precovid_decisiontree = get_embedded_report(precovid_df, DecisionTreeRegressor(), count, 'decisiontree')
covid_decisiontree = get_embedded_report(covid_df, DecisionTreeRegressor(), count, 'decisiontree')
postcovid_decisiontree = get_embedded_report(postcovid_df, DecisionTreeRegressor(), count, 'decisiontree')


In [320]:
print(precovid_decisiontree)
precovid_df[precovid_decisiontree['Features']]

                        Features  Coefficient
0                    grossProfit     0.364394
1              Number of Layoffs     0.112756
2               incomeTaxExpense     0.111269
3                        Quarter     0.094197
4              commonStockIssued     0.042981
5        otherFinancingActivites     0.041129
6        otherInvestingActivites     0.029813
7     otherNonCurrentLiabilities     0.017692
8                            EPS     0.015473
9                 employee_count     0.014505
10                 dividendsPaid     0.014439
11         weightedAverageShsOut     0.011098
12                 costOfRevenue     0.010179
13                 shortTermDebt     0.010067
14                       netDebt     0.009094
15              retainedEarnings     0.008150
16                   commonStock     0.007320
17  othertotalStockholdersEquity     0.006345
18                       revenue     0.006268
19               netChangeInCash     0.005925


Unnamed: 0,grossProfit,Number of Layoffs,incomeTaxExpense,Quarter,commonStockIssued,otherFinancingActivites,otherInvestingActivites,otherNonCurrentLiabilities,EPS,employee_count,dividendsPaid,weightedAverageShsOut,costOfRevenue,shortTermDebt,netDebt,retainedEarnings,commonStock,othertotalStockholdersEquity,revenue,netChangeInCash
0,2.946000e+09,1,2.200000e+08,2,0.0,2.172000e+09,-863000000.0,7.008000e+09,1.490000,118033.0,-4.400000e+07,4.450080e+08,9.014000e+09,5.139000e+09,3.442900e+10,1.514000e+09,5.000000e+06,4.386000e+09,1.196000e+10,-1.800000e+07
1,2.042100e+10,1,1.765000e+09,2,1000000.0,-1.088000e+09,-576000000.0,4.529600e+10,0.590000,132000.0,-3.653000e+09,1.952867e+10,3.284400e+10,1.747200e+10,8.262900e+10,7.943600e+10,3.862400e+10,0.000000e+00,5.326500e+10,-1.308800e+10
2,2.182100e+10,1,2.232000e+09,1,0.0,-2.312000e+09,86000000.0,5.216500e+10,0.617500,137000.0,-3.443000e+09,1.869628e+10,3.619400e+10,2.242900e+10,7.464200e+10,6.455800e+10,4.280100e+10,0.000000e+00,5.801500e+10,-4.954000e+09
3,0.000000e+00,1,6.879000e+06,4,0.0,5.864000e+06,0.0,0.000000e+00,0.627539,3641.0,-6.605200e+07,9.541878e+07,0.000000e+00,0.000000e+00,-9.393900e+07,0.000000e+00,1.557277e+09,-2.822000e+07,6.675900e+07,0.000000e+00
4,1.275000e+08,1,-4.300000e+06,1,0.0,2.010000e+07,0.0,4.174000e+08,0.100000,6700.0,-6.400000e+06,1.068000e+08,2.783000e+08,4.270000e+07,8.284000e+08,-7.339000e+08,1.100000e+06,1.965200e+09,4.058000e+08,4.580000e+07
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
338,1.356000e+08,1,8.000000e+06,3,0.0,2.040000e+07,-26500000.0,9.760000e+07,0.750000,7700.0,-1.030000e+07,7.390000e+07,2.961000e+08,0.000000e+00,-1.011000e+08,1.312700e+09,1.880000e+07,1.741000e+08,4.317000e+08,7.180000e+07
339,5.383000e+08,2,2.720000e+07,3,0.0,1.744000e+08,-11000000.0,5.338000e+08,0.320000,11500.0,-8.420000e+07,4.233000e+08,7.686000e+08,4.350000e+08,1.366700e+09,-6.594000e+08,4.200000e+06,8.186000e+08,1.306900e+09,1.807000e+08
340,5.512000e+08,1,6.210000e+07,4,0.0,3.114000e+08,-25000000.0,2.560000e+08,0.320000,11500.0,-8.370000e+07,4.195000e+08,7.565000e+08,2.450000e+08,1.652900e+09,-6.759000e+08,4.200000e+06,8.412000e+08,1.307700e+09,5.860000e+07
341,1.894890e+08,1,2.620700e+07,2,9565000.0,4.184030e+08,538000.0,1.512570e+08,1.070000,9000.0,-1.007100e+07,6.194100e+07,5.625160e+08,1.800000e+08,1.127845e+09,2.168204e+09,1.060000e+05,-3.998190e+08,7.520050e+08,-2.001000e+06


In [321]:
print(covid_decisiontree)
covid_df[covid_decisiontree['Features']]

                            Features  Coefficient
0                   incomeTaxExpense     0.418354
1         otherNonCurrentLiabilities     0.317865
2        goodwillAndIntangibleAssets     0.121324
3          deferredRevenueNonCurrent     0.040766
4                  commonStockIssued     0.034538
5         effectOfForexChangesOnCash     0.015869
6            totalStockholdersEquity     0.012096
7                  Number of Layoffs     0.011106
8                        totalAssets     0.003702
9                    costAndExpenses     0.002878
10                           netDebt     0.002773
11       totalOtherIncomeExpensesNet     0.002391
12            commonStockRepurchased     0.002126
13                               EPS     0.001597
14  netCashUsedForInvestingActivites     0.001435
15                 industry_labelled     0.001417
16               otherWorkingCapital     0.001393
17                      freeCashFlow     0.001309
18                    netReceivables     0.000816


Unnamed: 0,incomeTaxExpense,otherNonCurrentLiabilities,goodwillAndIntangibleAssets,deferredRevenueNonCurrent,commonStockIssued,effectOfForexChangesOnCash,totalStockholdersEquity,Number of Layoffs,totalAssets,costAndExpenses,netDebt,totalOtherIncomeExpensesNet,commonStockRepurchased,EPS,netCashUsedForInvestingActivites,industry_labelled,otherWorkingCapital,freeCashFlow,netReceivables,changeInWorkingCapital
0,-6.960000e+08,7.915000e+09,6.130000e+09,7.043000e+09,0.0,0.0,-5.528000e+09,2,6.277300e+10,6.339000e+09,4.095200e+10,-224000000.0,0.0,-4.710000,9.230000e+08,2,0.0,-3.181000e+09,1.135000e+09,0.000000e+00
1,-3.230000e+08,8.221000e+09,6.110000e+09,7.055000e+09,316000000.0,0.0,-7.945000e+09,3,6.864900e+10,7.031000e+09,4.774700e+10,-258000000.0,0.0,-1.970000,-7.152000e+09,2,0.0,1.740000e+08,9.710000e+08,0.000000e+00
2,6.258000e+07,1.467610e+08,1.682878e+09,0.000000e+00,783000.0,395000.0,3.726519e+09,1,1.193066e+10,2.238535e+09,2.140238e+09,-10305000.0,-1442000.0,2.750000,-5.689800e+07,3,-34176000.0,3.801700e+08,7.423460e+08,1.749100e+08
3,5.080000e+08,2.909700e+10,1.097520e+11,0.000000e+00,0.0,-41000000.0,1.355000e+10,1,1.488580e+11,9.646000e+09,6.853900e+10,-618000000.0,-6000000.0,1.798273,-6.150000e+08,6,-66000000.0,7.718000e+09,9.281000e+09,1.708000e+09
4,1.670000e+07,4.370000e+07,3.197000e+08,0.000000e+00,0.0,0.0,7.131000e+08,2,2.934400e+09,1.362900e+09,1.240500e+09,-15900000.0,-100000.0,2.580000,-9.100000e+06,8,7800000.0,4.178000e+08,2.028000e+08,3.552000e+08
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
264,6.551000e+06,2.193520e+08,1.391530e+09,3.561750e+08,2064000.0,-1553000.0,1.909766e+09,1,3.906669e+09,4.873940e+08,8.309640e+08,8584000.0,0.0,0.620000,-1.232500e+07,571,75106000.0,1.505260e+08,5.722000e+08,1.128800e+08
265,-5.000000e+06,7.480000e+08,1.320000e+08,4.970000e+08,0.0,5000000.0,3.617000e+09,1,1.255100e+10,2.495000e+09,3.357000e+09,-190000000.0,0.0,-3.360000,-1.760000e+08,573,7000000.0,-3.930000e+08,9.390000e+08,1.270000e+08
266,-6.010000e+09,4.859000e+10,1.678900e+10,0.000000e+00,0.0,112000000.0,1.571500e+11,1,3.327500e+11,7.244700e+10,6.159600e+10,96000000.0,-100000000.0,-4.700000,-3.302000e+09,574,0.0,3.760000e+08,2.058100e+10,-1.140000e+08
267,-2.400000e+07,5.739000e+08,5.266800e+09,0.000000e+00,0.0,5100000.0,4.643100e+09,1,8.604200e+09,5.936000e+08,1.073900e+09,-15600000.0,0.0,-0.440000,3.490000e+07,576,-21600000.0,1.619000e+08,5.002000e+08,2.139000e+08


In [322]:
print(postcovid_decisiontree)
postcovid_df[postcovid_decisiontree['Features']]

                                   Features  Coefficient
0                                 new_cases     0.252734
1                   otherInvestingActivites     0.190904
2                               grossProfit     0.141622
3                      operatingIncomeRatio     0.071272
4                           operatingIncome     0.055452
5                           netChangeInCash     0.052513
6               SellingAndMarketingExpenses     0.049376
7                        otherCurrentAssets     0.048245
8                  weightedAverageShsOutDil     0.048172
9                   otherCurrentLiabilities     0.011710
10  accumulatedOtherComprehensiveIncomeLoss     0.008105
11                      longTermInvestments     0.007571
12                          acquisitionsNet     0.007529
13                                inventory     0.007185
14             othertotalStockholdersEquity     0.006984
15                         accountsPayables     0.006983
16                         reta

Unnamed: 0,new_cases,otherInvestingActivites,grossProfit,operatingIncomeRatio,operatingIncome,netChangeInCash,SellingAndMarketingExpenses,otherCurrentAssets,weightedAverageShsOutDil,otherCurrentLiabilities,accumulatedOtherComprehensiveIncomeLoss,longTermInvestments,acquisitionsNet,inventory,othertotalStockholdersEquity,accountsPayables,retainedEarnings,goodwill,otherExpenses,totalLiabilities
0,48271.978261,0.0,0.000000e+00,0.000000,0.000000e+00,0.000000e+00,0.0,1.213600e+10,0.000000e+00,1.460800e+10,-4.585000e+09,0.0,0.0,0.000000e+00,7.291000e+09,0.000000e+00,-8.511000e+09,4.091000e+09,0.000000e+00,7.051500e+10
1,287196.233333,154000000.0,9.486000e+09,0.348427,4.717000e+09,-3.648000e+09,0.0,4.721000e+09,1.778000e+09,2.256900e+10,-2.984000e+09,260000000.0,-185000000.0,3.483000e+09,1.414600e+10,-2.580000e+08,5.103000e+09,3.229800e+10,0.000000e+00,1.268970e+11
2,94138.326087,170000000.0,9.790000e+09,0.310762,4.603000e+09,3.311000e+09,0.0,4.570000e+09,1.776000e+09,2.350500e+10,-3.443000e+09,235000000.0,-100000000.0,3.172000e+09,0.000000e+00,7.780000e+08,4.953000e+09,3.172600e+10,2.290000e+08,1.252980e+11
3,114728.956522,1400000.0,0.000000e+00,0.073506,1.097000e+08,-1.590000e+07,0.0,1.405000e+08,6.760000e+07,5.035000e+08,-2.590000e+07,11800000.0,0.0,0.000000e+00,7.269000e+08,-3.000000e+06,8.675000e+08,1.674600e+09,1.260200e+09,2.213000e+09
4,94138.326087,-900000.0,2.011100e+09,0.039332,7.910000e+07,9.100000e+06,0.0,2.095000e+08,6.690000e+07,7.012000e+08,-1.620000e+07,14500000.0,0.0,0.000000e+00,6.066000e+08,0.000000e+00,1.110600e+09,2.485600e+09,1.593400e+09,3.151700e+09
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99,76131.879121,-533000000.0,1.885000e+09,0.177044,8.900000e+08,7.470000e+08,0.0,2.690000e+08,4.164000e+08,1.493000e+09,-1.700000e+07,607000000.0,5000000.0,1.590000e+08,-5.362000e+09,0.000000e+00,1.256300e+10,9.022000e+09,5.220000e+08,2.293600e+10
100,287196.233333,-421000000.0,3.472200e+10,0.037565,5.318000e+09,-2.952000e+09,0.0,2.500000e+09,2.782000e+09,2.569200e+10,-8.498000e+09,0.0,-598000000.0,6.122900e+10,4.587000e+09,-1.640000e+09,8.053200e+10,2.943800e+10,0.000000e+00,1.692460e+11
101,76131.879121,-55000000.0,3.702100e+10,0.044839,6.854000e+09,2.108000e+09,0.0,2.798000e+09,2.745000e+09,2.689200e+10,-9.894000e+09,0.0,-18000000.0,5.992100e+10,4.672000e+09,1.187000e+09,8.251900e+10,2.866400e+10,0.000000e+00,1.613520e+11
102,287196.233333,-151800000.0,1.003700e+09,0.010516,5.660000e+07,6.890000e+07,0.0,5.503000e+08,2.653000e+08,1.273200e+09,-8.802000e+08,0.0,0.0,2.233500e+09,1.079350e+10,5.890000e+07,1.662500e+09,5.969300e+09,8.810000e+07,1.760430e+10


### Random Forest Feature Selection

In [323]:
from sklearn.ensemble import RandomForestRegressor

precovid_randomforest = get_embedded_report(precovid_df, RandomForestRegressor(), count, 'randomforest')
covid_randomforest = get_embedded_report(covid_df, RandomForestRegressor(), count, 'randomforest')
postcovid_randomforest = get_embedded_report(postcovid_df, RandomForestRegressor(), count, 'randomforest')

In [324]:
print(precovid_randomforest)
precovid_df[precovid_randomforest['Features']]

                     Features  Coefficient
0                 grossProfit     0.172788
1           Number of Layoffs     0.126702
2           commonStockIssued     0.044298
3        operatingIncomeRatio     0.041501
4                  EPSDiluted     0.034353
5                         EPS     0.027524
6     otherInvestingActivites     0.027192
7     otherFinancingActivites     0.023247
8            accountsPayables     0.023206
9            incomeTaxExpense     0.023141
10          deferredIncomeTax     0.019256
11                    netDebt     0.019007
12                EBITDARatio     0.016582
13  deferredRevenueNonCurrent     0.016087
14            netChangeInCash     0.015773
15               freeCashFlow     0.013615
16          industry_labelled     0.012753
17            operatingIncome     0.011459
18                  netIncome     0.011064
19             netIncomeRatio     0.010734


Unnamed: 0,grossProfit,Number of Layoffs,commonStockIssued,operatingIncomeRatio,EPSDiluted,EPS,otherInvestingActivites,otherFinancingActivites,accountsPayables,incomeTaxExpense,deferredIncomeTax,netDebt,EBITDARatio,deferredRevenueNonCurrent,netChangeInCash,freeCashFlow,industry_labelled,operatingIncome,netIncome,netIncomeRatio
0,2.946000e+09,1,0.0,0.096405,1.490000,1.490000,-863000000.0,2.172000e+09,0.000000e+00,2.200000e+08,0.000000e+00,3.442900e+10,0.096739,5.249000e+09,-1.800000e+07,-2.820000e+08,2,1.153000e+09,6.620000e+08,0.055351
1,2.042100e+10,1,1000000.0,0.236778,0.585000,0.590000,-576000000.0,-1.088000e+09,2.081000e+09,1.765000e+09,1.126000e+09,8.262900e+10,0.315310,2.878000e+09,-1.308800e+10,1.122100e+10,4,1.261200e+10,1.151900e+10,0.216258
2,2.182100e+10,1,0.0,0.231233,0.615000,0.617500,86000000.0,-2.312000e+09,-1.152300e+10,2.232000e+09,-1.770000e+08,7.464200e+10,0.307558,0.000000e+00,-4.954000e+09,8.792000e+09,4,1.341500e+10,1.156100e+10,0.199276
3,0.000000e+00,1,0.0,0.000000,0.627539,0.627539,0.0,5.864000e+06,0.000000e+00,6.879000e+06,0.000000e+00,-9.393900e+07,1.000000,0.000000e+00,0.000000e+00,6.597500e+07,5,0.000000e+00,5.988000e+07,0.896958
4,1.275000e+08,1,0.0,0.028832,0.090000,0.100000,0.0,2.010000e+07,8.800000e+06,-4.300000e+06,0.000000e+00,8.284000e+08,0.083292,0.000000e+00,4.580000e+07,5.240000e+07,11,1.170000e+07,1.040000e+07,0.025628
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
338,1.356000e+08,1,0.0,0.140839,0.730000,0.750000,-26500000.0,2.040000e+07,0.000000e+00,8.000000e+06,0.000000e+00,-1.011000e+08,0.211721,0.000000e+00,7.180000e+07,6.190000e+07,568,6.080000e+07,5.520000e+07,0.127867
339,5.383000e+08,2,0.0,0.151044,0.320000,0.320000,-11000000.0,1.744000e+08,1.272000e+08,2.720000e+07,0.000000e+00,1.366700e+09,0.198561,0.000000e+00,1.807000e+08,2.442000e+08,570,1.974000e+08,1.350000e+08,0.103298
340,5.512000e+08,1,0.0,0.173205,0.320000,0.320000,-25000000.0,3.114000e+08,2.640000e+07,6.210000e+07,0.000000e+00,1.652900e+09,0.230940,0.000000e+00,5.860000e+07,2.151000e+08,570,2.265000e+08,1.354000e+08,0.103541
341,1.894890e+08,1,9565000.0,0.127457,1.020000,1.070000,538000.0,4.184030e+08,0.000000e+00,2.620700e+07,0.000000e+00,1.127845e+09,0.137116,3.329270e+08,-2.001000e+06,5.468400e+07,571,9.584800e+07,6.610700e+07,0.087908


In [325]:
print(covid_randomforest)
covid_df[covid_randomforest['Features']]

                                    Features  Coefficient
0                                     EBITDA     0.096612
1                           incomeTaxExpense     0.093646
2                  deferredRevenueNonCurrent     0.064999
3                                grossProfit     0.064483
4                          otherNonCashItems     0.055979
5                          Number of Layoffs     0.043245
6                            deferredRevenue     0.042131
7                          commonStockIssued     0.039147
8                   weightedAverageShsOutDil     0.026766
9                    totalStockholdersEquity     0.024885
10                         industry_labelled     0.022681
11                    stockBasedCompensation     0.022097
12  netCashUsedProvidedByFinancingActivities     0.019852
13          netCashUsedForInvestingActivites     0.013871
14                        capitalExpenditure     0.012634
15               goodwillAndIntangibleAssets     0.012203
16            

Unnamed: 0,EBITDA,incomeTaxExpense,deferredRevenueNonCurrent,grossProfit,otherNonCashItems,Number of Layoffs,deferredRevenue,commonStockIssued,weightedAverageShsOutDil,totalStockholdersEquity,industry_labelled,stockBasedCompensation,netCashUsedProvidedByFinancingActivities,netCashUsedForInvestingActivites,capitalExpenditure,goodwillAndIntangibleAssets,totalNonCurrentLiabilities,retainedEarnings,EPSDiluted,weightedAverageShsOut
0,-2.755000e+09,-6.960000e+08,7.043000e+09,-2.437000e+09,-2.604000e+09,2,6.954000e+09,0.0,5.090490e+08,-5.528000e+09,2,0.0,1.511000e+09,9.230000e+08,-5.770000e+08,6.130000e+09,5.171700e+10,-4.487000e+09,-4.71,5.090490e+08
1,-1.202000e+09,-3.230000e+08,7.055000e+09,-2.156000e+09,1.740000e+08,3,7.921000e+09,316000000.0,6.346090e+08,-7.945000e+09,2,0.0,7.013000e+09,-7.152000e+09,0.000000e+00,6.110000e+09,5.926100e+10,-7.895000e+09,-1.97,6.346090e+08
2,3.226040e+08,6.258000e+07,0.000000e+00,1.096714e+09,1.326000e+06,1,0.000000e+00,783000.0,6.929400e+07,3.726519e+09,3,10029000.0,-5.191350e+08,-5.689800e+07,-5.713200e+07,1.682878e+09,3.766587e+09,3.971507e+09,2.74,6.911800e+07
3,6.410000e+09,5.080000e+08,0.000000e+00,9.952000e+09,8.200000e+08,1,0.000000e+00,0.0,1.777000e+09,1.355000e+10,6,135000000.0,-3.643000e+09,-6.150000e+08,-2.170000e+08,1.097520e+11,1.067480e+11,1.600000e+09,1.78,1.767807e+09
4,9.190000e+07,1.670000e+07,0.000000e+00,2.428000e+08,9.300000e+06,2,0.000000e+00,0.0,1.930000e+07,7.131000e+08,8,3100000.0,-1.932000e+08,-9.100000e+06,-9.100000e+06,3.197000e+08,1.328100e+09,1.163600e+09,2.57,1.920000e+07
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
264,8.585900e+07,6.551000e+06,3.561750e+08,1.283150e+08,-2.793600e+07,1,3.022200e+07,2064000.0,6.342700e+07,1.909766e+09,571,6613000.0,-1.478450e+08,-1.232500e+07,-9.711000e+06,1.391530e+09,1.461275e+09,2.372733e+09,0.61,6.230900e+07
265,-5.300000e+08,-5.000000e+06,4.970000e+08,-1.830000e+08,2.600000e+08,1,0.000000e+00,0.0,1.753270e+08,3.617000e+09,573,0.0,1.323000e+09,-1.760000e+08,-1.730000e+08,1.320000e+08,6.756000e+09,-4.380000e+08,-3.36,1.753270e+08
266,2.022628e+10,-6.010000e+09,0.000000e+00,-1.659400e+10,2.418900e+10,1,1.816500e+10,0.0,4.272000e+09,1.571500e+11,574,0.0,-5.283000e+09,-3.302000e+09,-3.629000e+09,1.678900e+10,1.122570e+11,3.839430e+11,-4.70,4.270213e+09
267,-2.820000e+07,-2.400000e+07,0.000000e+00,1.761000e+08,-8.200000e+06,1,0.000000e+00,0.0,2.187000e+08,4.643100e+09,576,10200000.0,6.581000e+08,3.490000e+07,-1.320000e+07,5.266800e+09,3.195800e+09,1.124700e+09,-0.44,2.187000e+08


In [326]:
print(postcovid_randomforest)
postcovid_df[postcovid_randomforest['Features']]

                            Features  Coefficient
0            otherInvestingActivites     0.212912
1                        grossProfit     0.077115
2                   retainedEarnings     0.064560
3              weightedAverageShsOut     0.029216
4           weightedAverageShsOutDil     0.026855
5                             EBITDA     0.025432
6                  Number of Layoffs     0.023316
7          deferredRevenueNonCurrent     0.023126
8                  operatingExpenses     0.022556
9                    operatingIncome     0.017882
10           cashAtBeginningOfPeriod     0.017858
11                   netChangeInCash     0.017851
12                       commonStock     0.014792
13        effectOfForexChangesOnCash     0.013348
14                   acquisitionsNet     0.012378
15                         netIncome     0.012369
16       cashAndShortTermInvestments     0.011042
17       totalOtherIncomeExpensesNet     0.011033
18  netCashUsedForInvestingActivites     0.010923


Unnamed: 0,otherInvestingActivites,grossProfit,retainedEarnings,weightedAverageShsOut,weightedAverageShsOutDil,EBITDA,Number of Layoffs,deferredRevenueNonCurrent,operatingExpenses,operatingIncome,cashAtBeginningOfPeriod,netChangeInCash,commonStock,effectOfForexChangesOnCash,acquisitionsNet,netIncome,cashAndShortTermInvestments,totalOtherIncomeExpensesNet,netCashUsedForInvestingActivites,totalCurrentAssets
0,0.0,0.000000e+00,-8.511000e+09,0.000000e+00,0.000000e+00,0.000000e+00,1,0.0,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,6000000.0,0.0,0.0,0.000000e+00,9.950000e+08,0.000000e+00,0.000000e+00,1.526900e+10
1,154000000.0,9.486000e+09,5.103000e+09,1.771000e+09,1.778000e+09,7.527000e+09,1,0.0,4.624000e+09,4.717000e+09,9.746000e+09,-3.648000e+09,18000000.0,7000000.0,-185000000.0,4.490000e+09,7.572000e+09,2.120000e+08,-1.591000e+09,2.650900e+10
2,170000000.0,9.790000e+09,4.953000e+09,1.771000e+09,1.776000e+09,7.162000e+09,1,0.0,5.147000e+09,4.603000e+09,8.521000e+09,3.311000e+09,18000000.0,-59000000.0,-100000000.0,3.949000e+09,1.187900e+10,-2.030000e+08,1.286000e+09,3.036400e+10
3,1400000.0,0.000000e+00,8.675000e+08,6.720000e+07,6.760000e+07,1.326000e+08,1,0.0,1.382800e+09,1.097000e+08,3.942000e+08,-1.590000e+07,700000.0,1900000.0,0.0,7.460000e+07,3.783000e+08,-7.800000e+06,-5.200000e+06,1.474600e+09
4,-900000.0,2.011100e+09,1.110600e+09,6.640000e+07,6.690000e+07,1.098000e+08,1,0.0,1.753100e+09,7.910000e+07,6.390000e+07,9.100000e+06,700000.0,-1000000.0,0.0,4.890000e+07,7.300000e+07,-1.540000e+07,-1.300000e+07,1.561200e+09
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99,-533000000.0,1.885000e+09,1.256300e+10,4.144000e+08,4.164000e+08,1.377000e+09,2,0.0,9.950000e+08,8.900000e+08,2.460000e+08,7.470000e+08,6000000.0,0.0,5000000.0,5.870000e+08,8.940000e+08,-1.140000e+08,-5.280000e+08,3.987000e+09
100,-421000000.0,3.472200e+10,8.053200e+10,2.769000e+09,2.782000e+09,5.532000e+09,1,0.0,2.940400e+10,5.318000e+09,1.483400e+10,-2.952000e+09,275000000.0,49000000.0,-598000000.0,2.054000e+09,1.181700e+10,-2.417000e+09,-4.558000e+09,8.322000e+10
101,-55000000.0,3.702100e+10,8.251900e+10,2.736000e+09,2.745000e+09,9.824000e+09,2,0.0,3.016700e+10,6.854000e+09,1.188200e+10,2.108000e+09,272000000.0,-149000000.0,-18000000.0,5.149000e+09,1.392300e+10,-2.100000e+08,-4.026000e+09,8.416400e+10
102,-151800000.0,1.003700e+09,1.662500e+09,2.634000e+08,2.653000e+08,5.002000e+08,1,0.0,5.812000e+08,5.660000e+07,2.913000e+08,6.890000e+07,2600000.0,14300000.0,0.0,3.990000e+07,3.602000e+08,-1.410000e+07,-1.518000e+08,5.985900e+09
