# Checking Feature Selection

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from glob import glob 
from data_mani.visu import fs_results_aggregation
from data_mani.visu import get_top_features_from_fs_results 

## 1) NYSE
### 1.1) SFI

In [2]:
# Gather every SFI result file for NYSE, in sorted path order.
path_sfi_nyse = sorted(glob("results/sfi/nyse/*.csv"))

# Aggregate the files with n=10 (the helper's exact semantics live in data_mani.visu).
sfi_nyse_result = fs_results_aggregation(path_sfi_nyse, n=10)

# Last expression of the cell: the notebook renders the returned table.
get_top_features_from_fs_results(sfi_nyse_result)

100%|██████████| 1628/1628 [00:08<00:00, 188.10it/s]


Unnamed: 0,word,lags,frequency
0,DOW JONES,5,2.0%
1,dow jones,8,1.4%
2,happy,19,1.3%
3,dow jones,15,1.2%
4,bonds,1,1.0%
5,chance,18,0.9%
6,happy,17,0.9%
7,headlines,2,0.9%
8,debt,10,0.8%
9,derivatives,18,0.7%


### 1.2) MDI

In [3]:
# Gather every MDI result file for NYSE, in sorted path order.
path_mdi_nyse = sorted(glob("results/mdi/nyse/*.csv"))

# Aggregate the files with n=10 (the helper's exact semantics live in data_mani.visu).
mdi_nyse_result = fs_results_aggregation(path_mdi_nyse, n=10)

# Last expression of the cell: the notebook renders the returned table.
get_top_features_from_fs_results(mdi_nyse_result)

100%|██████████| 1628/1628 [00:09<00:00, 165.82it/s]


Unnamed: 0,word,lags,frequency
0,short selling,16,4.7%
1,short selling,18,4.0%
2,short selling,15,3.6%
3,short sell,16,3.1%
4,short selling,14,2.6%
5,short selling,13,2.5%
6,short selling,17,2.1%
7,short selling,20,1.4%
8,short sell,18,1.2%
9,short selling,8,1.2%


### 1.3) HUANG

In [4]:
# Gather every HUANG result file for NYSE, in sorted path order.
path_huang_nyse = sorted(glob("results/huang/nyse/*.csv"))

# Aggregate the files with n=10 (the helper's exact semantics live in data_mani.visu).
huang_nyse_result = fs_results_aggregation(path_huang_nyse, n=10)

# Last expression of the cell: the notebook renders the returned table.
get_top_features_from_fs_results(huang_nyse_result)

100%|██████████| 1355/1355 [00:04<00:00, 337.70it/s]


Unnamed: 0,word,lags,frequency
0,DOW JONES,2,4.7%
1,DOW JONES,3,2.5%
2,DOW JONES,19,1.5%
3,banking,1,1.4%
4,DOW JONES,1,1.2%
5,banking,5,1.2%
6,DOW JONES,10,1.2%
7,consumption,3,1.1%
8,bubble,3,1.1%
9,DOW JONES,5,1.0%


## 2) NASDAQ
### 2.1) SFI

In [5]:
# Gather every SFI result file for NASDAQ, in sorted path order.
path_sfi_nasdaq = sorted(glob("results/sfi/nasdaq/*.csv"))

# Aggregate the files with n=10 (the helper's exact semantics live in data_mani.visu).
sfi_nasdaq_result = fs_results_aggregation(path_sfi_nasdaq, n=10)

# Last expression of the cell: the notebook renders the returned table.
get_top_features_from_fs_results(sfi_nasdaq_result)

100%|██████████| 7868/7868 [00:47<00:00, 166.95it/s]


Unnamed: 0,word,lags,frequency
0,happy,19,0.5%
1,labor,4,0.4%
2,DOW JONES,5,0.4%
3,lifestyle,18,0.4%
4,bonds,1,0.4%
5,dow jones,15,0.3%
6,dow jones,8,0.3%
7,chance,18,0.3%
8,happy,17,0.3%
9,debt,10,0.3%


### 2.2) MDI

In [6]:
# Gather every MDI result file for NASDAQ, in sorted path order.
path_mdi_nasdaq = sorted(glob("results/mdi/nasdaq/*.csv"))

# Aggregate the files with n=10 (the helper's exact semantics live in data_mani.visu).
mdi_nasdaq_result = fs_results_aggregation(path_mdi_nasdaq, n=10)

# Last expression of the cell: the notebook renders the returned table.
get_top_features_from_fs_results(mdi_nasdaq_result)

100%|██████████| 7868/7868 [01:01<00:00, 128.86it/s]


Unnamed: 0,word,lags,frequency
0,short selling,16,1.6%
1,short selling,18,1.4%
2,short selling,14,1.2%
3,short selling,15,1.2%
4,short sell,16,1.2%
5,short selling,13,1.0%
6,short selling,12,0.7%
7,short sell,15,0.6%
8,short sell,14,0.6%
9,short sell,18,0.6%


### 2.3) HUANG

In [7]:
# Gather every HUANG result file for NASDAQ, in sorted path order.
path_huang_nasdaq = sorted(glob("results/huang/nasdaq/*.csv"))

# Aggregate the files with n=10 (the helper's exact semantics live in data_mani.visu).
huang_nasdaq_result = fs_results_aggregation(path_huang_nasdaq, n=10)

# Last expression of the cell: the notebook renders the returned table.
get_top_features_from_fs_results(huang_nasdaq_result)

100%|██████████| 1105/1105 [00:04<00:00, 274.46it/s]


Unnamed: 0,word,lags,frequency
0,DOW JONES,1,2.0%
1,DOW JONES,3,1.7%
2,DOW JONES,19,1.4%
3,blacklist,1,1.4%
4,DOW JONES,2,1.3%
5,DOW JONES,10,1.2%
6,banking,1,1.2%
7,bubble,1,1.1%
8,arts,3,1.0%
9,cash,1,1.0%


# Case analysis
## APPLE

In [8]:
# Re-index the three NASDAQ result tables by ticker so a single company's row
# can be selected with .loc. df1/df2/df3 hold the SFI, MDI and Huang results.
df1 = sfi_nasdaq_result.set_index("ticker")
df2 = mdi_nasdaq_result.set_index("ticker")
df3 = huang_nasdaq_result.set_index("ticker")

ticker_name = "AAPL US Equity"

# Build a side-by-side comparison: one column per selection method, each
# prefixed with the method name, rows being the remaining columns of the
# result tables (top_1 ... top_10 in the saved output).
# `axis` is passed by keyword: positional arguments other than `objs` were
# deprecated in pandas 1.3 and are rejected by pandas 2.x.
apple = pd.concat(
    [
        df1.loc[ticker_name, :].to_frame().add_prefix("SFI "),
        df2.loc[ticker_name, :].to_frame().add_prefix("MDI "),
        df3.loc[ticker_name, :].to_frame().add_prefix("Huang "),
    ],
    axis=1,
)
apple

Unnamed: 0,SFI AAPL US Equity,MDI AAPL US Equity,Huang AAPL US Equity
top_1,nyse_10,short_sell_15,act_17
top_2,massachusetts_17,short_selling_6,cash_18
top_3,votes_12,short_sell_6,ceo_12
top_4,fed_20,short_selling_15,ceo_13
top_5,nyse_19,financial_markets_5,committee_9
top_6,loss_13,short_selling_9,council_12
top_7,illinois_17,consume_1,council_13
top_8,opportunity_10,BUY_AND_HOLD_9,derivatives_9
top_9,representatives_15,BUY_AND_HOLD_12,fine_20
top_10,hedge_17,short_selling_11,lifestyle_3


## GOOGLE

In [10]:
# NOTE(review): this cell is disabled (everything below is commented out).
# It relies on df1/df2/df3 from the APPLE cell. Before re-enabling, pass the
# concat axis by keyword (axis=1); it is given positionally below, which
# recent pandas versions reject.
# ticker_name =  "GOOGL US Equity" 

# google = pd.concat([df1.loc[ticker_name,:].to_frame().add_prefix("SFI "),
#                     df2.loc[ticker_name,:].to_frame().add_prefix("MDI "),
#                    df3.loc[ticker_name,:].to_frame().add_prefix("Huang ")],1)
# google

## TESLA

In [12]:
# NOTE(review): this cell is disabled (everything below is commented out).
# It relies on df1/df2/df3 from the APPLE cell. The result is assigned to
# `google` although the ticker is TSLA, which looks like a copy-paste
# leftover; rename it before re-enabling. Also pass the concat axis by
# keyword (axis=1); it is given positionally below, which recent pandas
# versions reject.
# ticker_name =  "TSLA US Equity" 

# google = pd.concat([df1.loc[ticker_name,:].to_frame().add_prefix("SFI "),
#                     df2.loc[ticker_name,:].to_frame().add_prefix("MDI "),
#                     df3.loc[ticker_name,:].to_frame().add_prefix("Huang ")],1)
# google

## AMAZON

In [14]:
# NOTE(review): this cell is disabled (everything below is commented out).
# It relies on df1/df2/df3 from the APPLE cell. The result is assigned to
# `google` although the ticker is AMZN, which looks like a copy-paste
# leftover; rename it before re-enabling. Also pass the concat axis by
# keyword (axis=1); it is given positionally below, which recent pandas
# versions reject.
# ticker_name =  "AMZN US Equity" 

# google = pd.concat([df1.loc[ticker_name,:].to_frame().add_prefix("SFI "),
#                     df2.loc[ticker_name,:].to_frame().add_prefix("MDI "),
#                     df3.loc[ticker_name,:].to_frame().add_prefix("Huang ")],1)
# google