In [None]:
## Wrapper Feature selection method as outlined in Wrapper Methods (Kohavi & John, 1997).

In [3]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np

# Read dataset
df = pd.read_csv('bitcoin-ta-2015.csv')
df['Timestamp'] = pd.to_datetime(df['Timestamp'])

# Create 'Target' column: 1 if the 'Close' price is higher than the previous day, 0 otherwise.
df['Target'] = np.where(df['Close'].shift(-1) > df['Close'], 1, 0)
df = df.select_dtypes(include=[np.number])

def forward_selection(df, target_column):
    initial_features = df.columns.tolist()
    initial_features.remove(target_column)
    best_features = []
    best_score = -np.inf

    while (len(initial_features)>0):
        scores_with_candidates = []
        for feature in initial_features:
            candidate_features = best_features + [feature]
            X_train, X_test, y_train, y_test = train_test_split(df[candidate_features], df[target_column], test_size=0.3, random_state=42)

            model = RandomForestClassifier(n_estimators=50, random_state=42)
            model.fit(X_train, y_train)
            predictions = model.predict(X_test)
            score = accuracy_score(y_test, predictions)

            scores_with_candidates.append((score, feature))
        
        scores_with_candidates.sort()
        best_score_new, best_feature_new = scores_with_candidates.pop()

        if best_score_new > best_score:
            best_features.append(best_feature_new)
            best_score = best_score_new
        else:
            break

    return best_features

def backward_elimination(df, target_column):
    features = df.columns.tolist()
    features.remove(target_column)
    best_score = -np.inf

    while(len(features)>0):
        scores_with_candidates = []
        for feature in features:
            candidate_features = list(set(features) - set([feature]))
            X_train, X_test, y_train, y_test = train_test_split(df[candidate_features], df[target_column], test_size=0.3, random_state=42)

            model = RandomForestClassifier(n_estimators=50, random_state=42)
            model.fit(X_train, y_train)
            predictions = model.predict(X_test)
            score = accuracy_score(y_test, predictions)

            scores_with_candidates.append((score, feature))
        
        scores_with_candidates.sort()
        best_score_new, eliminated_feature_new = scores_with_candidates.pop()

        if best_score_new > best_score:
            features.remove(eliminated_feature_new)
            best_score = best_score_new
        else:
            break

    return features

# Perform forward selection
selected_features_forward = forward_selection(df, 'Target')
print('Selected features using forward selection: ', selected_features_forward)

# Perform backward elimination
selected_features_backward = backward_elimination(df, 'Target')
print('Selected features using backward elimination: ', selected_features_backward)


Selected features using forward selection:  ['volatility_kcli']
Selected features using backward elimination:  ['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume', 'volume_adi', 'volume_obv', 'volume_cmf', 'volume_fi', 'volume_em', 'volume_sma_em', 'volume_vpt', 'volume_vwap', 'volume_mfi', 'volume_nvi', 'volatility_bbm', 'volatility_bbh', 'volatility_bbl', 'volatility_bbw', 'volatility_bbp', 'volatility_bbhi', 'volatility_bbli', 'volatility_kcc', 'volatility_kch', 'volatility_kcl', 'volatility_kcw', 'volatility_kcp', 'volatility_kchi', 'volatility_kcli', 'volatility_dcl', 'volatility_dch', 'volatility_dcm', 'volatility_dcw', 'volatility_dcp', 'volatility_atr', 'volatility_ui', 'trend_macd', 'trend_macd_signal', 'trend_macd_diff', 'trend_sma_fast', 'trend_sma_slow', 'trend_ema_fast', 'trend_ema_slow', 'trend_vortex_ind_pos', 'trend_vortex_ind_neg', 'trend_vortex_ind_diff', 'trend_trix', 'trend_mass_index', 'trend_dpo', 'trend_kst', 'trend_kst_sig', 'trend_kst_diff', 'trend_ichimoku_