In [1]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn import metrics
import math
import utils
import numpy as np
import pandas as pd
from datetime import datetime
from dateutil.relativedelta import relativedelta
from progressbar import ProgressBar

In [2]:
futuresList = utils.futuresList

In [3]:
def data_preprocessing(ticker_df):
    '''
    Input: ticker_df
    Output: ticker df with additional variables
    '''
    ticker_df.columns = ['DATE', 'OPEN', 'HIGH', 'LOW', 'CLOSE', 'VOL', 'OI', 'P', 'R', 'RINFO']
    ticker_df = ticker_df[(ticker_df["VOL"] != 0)]
    
    # create X variables
    ticker_df["CLOSE_PCT"] = ticker_df["CLOSE"].pct_change(periods=1)
    ticker_df["CLOSE_DIFF"] = ticker_df["CLOSE"].diff(periods=1)
    ticker_df["CLOSE_PCT_SHIFT1"] = ticker_df["CLOSE_PCT"].shift(1)
    ticker_df["CLOSE_DIFF_SHIFT1"] = ticker_df["CLOSE_DIFF"].shift(1)
    
    ticker_df["VOL_PCT_SHIFT1"] = ticker_df["CLOSE"].pct_change(periods=1).shift(1)
    ticker_df["VOL_DIFF_SHIFT1"] = ticker_df["CLOSE"].diff(periods=1).shift(1)
    
    ticker_df["RANGE_SHIFT1"] = (ticker_df["HIGH"] - ticker_df["LOW"]).shift(1)
    ticker_df["RANGE_PCT_SHIFT1"] = ticker_df["RANGE_SHIFT1"].pct_change(periods=1)
    ticker_df["RANGE_DIFF_SHIFT1"] = ticker_df["RANGE_SHIFT1"].diff(periods=1)
    
    # create y variables
    ticker_df["LONG"] = [np.nan if math.isnan(x) else 1 if x > 0 else -1 for x in ticker_df.CLOSE.diff(periods=1)]
    
    return ticker_df

In [4]:
def cost_fn(y_actual, y_pred, diff_returns):
    mask = (y_actual != y_pred).apply(lambda x: int(x))
    return sum(mask * abs(diff_returns))

In [5]:
def future_strat_longshort(sig, mag):
    return sig * mag

def future_strat_fixed_threshold(sig, mag, threshold): 
    mag_mask = (mag > threshold).astype("int")
    return mag_mask * mag * sig

def future_strat_perc_threshold(sig, mag, perc):
    mag_percentile = mag.rank(method="average", pct=True)
    mag_mask = (mag_percentile > perc).astype("int")
    return mag_mask * mag * sig

In [6]:
def generate_time_intervals(start_date, end_date, months=3):
    dates = []
    
    # generate first and last quarter
    current_month = (start_date.month - 1)//months*months + 1
    end_month = (end_date.month - 1)//months*months + 1
    current_quarter = datetime(start_date.year, current_month, 1)
    end_quarter = datetime(end_date.year, end_month, 1)
    
    # generate dates of all quarters within first and last quarter
    if current_quarter == start_date:
        dates.append(current_quarter)
        
    while current_quarter < end_quarter:
        current_quarter += relativedelta(months=months)
        dates.append(current_quarter)
    
    return dates

In [7]:
def generate_train_test(df, time_intervals, n_train=8, n_test=1):
    train_test_sets = []
    
    for i in range(len(time_intervals) - n_train - 1):
        # get train start and ends
        train_start = time_intervals[i]
        train_end = time_intervals[i+n_train]
        test_end = time_intervals[i+n_train+1]
        
        # split train and test data
        train_df = df[(df.DATE >= train_start) & (df.DATE < train_end)]
        test_df = df[(df.DATE >= train_end) & (df.DATE < test_end)]
        
        train_test_sets.append([train_df, test_df])
        
    return train_test_sets

In [8]:
def walk_forward_validation(train_test_sets, y_var, X_vars, cost_var, model):
    accuracies = []
    costs = []
    for i in range(len(train_test_sets)):
        train_df = train_test_sets[i][0]
        test_df = train_test_sets[i][1]
        
        X_train, y_train = train_df[X_vars].to_numpy(), train_df[y_var].to_numpy()
        X_test, y_test = test_df[X_vars].to_numpy(), test_df[y_var].to_numpy()
        
        # scale data
        scaler = StandardScaler()
        scaler.fit(X_train)
        X_train = scaler.transform(X_train)
        X_test = scaler.transform(X_test)
        
        # train logreg model
        fitted = model.fit(X_train, y_train)
        y_pred = fitted.predict(X_test)
        
        accuracies.append(metrics.accuracy_score(pd.Series(y_test), pd.Series(y_pred)))
        costs.append(cost_fn(pd.Series(y_test), pd.Series(y_pred), test_df[cost_var].reset_index(drop=True)))
    
    return np.mean(accuracies), np.mean(costs)

In [10]:
future = futuresList[0]
df = pd.read_csv(f"tickerData/{future}.txt", parse_dates = ["DATE"])
df = data_preprocessing(df)
dates = generate_time_intervals(datetime(2010,1,1), datetime(2015,1,1))
train_test_sets = generate_train_test(df, dates)
accuracy, cost = walk_forward_validation(train_test_sets, "LONG", ["CLOSE_PCT_SHIFT1", "VOL_PCT_SHIFT1"], 
                                            "CLOSE_DIFF", LogisticRegression())
print(accuracy, cost)

0.5027728769176746 13461.666666666666


In [33]:
def train_val_lr(future, y_var="LONG", X_vars=["CLOSE_PCT", "VOL_PCT"]):
    df = pd.read_csv(f"tickerData/{future}.txt", parse_dates = ["DATE"])
    df = data_preprocessing(df)

    # set start and end date
    train_start_date = dt.datetime(2010, 1, 1, 0, 0, 0)
    train_end_date = dt.datetime(2019, 1, 1, 0, 0, 0)
    test_end_date = dt.datetime(2019, 12, 1, 0, 0, 0)

    # slice 
    df_sliced = df[["DATE", "LONG", "CLOSE_PCT", "VOL_PCT", "RANGE_PCT", "CLOSE_DIFF", "VOL_DIFF", 
                    "RANGE_DIFF", "RANGE_SHIFT1"]]
    df_sliced = df.dropna()
    
    # set train test data
    train = df_sliced[(df_sliced.DATE > train_start_date) & (df_sliced.DATE < train_end_date)].set_index("DATE")
    test = df_sliced[(df_sliced.DATE > train_end_date) & (df_sliced.DATE < test_end_date)].set_index("DATE")
    X_train, y_train = train[X_vars].to_numpy(), train["LONG"].to_numpy()
    X_test, y_test = test[X_vars].to_numpy(), test["LONG"].to_numpy()
    
    # scale data
    scaler = StandardScaler()
    scaler.fit(X_train)
    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)
    
    # train logreg model
    lr_model = LogisticRegression().fit(X_train, y_train)
    y_pred = lr_model.predict(X_test)
    
    accuracy = metrics.accuracy_score(y_test, y_pred)
    roc_auc = metrics.roc_auc_score(y_test, y_pred)
    f1 = metrics.f1_score(y_test, y_pred)
    precision = metrics.precision_score(y_test, y_pred)
    recall = metrics.recall_score(y_test, y_pred)
    
    return lr_model, accuracy, roc_auc, f1, precision, recall

In [30]:
def train_val_svm(future, y_var="LONG", X_vars=["CLOSE_PCT", "VOL_PCT"]):
    df = pd.read_csv(f"tickerData/{future}.txt", parse_dates = ["DATE"])
    df.columns = ['DATE', 'OPEN', 'HIGH', 'LOW', 'CLOSE', 'VOL', 'OI', 'P', 'R', 'RINFO']
    df = df[(df["CLOSE"] != 0) & (df["VOL"] != 0)]
    
    # create x variables to use
    df["CLOSE_PCT"] = df["CLOSE"].pct_change(periods=1)
    df["CLOSE_DIFF"] = df["CLOSE"].diff(periods=1)
    df["VOL_PCT"] = df["VOL"].pct_change(periods=1)
    df["VOL_DIFF"] = df["VOL"].pct_change(periods=1)
    df["CLOSE_SHIFT1"] = df["CLOSE"].shift(periods=1)
    df["VOL_SHIFT1"] = df["VOL"].shift(periods=1)

    # create y variables (long if increase, short if decrease)
    df["LONG"] = [1 if x > 0 else -1 for x in df.CLOSE_DIFF]

    # set start and end date
    train_start_date = dt.datetime(2010, 1, 1, 0, 0, 0)
    train_end_date = dt.datetime(2019, 1, 1, 0, 0, 0)
    test_end_date = dt.datetime(2019, 12, 1, 0, 0, 0)

    # slice 
    df_sliced = df[["DATE", "LONG", "CLOSE_SHIFT1", "VOL_SHIFT1", "CLOSE_PCT", "VOL_PCT", "CLOSE_DIFF", "VOL_DIFF"]]
    df_sliced = df.dropna()
    
    # set train test data
    train = df_sliced[(df_sliced.DATE > train_start_date) & (df_sliced.DATE < train_end_date)].set_index("DATE")
    test = df_sliced[(df_sliced.DATE > train_end_date) & (df_sliced.DATE < test_end_date)].set_index("DATE")
    X_train, y_train = train[X_vars].to_numpy(), train["LONG"].to_numpy()
    X_test, y_test = test[X_vars].to_numpy(), test["LONG"].to_numpy()
    
    # scale data
    scaler = StandardScaler()
    scaler.fit(X_train)
    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)
    
    # train svm model
    svm_model = SVC().fit(X_train, y_train)
    y_pred = svm_model.predict(X_test)
    
    accuracy = metrics.accuracy_score(y_test, y_pred)
    roc_auc = metrics.roc_auc_score(y_test, y_pred)
    f1 = metrics.f1_score(y_test, y_pred)
    precision = metrics.precision_score(y_test, y_pred)
    recall = metrics.recall_score(y_test, y_pred)
    
    return svm_model, accuracy, roc_auc, f1, precision, recall

In [36]:
pbar = ProgressBar()

accuracy = []
roc = []
f1 = []
precision = []
recall = []

X_vars = ["CLOSE_PCT", "VOL_PCT"]
# X_vars = ["CLOSE_SHIFT1", "VOL_SHIFT1"]
# X_vars = ["CLOSE_DIFF", "VOL_DIFF"]

for future in pbar(futuresList):
    f_model, f_accuracy, f_roc, f_f1, f_precision, f_recall = train_val_lr(future, X_vars=X_vars)
    accuracy.append(f_accuracy)
    roc.append(f_roc)
    f1.append(f_f1)
    precision.append(f_precision)
    recall.append(f_recall)

results = pd.DataFrame({
    "future": futuresList, 
    "accuracy": accuracy,
    "roc": roc, 
    "f1": f1,
    "precision": precision,
    "recall": recall
})

results.to_csv("model_metrics/logistic_regression/lr_pct_results.csv", index=False)
# results.to_csv("model_metrics/logistic_regression/lr_shift1_results.csv", index=False)
# results.to_csv("model_metrics/logistic_regression/lr_diff_results.csv", index=False)

  _warn_prf(average, modifier, msg_start, len(result))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ticker_df["CLOSE_PCT"] = ticker_df["CLOSE"].pct_change(periods=1).shift(1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ticker_df["CLOSE_DIFF"] = ticker_df["CLOSE"].diff(periods=1).shift(1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ticke

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ticker_df["CLOSE_PCT"] = ticker_df["CLOSE"].pct_change(periods=1).shift(1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ticker_df["CLOSE_DIFF"] = ticker_df["CLOSE"].diff(periods=1).shift(1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ticker_df["VOL_PCT"] = ticker_df["CLOSE"].pct_change(periods

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ticker_df["CLOSE_PCT"] = ticker_df["CLOSE"].pct_change(periods=1).shift(1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ticker_df["CLOSE_DIFF"] = ticker_df["CLOSE"].diff(periods=1).shift(1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ticker_df["VOL_PCT"] = ticker_df["CLOSE"].pct_change(periods

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ticker_df["CLOSE_PCT"] = ticker_df["CLOSE"].pct_change(periods=1).shift(1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ticker_df["CLOSE_DIFF"] = ticker_df["CLOSE"].diff(periods=1).shift(1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ticker_df["VOL_PCT"] = ticker_df["CLOSE"].pct_change(periods

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ticker_df["RANGE_PCT"] = ticker_df["RANGE_SHIFT1"].pct_change(periods=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ticker_df["RANGE_DIFF"] = ticker_df["RANGE_SHIFT1"].diff(periods=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ticker_df["LONG"] = [np.nan if math.isnan(x) else 1 if x > 0 els

  _warn_prf(average, modifier, msg_start, len(result))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ticker_df["CLOSE_PCT"] = ticker_df["CLOSE"].pct_change(periods=1).shift(1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ticker_df["CLOSE_DIFF"] = ticker_df["CLOSE"].diff(periods=1).shift(1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ticke

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ticker_df["CLOSE_PCT"] = ticker_df["CLOSE"].pct_change(periods=1).shift(1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ticker_df["CLOSE_DIFF"] = ticker_df["CLOSE"].diff(periods=1).shift(1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ticker_df["VOL_PCT"] = ticker_df["CLOSE"].pct_change(periods

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ticker_df["CLOSE_PCT"] = ticker_df["CLOSE"].pct_change(periods=1).shift(1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ticker_df["CLOSE_DIFF"] = ticker_df["CLOSE"].diff(periods=1).shift(1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ticker_df["VOL_PCT"] = ticker_df["CLOSE"].pct_change(periods

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ticker_df["CLOSE_PCT"] = ticker_df["CLOSE"].pct_change(periods=1).shift(1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ticker_df["CLOSE_DIFF"] = ticker_df["CLOSE"].diff(periods=1).shift(1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ticker_df["VOL_PCT"] = ticker_df["CLOSE"].pct_change(periods

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ticker_df["CLOSE_PCT"] = ticker_df["CLOSE"].pct_change(periods=1).shift(1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ticker_df["CLOSE_DIFF"] = ticker_df["CLOSE"].diff(periods=1).shift(1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ticker_df["VOL_PCT"] = ticker_df["CLOSE"].pct_change(periods

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ticker_df["CLOSE_PCT"] = ticker_df["CLOSE"].pct_change(periods=1).shift(1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ticker_df["CLOSE_DIFF"] = ticker_df["CLOSE"].diff(periods=1).shift(1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ticker_df["VOL_PCT"] = ticker_df["CLOSE"].pct_change(periods

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ticker_df["RANGE_DIFF"] = ticker_df["RANGE_SHIFT1"].diff(periods=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ticker_df["LONG"] = [np.nan if math.isnan(x) else 1 if x > 0 else -1 for x in ticker_df.CLOSE.diff(periods=1)]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ticker_df["CLOSE_PCT"] = t

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ticker_df["CLOSE_PCT"] = ticker_df["CLOSE"].pct_change(periods=1).shift(1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ticker_df["CLOSE_DIFF"] = ticker_df["CLOSE"].diff(periods=1).shift(1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ticker_df["VOL_PCT"] = ticker_df["CLOSE"].pct_change(periods

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ticker_df["RANGE_SHIFT1"] = (ticker_df["HIGH"] - ticker_df["LOW"]).shift(1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ticker_df["RANGE_PCT"] = ticker_df["RANGE_SHIFT1"].pct_change(periods=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ticker_df["RANGE_DIFF"] = ticker_df["RANGE_SHIFT1"].diff

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ticker_df["CLOSE_PCT"] = ticker_df["CLOSE"].pct_change(periods=1).shift(1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ticker_df["CLOSE_DIFF"] = ticker_df["CLOSE"].diff(periods=1).shift(1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ticker_df["VOL_PCT"] = ticker_df["CLOSE"].pct_change(periods

  _warn_prf(average, modifier, msg_start, len(result))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ticker_df["CLOSE_PCT"] = ticker_df["CLOSE"].pct_change(periods=1).shift(1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ticker_df["CLOSE_DIFF"] = ticker_df["CLOSE"].diff(periods=1).shift(1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ticke

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ticker_df["CLOSE_PCT"] = ticker_df["CLOSE"].pct_change(periods=1).shift(1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ticker_df["CLOSE_DIFF"] = ticker_df["CLOSE"].diff(periods=1).shift(1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ticker_df["VOL_PCT"] = ticker_df["CLOSE"].pct_change(periods

In [10]:
pbar = ProgressBar()

accuracy = []
roc = []
f1 = []
precision = []
recall = []

# X_vars = ["CLOSE_PCT", "VOL_PCT"]
# X_vars = ["CLOSE_SHIFT1", "VOL_SHIFT1"]
X_vars = ["CLOSE_DIFF", "VOL_DIFF"]

for future in pbar(futuresList):
    f_model, f_accuracy, f_roc, f_f1, f_precision, f_recall = train_val_svm(future, X_vars=X_vars)
    accuracy.append(f_accuracy)
    roc.append(f_roc)
    f1.append(f_f1)
    precision.append(f_precision)
    recall.append(f_recall)

results = pd.DataFrame({
    "future": futuresList, 
    "accuracy": accuracy,
    "roc": roc, 
    "f1": f1,
    "precision": precision,
    "recall": recall
})

# results.to_csv("model_metrics/svm/svm_pct_results.csv", index=False)
# results.to_csv("model_metrics/svm/svm_shift1_results.csv", index=False)
results.to_csv("model_metrics/svm/svm_diff_results.csv", index=False)

100% |########################################################################|
