In [31]:
import pandas as pd
import pandas_datareader.data as web
from datetime import datetime, timedelta
import yfinance as yf
import matplotlib.pyplot as plt
import seaborn
import scipy
from scipy.signal import argrelextrema
import numpy as np
import xgboost as xgb
import matplotlib.pyplot as plt
from sklearn import  metrics
from sklearn.model_selection import train_test_split
from sklearn import svm
import seaborn
import json
import pickle
from tpot import TPOTClassifier
import os

Check previous 45 days. If on day 46 or 47, the high is greater than the 46 open by 5%, that is a postive. Else negative

In [32]:
def calculate_percent_increase(open_price, high_price):
    # Given an open price and a high price, calculate the percent increase
    return (high_price-open_price)/open_price

In [33]:
def calculating_average_crossover(prev_closes):
    my_df = pd.DataFrame(prev_closes)
    twenty = my_df.T.rolling(window = 20, axis = 1).mean().iloc[:,-26:].values[0]
    ten = my_df.T.rolling(window = 10, axis = 1).mean().iloc[:,-26:].values[0]
    
    twenty_over_ten = []
    for i in range(0,26):
        if twenty[i]>ten[i]:
            twenty_over_ten.append(0)
        else:
            twenty_over_ten.append(1)
    return twenty, ten, twenty_over_ten

In [34]:
def create_model_df(historical_df):
    # This function goes through every 47 period window in our historical dataframe. It keeps the 45 previous close prices
    # As training data. It calculates the percent increase from the open on day 46 to the highest point on either day 46 or 47
    # creates a data frame with the previous 45 days. The date of day 46. And the label either 1 for buy, 0 for no buy
    open_prices = historical_df['Open'].values
    close_prices = historical_df['Close'].values
    high_prices = historical_df['High'].values
    low_prices = historical_df['Low'].values
    dates = historical_df.index
    i = 0
    for j in range(45,len(historical_df)-2):
        prev_closes = close_prices[i:j]
        prev_high = high_prices[i:j]
        prev_open = open_prices[i:j]
        prev_low = low_prices[i:j]
        
        # get the averages and crossovers
        twenty_average, ten_average, cross_over = calculating_average_crossover(prev_closes)
        
        # get the label -  at least 6% gain in the next three days
        open_46 = open_prices[j]
        high_46 = high_prices[j]
        high_47 = high_prices[j+1]
        high_48 = high_prices[j+2]
        
        # get the highest price from either day 46, 47 or 48
        high_price = max(high_46, high_47, high_48)

        # Get the percent increase and deterimine if it should be a positive label or negative
        percent_increase = calculate_percent_increase(open_46, high_price)
        if percent_increase >=.06:
            label = 1
        else:
            label = 0

        # Create our model_df
        if i ==0:
            model_df = pd.DataFrame(prev_closes)
            model_df = model_df.T
            
            # Add the open high and low prices
            for index in range(len(prev_closes)):
                model_df['open_{}'.format(index)] = prev_open[index]
                model_df['high_{}'.format(index)] = prev_high[index]
                model_df['low_{}'.format(index)] = prev_low[index]
                
            # Add the averages and the crossover
            for index in range(len(twenty_average)):
                model_df['twenty_average_{}'.format(index+19)] = twenty_average[index]
                model_df['ten_average_{}'.format(index+19)] = ten_average[index]
                model_df['crossover_{}'.format(index+19)] = cross_over[index]
                
            model_df['purchase_date'] = dates[j]
            model_df['label'] = label
        else:
            append_df = pd.DataFrame(prev_closes)
            append_df = append_df.T
            
            # Add the open high and low prices
            for index in range(len(prev_closes)):
                append_df['open_{}'.format(index)] = prev_open[index]
                append_df['high_{}'.format(index)] = prev_high[index]
                append_df['low_{}'.format(index)] = prev_low[index]
            
            # Add the averages and the crossover
            for index in range(len(twenty_average)):
                append_df['twenty_average_{}'.format(index+19)] = twenty_average[index]
                append_df['ten_average_{}'.format(index+19)] = ten_average[index]
                append_df['crossover_{}'.format(index+19)] = cross_over[index]
            
            append_df['purchase_date'] = dates[j]
            append_df['label'] = label
            model_df = model_df.append(append_df)
        i+=1
    return model_df

In [35]:
def cross_validate(training_data, best_model):
    # 5 fold cross validation on the training set, determine the optimal threshold
    for i in range(1,6):
        splice_size = int(len(training_data)/5)
        dev = training_data.iloc[(i-1)*splice_size:i*splice_size]
        
        # train is everything before the dev set and everything after
        train_before = training_data.iloc[0 : (i-1)*splice_size]
        train_after = training_data.iloc[i*splice_size :]
        train = train_before.append(train_after)
        
        best_model.fit(train.iloc[:,:-3], train['label'].values)
        predictions = best_model.predict_proba(dev.iloc[:,:-3])
        #clf = svm.SVC(probability=True)
        #clf.fit(train.iloc[:,:-3], train['label'])
        #predictions = clf.predict_proba(dev.iloc[:,:-3])
        
        aftermath = dev
        aftermath['predictions'] = predictions[:,1]
        if i ==1:
            prediction_df = aftermath
        else:
            prediction_df = prediction_df.append(aftermath)
    return prediction_df

In [36]:
def find_optimal_threshold(aftermath):
    thresholds = np.arange(1,20)
    thresholds = thresholds/20
    total_buys = 0
    max_success_rate = 0
    for threshold in thresholds:
        buys = aftermath[aftermath['predictions']>threshold]
        
        good_buy = buys[buys['label']==1]
        bad_buy = buys[buys['label']==0]
        if len(buys)<10:
            continue
        success_rate = len(good_buy)/len(buys)
        print (success_rate)
        if success_rate > max_success_rate:
            max_success_rate = success_rate
            optimal_threshold = threshold
            good_buys = len(good_buy)
            total_buys = len(buys)
    if total_buys == 0:
        return 0, __
    buy_info = {}
    buy_info['purchases'] = total_buys
    buy_info['successes'] = good_buys
    buy_info['success_rate'] = good_buys/total_buys
    return optimal_threshold, buy_info

In [37]:
def hold_till_up(hist, index, open_price):
    """Function holds the stock until it hits a 5% gain"""
    hold_gain = 0
    hold_days = 0
    never_sold = 0
    # go through every day from day of purchase, calculate if high was above 5% open price. If so, sell. 
    sold = False
    # Index is the day you bought. So, we want to add days to index from index + 0 all the way 
    # to index + remaining days (remaining = len(hist)-index)
    for i in range(0,(len(hist)-index)[0]):
        high = hist.iloc[index+i]['High'].values[0]
        if (high-open_price)/open_price >= .05:
            hold_gain += 1.05 * open_price
            hold_days = i+1
            sold = True
            break
    # If you never sold it, you would still be holding, so close out
    if sold == False:
        close = hist.iloc[index+i]['Close'].values[0]
        never_sold += close-open_price
        hold_days = 0
    
    return hold_gain, hold_days, never_sold

In [47]:
def calculate_return(ticker, buy_orders, hist):
    dollar_success = 0
    two_day_dollar_fail = 0
    investment = 0
    hold_gains = 0
    days_held = []
    never_sold_closeout = 0
    investment_success = 0
    investment_failure = 0
    total_investment = 0

    results_dict = {}
    for i, row in buy_orders.iterrows():

        if row['label'] ==1:
            # Get the data for the date you are buying at open. Get the max high and make sure it was over 5% 
            index = hist[hist.index==row['purchase_date']]['index']
            # get the open price
            open_price = hist[hist.index==row['purchase_date']]['Open'].values[0]
            # Get the two high prices and ensure that one of them was higher than 5% of open
            high1 = hist[hist.index==row['purchase_date']]['High'].values[0]
            high2 = hist.iloc[index+1]['High'].values[0]
            high3 = hist.iloc[index+2]['High'].values[0]
            
            if (max(high1,high2, high3)-open_price)/open_price <=.05:
                print ('problem')
            # I would trigger a limit sell at 5% higher than open, so the increase is 1.05*open
            
            dollar_success += 1.05 * open_price
            investment_success += open_price

        else:
            # for loses, I would have three potential strategies. 
            # 1. get out after two days
            # 2. hold until it gets up 5%
            # 3. set up a trailing loss function so, if the price dips x% sell

            #### Option 1 (get out after two days) ####
            index = hist[hist.index==row['purchase_date']]['index']
            open_price = hist[hist.index==row['purchase_date']]['Open'].values[0]
            close2 = hist.iloc[index+1]['Close'].values[0]
            two_day_dollar_fail += close2

            #### Option 2 hold until it gets up 5% ####
            # Go through each day and if the high is above 5% I would sell. Otherwise go to next day, 
            # until there are no days left
            hold_gain, hold_days, never_sold = hold_till_up(hist, index, open_price)
            hold_gains += hold_gain
            days_held.append(hold_days)
            never_sold_closeout += never_sold

            investment_failure += open_price
            #### Option 3 Set up a selling trigger ####


        total_investment += open_price

    results_dict['ticker'] = ticker
    results_dict['purchases'] = len(buy_orders)
    results_dict['classic_success'] = len(buy_orders[buy_orders['label']==1])
    results_dict['classic_failures'] = len(buy_orders[buy_orders['label']==0])
    results_dict['total_investment']  = total_investment
    results_dict['investment_success'] = investment_success
    results_dict['investment_failure'] = investment_failure
    results_dict['dollar_success'] = dollar_success
    results_dict['two_day_get_out'] = two_day_dollar_fail
    results_dict['hold_gains'] = hold_gains
    results_dict['days_held'] = days_held
    results_dict['never_sold_closeout'] = never_sold_closeout
    results_dict['two_day_strategy'] = dollar_success+two_day_dollar_fail
    results_dict['two_day_strategy_roi'] = ((dollar_success+two_day_dollar_fail)-total_investment)/total_investment
    results_dict['hold_strategy'] = dollar_success+hold_gains
    results_dict['hold_strategy_roi'] = ((dollar_success+hold_gains)-total_investment)/total_investment
    
    return results_dict

In [48]:
def plot_buys(buy_order, hist, save_place, show = False):
    # Get the purchase date, open price on purchase and the previous 30 + future 30 day close prices
    purchase_date = buy_order.purchase_date
    index = hist[hist.index==purchase_date]['index'][0]
    subset = hist.iloc[index-30:index+31]
    open_price = hist[hist.index==purchase_date]['Open'].values[0]
    
    # Plot the previous 30 to the future 30 days
    plt.style.use('seaborn')
    plt.plot(np.arange(0,len(subset),1), subset['Close'])
    
    tick_labels = [x.strftime('%Y-%m-%d') for x in subset.index[::5]]
    plt.xticks(np.arange(0,len(subset),5), labels = tick_labels, rotation = 'vertical')
    
    # Plot the purchase
    plt.scatter(30, open_price,c = 'r')
    plt.title('{} close prices for 30 days before and 30 days after a recommended purchase'.format(buy_order.ticker))
    plt.savefig(save_place)
    if show:
        plt.show()
    plt.close()

In [49]:
def plot_testset(aftermath, optimal_threshold, plot, test_set_graph_save_place):
    data_to_plot = [aftermath[aftermath['label']==1]['predictions'],
               aftermath[aftermath['label']==0]['predictions']]
    x_labels = ['Buy', 'Dont-buy']
    plt.hlines(optimal_threshold, 0, 5)
    # Create a figure instance
    fig = plt.figure(1, figsize=(9, 6))
    # Create an axes instance
    ax = fig.add_subplot(111)
    # Create the boxplot
    bp = ax.boxplot(data_to_plot, whis = [5,95])

    ax.set_ylabel('Predictions (Probability of Same)')
    ax.set_xlabel('Progen Classification')
    
    ax.set_xticklabels(x_labels)
    
    plt.savefig(test_set_graph_save_place)
    
    print (plot)
    if plot == True:
        plt.show()
    plt.close()
    

In [50]:
def find_best_model_tpot(train_X):
    tpot = TPOTClassifier(generations=5, population_size=10, verbosity=2, scoring='precision_micro')
    tpot.fit(train_X.iloc[:,:-3], train_X['label'].values)
    
    model_type = tpot.fitted_pipeline_.steps[-1][0]
    best_model = tpot.fitted_pipeline_.steps[-1][1]

    return model_type, best_model

In [51]:
def predicting_days_to_purchase(buy_chart_save_place, results_file, plot, test_set_graph_save_place, ticker, 
                                start_date, end_date, model_meta_file, model_filename):

    # go through each ticker and do the following:
    # 1 get the data in a format to train a model
    # 2 run a cross validation on the training set to determine optimal thresholds
    # 3 train a model with all the training set
    # 4 test that model on the test set
    # 5 track wins and losses from purchases based on our models classifications on test set
    # 6 Plot on a chart of close prices where our purchase price would have been. 
    
    #### Pass in: Plot, save_places, ticker list
    # Get data, split into train and test then run cross validation on the train set
    company = yf.Ticker(ticker)
    hist = company.history(period='1d', interval = '1d', start = start_date, end = end_date)
    data = create_model_df(hist)
    data['ticker'] = ticker

    train_X, test_X, train_y, test_y = train_test_split(data, data['label'], test_size = .2)
    
    # Get the best model type from tpot
    model_type, best_model = find_best_model_tpot(train_X)

    # Cross Validate and determine optimal threshold from cross validation
    cv_predictions = cross_validate(train_X, best_model)
    optimal_threshold, buy_info = find_optimal_threshold(cv_predictions)
    if optimal_threshold == 0:
        print ('Not enough buys')
        return None
    # Train your model on all the training data and test on the test set. Store predictions
    #clf = svm.SVC(probability=True)
    #clf.fit(train_X.iloc[:,:-3], train_y)
    best_model.fit(train_X.iloc[:,:-3], train_y)
    
    # Save the model and model info off
    pickle.dump(best_model, open(model_filename, 'wb'))
    
    model_dict = {}
    model_dict['type'] = model_type
    model_dict['ticker'] = ticker
    model_dict['threshold'] = optimal_threshold
    with open(model_meta_file, 'w') as fp:
        json.dump(model_dict, fp)
    
    predictions = best_model.predict_proba(test_X.iloc[:,:-3])
    aftermath = test_X
    aftermath['predictions'] = predictions[:,1]

    # plot the test set buy and no buys and the optimal threshold
    plot_testset(aftermath, optimal_threshold, plot, test_set_graph_save_place)
    
    # Determine the buy_orders that would be placed with that threshold and determine their returns
    buy_orders = aftermath[aftermath['predictions'] > optimal_threshold]
    
    if len(buy_orders)==0:
        results_dict = {'Purchases':'None'}
        with open(results_file, 'w') as fp:
            json.dump(results_dict, fp)
        return
    hist['index'] = np.arange(0,len(hist))
    results_dict = calculate_return(ticker, buy_orders, hist)
    with open(results_file, 'w') as fp:
        json.dump(results_dict, fp)

    # Plot where they are on the graph of the stock previous days
    k = 0
    for i,buy in buy_orders.iterrows():
        k +=1
        plot_buys(buy, hist, buy_chart_save_place+'{}.png'.format(k))

In [52]:
def make_directories(ticker, model_version):
    if not os.path.exists('../{}/'.format(model_version)):
        os.mkdir('../{}/'.format(model_version))
        
    if not os.path.exists('../{}/data/'.format(model_version)):
        os.mkdir('../{}/data/'.format(model_version))
        
    if not os.path.exists('../{}/data/{}/'.format(model_version, ticker)):
        os.mkdir('../{}/data/{}/'.format(model_version, ticker))
        
    # Make sure the directory exists to save the buy graphs
    if not os.path.exists('../{}/data/{}/buy_graphs/'.format(model_version, ticker)):
        os.mkdir('../{}/data/{}/buy_graphs/'.format(model_version, ticker))
        
    # Make sure the directory exists to save the test results images
    if not os.path.exists('../{}/data/{}/model_graphs/'.format(model_version, ticker)):
        os.mkdir('../{}/data/{}/model_graphs/'.format(model_version, ticker))

In [53]:
def main(ticker, to_plot, model_version):
    # 3 Years of Data
    days_to_subtract = 365*5
    end_date = (datetime.today())
    start_date = end_date-timedelta(days=days_to_subtract)

    end_date = end_date.strftime('%Y-%m-%d')
    start_date = start_date.strftime('%Y-%m-%d')
    
    make_directories(ticker, model_version)
        
    buy_chart_save_place = '../{}/data/{}/buy_graphs/purchase'.format(model_version, ticker)
    
    # Save the results
    results_file = '../{}/data/{}/purchase_results.json'.format(model_version, ticker)
    
    test_graph_save_place = '../{}/data/{}/model_graphs/test_results.png'.format(model_version, ticker)
    
    model_meta_file = '../{}/data/{}/meta_data.json'.format(model_version, ticker)
    model_filename = '../{}/data/{}/model.sav'.format(model_version, ticker)

    predicting_days_to_purchase(buy_chart_save_place, results_file, to_plot, 
                                test_graph_save_place, ticker, start_date, end_date, model_meta_file, model_filename)

In [54]:
Tickers = ['PINS', 'SFIX', 'AMD', 'CSTM', 'WFC', 'T', 'NCLH', 'INTC', 'PTON', 'LUV', 'UAA', 'KAR', 'SYF']
model_version = 'version_2'

In [55]:
for ticker in Tickers:
    print (ticker)
    main(ticker, False, model_version)

PINS


HBox(children=(IntProgress(value=0, description='Optimization Progress', max=60, style=ProgressStyle(descripti…

Generation 1 - Current best internal CV score: 0.771921921921922
Generation 2 - Current best internal CV score: 0.771921921921922
Generation 3 - Current best internal CV score: 0.771921921921922
Generation 4 - Current best internal CV score: 0.771921921921922
Generation 5 - Current best internal CV score: 0.771921921921922

Best pipeline: GradientBoostingClassifier(input_matrix, learning_rate=0.01, max_depth=2, max_features=0.6500000000000001, min_samples_leaf=1, min_samples_split=15, n_estimators=100, subsample=0.8500000000000001)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is tryin

0.26666666666666666
0.26666666666666666
0.26666666666666666
0.2845528455284553
0.375
0.425531914893617
0.4411764705882353
0.4642857142857143
0.7058823529411765
0.75
0.7857142857142857
0.8181818181818182


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  "Adding an axes using the same arguments as a previous axes "


False
SFIX


HBox(children=(IntProgress(value=0, description='Optimization Progress', max=60, style=ProgressStyle(descripti…

Generation 1 - Current best internal CV score: 0.7328071379547015
Generation 2 - Current best internal CV score: 0.7328071379547015
Generation 3 - Current best internal CV score: 0.7328071379547015
Generation 4 - Current best internal CV score: 0.7328071379547015
Generation 5 - Current best internal CV score: 0.7328071379547015

Best pipeline: RandomForestClassifier(input_matrix, bootstrap=False, criterion=entropy, max_features=0.1, min_samples_leaf=6, min_samples_split=8, n_estimators=100)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is tryin

0.3755656108597285
0.39598997493734334
0.4383954154727794
0.4869281045751634
0.5129151291512916
0.5518672199170125
0.5943396226415094
0.6182795698924731
0.6607142857142857
0.6762589928057554
0.6942148760330579
0.7272727272727273
0.7682926829268293
0.8125
0.8076923076923077
0.7857142857142857
0.7931034482758621
0.6


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  "Adding an axes using the same arguments as a previous axes "


False
AMD


HBox(children=(IntProgress(value=0, description='Optimization Progress', max=60, style=ProgressStyle(descripti…

Generation 1 - Current best internal CV score: 0.7314139201965707
Generation 2 - Current best internal CV score: 0.7314139201965707
Generation 3 - Current best internal CV score: 0.7314139201965707
Generation 4 - Current best internal CV score: 0.7314246033865712
Generation 5 - Current best internal CV score: 0.7314246033865712

Best pipeline: ExtraTreesClassifier(OneHotEncoder(input_matrix, minimum_fraction=0.2, sparse=False, threshold=10), bootstrap=True, criterion=entropy, max_features=0.35000000000000003, min_samples_leaf=1, min_samples_split=7, n_estimators=100)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is tryin

0.30119176598049835
0.33374844333748444
0.3551673944687045
0.3815126050420168
0.3967611336032389
0.4282238442822384
0.4518950437317784
0.48161764705882354
0.4975609756097561
0.5490196078431373
0.5789473684210527
0.6395348837209303
0.631578947368421
0.6410256410256411
0.72
0.8


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  "Adding an axes using the same arguments as a previous axes "


False
CSTM


HBox(children=(IntProgress(value=0, description='Optimization Progress', max=60, style=ProgressStyle(descripti…

Generation 1 - Current best internal CV score: 0.7480019010599642
Generation 2 - Current best internal CV score: 0.7480019010599642
Generation 3 - Current best internal CV score: 0.7480019010599642
Generation 4 - Current best internal CV score: 0.7500905194906584
Generation 5 - Current best internal CV score: 0.7562706901203777

Best pipeline: ExtraTreesClassifier(input_matrix, bootstrap=False, criterion=gini, max_features=0.35000000000000003, min_samples_leaf=4, min_samples_split=19, n_estimators=100)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is tryin

0.32432432432432434
0.3475533249686324
0.37965616045845274
0.4161073825503356
0.44901960784313727
0.47126436781609193
0.5056179775280899
0.5536332179930796
0.6088888888888889
0.6265060240963856
0.6638655462184874
0.7241379310344828
0.7910447761194029
0.7857142857142857
0.8571428571428571


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  "Adding an axes using the same arguments as a previous axes "


False
WFC


HBox(children=(IntProgress(value=0, description='Optimization Progress', max=60, style=ProgressStyle(descripti…

Generation 1 - Current best internal CV score: 0.9731474732201189
Generation 2 - Current best internal CV score: 0.9731474732201189
Generation 3 - Current best internal CV score: 0.9731474732201189
Generation 4 - Current best internal CV score: 0.9731474732201189
Generation 5 - Current best internal CV score: 0.9731474732201189

Best pipeline: KNeighborsClassifier(input_matrix, n_neighbors=95, p=2, weights=uniform)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is tryin

0.3157894736842105
0.38461538461538464


  "Adding an axes using the same arguments as a previous axes "


False
T


HBox(children=(IntProgress(value=0, description='Optimization Progress', max=60, style=ProgressStyle(descripti…

Generation 1 - Current best internal CV score: 0.9834782275218149
Generation 2 - Current best internal CV score: 0.9834782275218149
Generation 3 - Current best internal CV score: 0.9834782275218149
Generation 4 - Current best internal CV score: 0.9834782275218149
Generation 5 - Current best internal CV score: 0.9845144969518668

Best pipeline: RandomForestClassifier(input_matrix, bootstrap=True, criterion=entropy, max_features=0.6500000000000001, min_samples_leaf=2, min_samples_split=18, n_estimators=100)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is tryin

0.11627906976744186
0.13793103448275862
0.1891891891891892
0.17857142857142858
0.25
0.2727272727272727


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  "Adding an axes using the same arguments as a previous axes "


False
NCLH


HBox(children=(IntProgress(value=0, description='Optimization Progress', max=60, style=ProgressStyle(descripti…

Generation 1 - Current best internal CV score: 0.9307729287965385
Generation 2 - Current best internal CV score: 0.9307729287965385
Generation 3 - Current best internal CV score: 0.9307729287965385
Generation 4 - Current best internal CV score: 0.9317985150365897
Generation 5 - Current best internal CV score: 0.9328347844666416

Best pipeline: KNeighborsClassifier(RFE(input_matrix, criterion=entropy, max_features=0.35000000000000003, n_estimators=100, step=0.6500000000000001), n_neighbors=22, p=1, weights=distance)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is tryin

0.1842818428184282
0.27510917030567683
0.42424242424242425
0.5
0.5238095238095238
0.5714285714285714
0.6190476190476191
0.6666666666666666
0.6875
0.7142857142857143
0.717948717948718
0.6944444444444444
0.7096774193548387
0.65
0.6363636363636364


  "Adding an axes using the same arguments as a previous axes "


False
INTC


HBox(children=(IntProgress(value=0, description='Optimization Progress', max=60, style=ProgressStyle(descripti…

Generation 1 - Current best internal CV score: 0.9607499599380376
Generation 2 - Current best internal CV score: 0.9607499599380376
Generation 3 - Current best internal CV score: 0.9607499599380376
Generation 4 - Current best internal CV score: 0.9607499599380376
Generation 5 - Current best internal CV score: 0.9607499599380376

Best pipeline: RandomForestClassifier(input_matrix, bootstrap=True, criterion=entropy, max_features=0.2, min_samples_leaf=15, min_samples_split=8, n_estimators=100)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is tryin

0.12173913043478261
0.1650485436893204
0.23333333333333334
0.24324324324324326
0.2857142857142857
0.29411764705882354


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  "Adding an axes using the same arguments as a previous axes "


False
PTON


HBox(children=(IntProgress(value=0, description='Optimization Progress', max=60, style=ProgressStyle(descripti…

Generation 1 - Current best internal CV score: 0.6689473684210526
Generation 2 - Current best internal CV score: 0.6689473684210526
Generation 3 - Current best internal CV score: 0.6689473684210526
Generation 4 - Current best internal CV score: 0.6878947368421053
Generation 5 - Current best internal CV score: 0.6878947368421053

Best pipeline: ExtraTreesClassifier(KNeighborsClassifier(input_matrix, n_neighbors=2, p=2, weights=distance), bootstrap=True, criterion=gini, max_features=0.4, min_samples_leaf=12, min_samples_split=4, n_estimators=100)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is tryin

0.4842105263157895
0.4842105263157895
0.4842105263157895
0.4842105263157895
0.4842105263157895
0.4942528735632184
0.524390243902439
0.5492957746478874
0.5409836065573771
0.4722222222222222
0.48
0.1


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  "Adding an axes using the same arguments as a previous axes "


False
LUV


HBox(children=(IntProgress(value=0, description='Optimization Progress', max=60, style=ProgressStyle(descripti…

Generation 1 - Current best internal CV score: 0.9328508092516424
Generation 2 - Current best internal CV score: 0.9328508092516424
Generation 3 - Current best internal CV score: 0.9328561508466426
Generation 4 - Current best internal CV score: 0.9328561508466426
Generation 5 - Current best internal CV score: 0.9328561508466426

Best pipeline: KNeighborsClassifier(input_matrix, n_neighbors=15, p=1, weights=distance)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is tryin

0.135678391959799
0.19665271966527198
0.2708333333333333
0.3548387096774194
0.3709677419354839
0.3157894736842105
0.09523809523809523


  "Adding an axes using the same arguments as a previous axes "


False
UAA


HBox(children=(IntProgress(value=0, description='Optimization Progress', max=60, style=ProgressStyle(descripti…

Generation 1 - Current best internal CV score: 0.8656894329896907
Generation 2 - Current best internal CV score: 0.8656894329896907
Generation 3 - Current best internal CV score: 0.8656894329896907
Generation 4 - Current best internal CV score: 0.8656894329896907
Generation 5 - Current best internal CV score: 0.8656894329896907

Best pipeline: GradientBoostingClassifier(CombineDFs(input_matrix, input_matrix), learning_rate=0.1, max_depth=8, max_features=0.6500000000000001, min_samples_leaf=2, min_samples_split=10, n_estimators=100, subsample=0.8)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is tryin

0.2914572864321608
0.3188405797101449
0.3644859813084112
0.4
0.4
0.42857142857142855
0.43548387096774194
0.4444444444444444
0.4489795918367347
0.5121951219512195
0.5128205128205128
0.48484848484848486
0.4666666666666667
0.5
0.5454545454545454
0.55
0.5882352941176471
0.6363636363636364


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  "Adding an axes using the same arguments as a previous axes "


False
KAR


HBox(children=(IntProgress(value=0, description='Optimization Progress', max=60, style=ProgressStyle(descripti…

Generation 1 - Current best internal CV score: 0.9638427434431922
Generation 2 - Current best internal CV score: 0.9638427434431922
Generation 3 - Current best internal CV score: 0.9638427434431922
Generation 4 - Current best internal CV score: 0.9638427434431922
Generation 5 - Current best internal CV score: 0.9638427434431922

Best pipeline: ExtraTreesClassifier(input_matrix, bootstrap=True, criterion=gini, max_features=0.35000000000000003, min_samples_leaf=10, min_samples_split=7, n_estimators=100)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is tryin

0.09871244635193133
0.13253012048192772
0.25925925925925924
0.35714285714285715


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  "Adding an axes using the same arguments as a previous axes "


False
SYF


HBox(children=(IntProgress(value=0, description='Optimization Progress', max=60, style=ProgressStyle(descripti…

Generation 1 - Current best internal CV score: 0.9483569587628866
Generation 2 - Current best internal CV score: 0.9483569587628866
Generation 3 - Current best internal CV score: 0.9483569587628866
Generation 4 - Current best internal CV score: 0.9483569587628866
Generation 5 - Current best internal CV score: 0.9483569587628866

Best pipeline: ExtraTreesClassifier(input_matrix, bootstrap=False, criterion=gini, max_features=0.1, min_samples_leaf=8, min_samples_split=7, n_estimators=100)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is tryin

0.1441717791411043
0.26666666666666666
0.39285714285714285
0.42857142857142855
0.4772727272727273
0.45714285714285713
0.5172413793103449
0.5357142857142857
0.6
0.6818181818181818
0.7058823529411765
0.6875


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  "Adding an axes using the same arguments as a previous axes "


False
