# Predictive Modeling - Apple, Inc. (AAPL)

## Importing Libraries:

In [113]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import seaborn as sns

from datetime import datetime

from sklearn.cross_validation import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import TimeSeriesSplit

from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor

%matplotlib inline
%config InlineBackend.figure_format = 'retina'

-----

## Company Name

In [114]:
company_name = 'Apple'

## Importing the Data:

In [115]:
def data_reader(company_name):
    company_name=company_name
    df = pd.read_csv(f'data/{company_name}_Clean.csv')
    df['Date'] = pd.to_datetime(df.Date)
    df.set_index('Date', inplace=True)
    df.sort_index(inplace=True, ascending=True)
    return df

In [116]:
df = data_reader('Apple')

In [117]:
df.head(3)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Ex_Dividend,Split_Ratio,Adj_Open,Adj_High,Adj_Low,Adj_Close,Adj_Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1980-12-12,28.75,28.87,28.75,28.75,2093900.0,0.0,1.0,0.422706,0.42447,0.422706,0.422706,117258400.0
1980-12-15,27.38,27.38,27.25,27.25,785200.0,0.0,1.0,0.402563,0.402563,0.400652,0.400652,43971200.0
1980-12-16,25.37,25.37,25.25,25.25,472000.0,0.0,1.0,0.37301,0.37301,0.371246,0.371246,26432000.0


### Dropping the Everything Before July 2014:

In [118]:
df.drop(df[:'2014-07-01'].index, inplace=True)

In [119]:
df.head(3)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Ex_Dividend,Split_Ratio,Adj_Open,Adj_High,Adj_Low,Adj_Close,Adj_Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2014-07-02,93.865,94.06,93.09,93.48,28465000.0,0.0,1.0,88.348738,88.532278,87.619283,87.986364,28465000.0
2014-07-03,93.67,94.1,93.2,94.03,22891800.0,0.0,1.0,88.165198,88.569928,87.722819,88.504041,22891800.0
2014-07-07,94.14,95.99,94.1,95.968,56468000.0,0.0,1.0,88.607577,90.348856,88.569928,90.328149,56468000.0


-----

# Splitting the Data a Training and Testing Set

## Creating a Function for the Training Set with Feature Engineering:

In [120]:
def shift_dates(df):
    shifted_df = pd.DataFrame(df[:-1].values, index = df[1:].index, columns=df.columns)
    return shifted_df

In [121]:
def lag_and_shift_data(dataframe):
    temp_df = dataframe.copy()
    
    short_SMA = temp_df.rolling(window=12).mean().copy()
    mid_SMA = temp_df.rolling(window=26).mean().copy()
    long_SMA = temp_df.rolling(window=85).mean().copy()

    short_EMA = temp_df.ewm(span=12, adjust=False).mean().copy()
    mid_EMA = temp_df.ewm(span=26, adjust=False).mean().copy()
    long_EMA = temp_df.ewm(span=85, adjust=False).mean().copy()
    
    temp_df = pd.merge(temp_df, short_SMA, left_index=True, right_index=True, suffixes=['','_Short_SMA'])
    temp_df = pd.merge(temp_df, mid_SMA, left_index=True, right_index=True, suffixes=['','_Mid_SMA'])
    temp_df = pd.merge(temp_df, long_SMA, left_index=True, right_index=True, suffixes=['','_Long_SMA'])
    
    temp_df = pd.merge(temp_df, short_EMA, left_index=True, right_index=True, suffixes=['','_Short_EMA'])
    temp_df = pd.merge(temp_df, mid_EMA, left_index=True, right_index=True, suffixes=['','_Mid_EMA'])
    temp_df = pd.merge(temp_df, long_EMA, left_index=True, right_index=True, suffixes=['','_Long_EMA'])
    
    temp_df.dropna(inplace=True) 
    
    temp_df = shift_dates(temp_df)

    return temp_df

In [122]:
df_shift = lag_and_shift_data(df)

In [123]:
df_shift.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Ex_Dividend,Split_Ratio,Adj_Open,Adj_High,Adj_Low,...,Low_Long_EMA,Close_Long_EMA,Volume_Long_EMA,Ex_Dividend_Long_EMA,Split_Ratio_Long_EMA,Adj_Open_Long_EMA,Adj_High_Long_EMA,Adj_Low_Long_EMA,Adj_Close_Long_EMA,Adj_Volume_Long_EMA
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2014-10-31,106.959,107.35,105.9,106.98,40654793.0,0.0,1.0,101.174038,101.543891,100.172315,...,98.626302,99.449687,55034280.0,0.002727,1.0,93.946729,94.716652,93.184824,93.963038,55034280.0
2014-11-03,108.01,108.04,107.21,108.0,44639285.0,0.0,1.0,102.168194,102.196572,101.411463,...,98.825923,99.648531,54792540.0,0.002664,1.0,94.137926,94.890604,93.376141,94.153636,54792540.0
2014-11-04,108.22,110.3,108.01,109.4,52282550.0,0.0,1.0,102.366836,104.334338,102.168194,...,99.039506,99.87531,54734170.0,0.002602,1.0,94.329296,95.110226,93.580607,94.370598,54734170.0
2014-11-05,109.36,109.49,107.72,108.6,41574365.0,0.0,1.0,103.445178,103.568147,101.893879,...,99.241378,100.078209,54428120.0,0.002541,1.0,94.541293,95.306922,93.773939,94.564917,54428120.0
2014-11-06,109.1,109.3,108.125,108.86,37435905.0,0.0,1.0,103.199241,103.388423,102.276974,...,99.447974,100.282437,54032960.0,0.002482,1.0,94.742641,95.494863,93.971684,94.760435,54032960.0


## Taking a Look at the Time-Shifted Data Set:

In [124]:
df_shift.tail(3)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Ex_Dividend,Split_Ratio,Adj_Open,Adj_High,Adj_Low,...,Low_Long_EMA,Close_Long_EMA,Volume_Long_EMA,Ex_Dividend_Long_EMA,Split_Ratio_Long_EMA,Adj_Open_Long_EMA,Adj_High_Long_EMA,Adj_Low_Long_EMA,Adj_Close_Long_EMA,Adj_Volume_Long_EMA
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-03-23,170.0,172.68,168.6,168.845,41051076.0,0.0,1.0,170.0,172.68,168.6,...,169.691723,171.097703,33582580.0,0.000518,1.0,171.126419,172.657153,169.671634,171.077463,33582580.0
2018-03-26,168.39,169.92,164.94,164.94,40248954.0,0.0,1.0,168.39,169.92,164.94,...,169.581218,170.9545,33737610.0,0.000506,1.0,171.062781,172.593499,169.561596,170.934731,33737610.0
2018-03-27,168.07,173.1,166.44,172.77,36272617.0,0.0,1.0,168.07,173.1,166.44,...,169.508166,170.996721,33796570.0,0.000494,1.0,170.993182,172.605278,169.489,170.977412,33796570.0


In [125]:
# df_shift['2014-07-01':]

## Dropping Everything Before July 2014:

In [126]:
# df_shift.drop(df_shift[:'2014-07-01'].index, inplace=True)

In [127]:
# df_shift.head(3)

In [128]:
def data_split(dataframe):
    temp_df = dataframe.copy()
    X_test = temp_df['2016-07-02':'2017-03-27']
    temp_df.drop(temp_df['2016-03-02':'2016-12-01'].index, inplace=True)
    X_train = temp_df
    return X_train, X_test

In [129]:
X_train, X_test = data_split(df_shift)

## Creating a Function for a Training a Testing Set: 

In [104]:
# def data_split(dataframe):
#     temp_df = dataframe.copy()
#     X_test = temp_df['2016-07-02':'2017-03-27']
#     temp_df.drop(temp_df['2016-07-02':'2017-03-27'].index, inplace=True)
#     X_train = temp_df
#     return X_train, X_test

In [105]:
# X_train, X_test = data_split(df_shift)

### Inspecting the Training Data:

In [130]:
X_train.head(3)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Ex_Dividend,Split_Ratio,Adj_Open,Adj_High,Adj_Low,...,Low_Long_EMA,Close_Long_EMA,Volume_Long_EMA,Ex_Dividend_Long_EMA,Split_Ratio_Long_EMA,Adj_Open_Long_EMA,Adj_High_Long_EMA,Adj_Low_Long_EMA,Adj_Close_Long_EMA,Adj_Volume_Long_EMA
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2014-10-31,106.959,107.35,105.9,106.98,40654793.0,0.0,1.0,101.174038,101.543891,100.172315,...,98.626302,99.449687,55034280.0,0.002727,1.0,93.946729,94.716652,93.184824,93.963038,55034280.0
2014-11-03,108.01,108.04,107.21,108.0,44639285.0,0.0,1.0,102.168194,102.196572,101.411463,...,98.825923,99.648531,54792540.0,0.002664,1.0,94.137926,94.890604,93.376141,94.153636,54792540.0
2014-11-04,108.22,110.3,108.01,109.4,52282550.0,0.0,1.0,102.366836,104.334338,102.168194,...,99.039506,99.87531,54734170.0,0.002602,1.0,94.329296,95.110226,93.580607,94.370598,54734170.0


### Inspecting the Testing Data:

In [131]:
X_test.head(3)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Ex_Dividend,Split_Ratio,Adj_Open,Adj_High,Adj_Low,...,Low_Long_EMA,Close_Long_EMA,Volume_Long_EMA,Ex_Dividend_Long_EMA,Split_Ratio_Long_EMA,Adj_Open_Long_EMA,Adj_High_Long_EMA,Adj_Low_Long_EMA,Adj_Close_Long_EMA,Adj_Volume_Long_EMA
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2016-07-05,95.49,96.465,95.33,95.89,26026540.0,0.0,1.0,93.321192,94.274047,93.164826,...,98.298623,99.153689,39847470.0,0.006571,1.0,96.547814,97.431902,95.771692,96.604325,39847470.0
2016-07-06,95.39,95.4,94.46,94.99,27705210.0,0.0,1.0,93.223463,93.233236,92.314586,...,98.209353,99.056859,39565090.0,0.006418,1.0,96.470504,97.334258,95.691294,96.51661,39565090.0
2016-07-07,94.6,95.66,94.37,95.53,30949090.0,0.0,1.0,92.451406,93.487331,92.22663,...,98.120066,98.974839,39364720.0,0.006269,1.0,96.377036,97.244795,95.61072,96.443207,39364720.0


-----

# Normalizing the Data with a MinMaxScaler

## Instantiating the Scaler:

In [132]:
scaler = MinMaxScaler(feature_range=(-1, 1))

## Scaling the Training Set:

In [133]:
X_train_sc = scaler.fit_transform(X_train.values)

## Scaling the Testing Set:

In [134]:
X_test_sc = scaler.transform(X_test.values)

## Setting the y Training Set:

In [141]:
X_train.shape

(662, 84)

In [142]:
y_train.shape

(854,)

In [135]:
y_train = df[X_train.index[0]:X_train.index[-1]].Close.values

## Setting the y Testing Set

In [136]:
y_test = df[X_test.index[0]:X_test.index[-1]].Close.values

-----

# Random Forest Regression Model

## Setting up the Random Forest (RF) Regressor:

In [137]:
rf = RandomForestRegressor(n_estimators=100, max_depth=15, 
                           min_samples_leaf=3, bootstrap=False, 
                           n_jobs=2, random_state=42)

In [138]:
df.shape

(939, 12)

In [139]:
X_train.shape

(662, 84)

### Fitting the Scaled Data with the RF Model:

In [140]:
rf.fit(X_train_sc, y_train)

JoblibValueError: JoblibValueError
___________________________________________________________________________
Multiprocessing exception:
...........................................................................
/Users/adamdelreal/anaconda3/envs/tensorflow/lib/python3.6/runpy.py in _run_module_as_main(mod_name='ipykernel_launcher', alter_argv=1)
    188         sys.exit(msg)
    189     main_globals = sys.modules["__main__"].__dict__
    190     if alter_argv:
    191         sys.argv[0] = mod_spec.origin
    192     return _run_code(code, main_globals, None,
--> 193                      "__main__", mod_spec)
        mod_spec = ModuleSpec(name='ipykernel_launcher', loader=<_f...b/python3.6/site-packages/ipykernel_launcher.py')
    194 
    195 def run_module(mod_name, init_globals=None,
    196                run_name=None, alter_sys=False):
    197     """Execute a module's code without importing it

...........................................................................
/Users/adamdelreal/anaconda3/envs/tensorflow/lib/python3.6/runpy.py in _run_code(code=<code object <module> at 0x105839c00, file "/Use...3.6/site-packages/ipykernel_launcher.py", line 5>, run_globals={'__annotations__': {}, '__builtins__': <module 'builtins' (built-in)>, '__cached__': '/Users/adamdelreal/anaconda3/envs/tensorflow/lib...ges/__pycache__/ipykernel_launcher.cpython-36.pyc', '__doc__': 'Entry point for launching an IPython kernel.\n\nTh...orts until\nafter removing the cwd from sys.path.\n', '__file__': '/Users/adamdelreal/anaconda3/envs/tensorflow/lib/python3.6/site-packages/ipykernel_launcher.py', '__loader__': <_frozen_importlib_external.SourceFileLoader object>, '__name__': '__main__', '__package__': '', '__spec__': ModuleSpec(name='ipykernel_launcher', loader=<_f...b/python3.6/site-packages/ipykernel_launcher.py'), 'app': <module 'ipykernel.kernelapp' from '/Users/adamd.../python3.6/site-packages/ipykernel/kernelapp.py'>, ...}, init_globals=None, mod_name='__main__', mod_spec=ModuleSpec(name='ipykernel_launcher', loader=<_f...b/python3.6/site-packages/ipykernel_launcher.py'), pkg_name='', script_name=None)
     80                        __cached__ = cached,
     81                        __doc__ = None,
     82                        __loader__ = loader,
     83                        __package__ = pkg_name,
     84                        __spec__ = mod_spec)
---> 85     exec(code, run_globals)
        code = <code object <module> at 0x105839c00, file "/Use...3.6/site-packages/ipykernel_launcher.py", line 5>
        run_globals = {'__annotations__': {}, '__builtins__': <module 'builtins' (built-in)>, '__cached__': '/Users/adamdelreal/anaconda3/envs/tensorflow/lib...ges/__pycache__/ipykernel_launcher.cpython-36.pyc', '__doc__': 'Entry point for launching an IPython kernel.\n\nTh...orts until\nafter removing the cwd from sys.path.\n', '__file__': '/Users/adamdelreal/anaconda3/envs/tensorflow/lib/python3.6/site-packages/ipykernel_launcher.py', '__loader__': <_frozen_importlib_external.SourceFileLoader object>, '__name__': '__main__', '__package__': '', '__spec__': ModuleSpec(name='ipykernel_launcher', loader=<_f...b/python3.6/site-packages/ipykernel_launcher.py'), 'app': <module 'ipykernel.kernelapp' from '/Users/adamd.../python3.6/site-packages/ipykernel/kernelapp.py'>, ...}
     86     return run_globals
     87 
     88 def _run_module_code(code, init_globals=None,
     89                     mod_name=None, mod_spec=None,

...........................................................................
/Users/adamdelreal/anaconda3/envs/tensorflow/lib/python3.6/site-packages/ipykernel_launcher.py in <module>()
     11     # This is added back by InteractiveShellApp.init_path()
     12     if sys.path[0] == '':
     13         del sys.path[0]
     14 
     15     from ipykernel import kernelapp as app
---> 16     app.launch_new_instance()

...........................................................................
/Users/adamdelreal/anaconda3/envs/tensorflow/lib/python3.6/site-packages/traitlets/config/application.py in launch_instance(cls=<class 'ipykernel.kernelapp.IPKernelApp'>, argv=None, **kwargs={})
    653 
    654         If a global instance already exists, this reinitializes and starts it
    655         """
    656         app = cls.instance(**kwargs)
    657         app.initialize(argv)
--> 658         app.start()
        app.start = <bound method IPKernelApp.start of <ipykernel.kernelapp.IPKernelApp object>>
    659 
    660 #-----------------------------------------------------------------------------
    661 # utility functions, for convenience
    662 #-----------------------------------------------------------------------------

...........................................................................
/Users/adamdelreal/anaconda3/envs/tensorflow/lib/python3.6/site-packages/ipykernel/kernelapp.py in start(self=<ipykernel.kernelapp.IPKernelApp object>)
    481         if self.poller is not None:
    482             self.poller.start()
    483         self.kernel.start()
    484         self.io_loop = ioloop.IOLoop.current()
    485         try:
--> 486             self.io_loop.start()
        self.io_loop.start = <bound method BaseAsyncIOLoop.start of <tornado.platform.asyncio.AsyncIOMainLoop object>>
    487         except KeyboardInterrupt:
    488             pass
    489 
    490 launch_new_instance = IPKernelApp.launch_instance

...........................................................................
/Users/adamdelreal/anaconda3/envs/tensorflow/lib/python3.6/site-packages/tornado/platform/asyncio.py in start(self=<tornado.platform.asyncio.AsyncIOMainLoop object>)
    122         except (RuntimeError, AssertionError):
    123             old_loop = None
    124         try:
    125             self._setup_logging()
    126             asyncio.set_event_loop(self.asyncio_loop)
--> 127             self.asyncio_loop.run_forever()
        self.asyncio_loop.run_forever = <bound method BaseEventLoop.run_forever of <_Uni...EventLoop running=True closed=False debug=False>>
    128         finally:
    129             asyncio.set_event_loop(old_loop)
    130 
    131     def stop(self):

...........................................................................
/Users/adamdelreal/anaconda3/envs/tensorflow/lib/python3.6/asyncio/base_events.py in run_forever(self=<_UnixSelectorEventLoop running=True closed=False debug=False>)
    417             sys.set_asyncgen_hooks(firstiter=self._asyncgen_firstiter_hook,
    418                                    finalizer=self._asyncgen_finalizer_hook)
    419         try:
    420             events._set_running_loop(self)
    421             while True:
--> 422                 self._run_once()
        self._run_once = <bound method BaseEventLoop._run_once of <_UnixS...EventLoop running=True closed=False debug=False>>
    423                 if self._stopping:
    424                     break
    425         finally:
    426             self._stopping = False

...........................................................................
/Users/adamdelreal/anaconda3/envs/tensorflow/lib/python3.6/asyncio/base_events.py in _run_once(self=<_UnixSelectorEventLoop running=True closed=False debug=False>)
   1427                         logger.warning('Executing %s took %.3f seconds',
   1428                                        _format_handle(handle), dt)
   1429                 finally:
   1430                     self._current_handle = None
   1431             else:
-> 1432                 handle._run()
        handle._run = <bound method Handle._run of <Handle BaseAsyncIOLoop._handle_events(15, 1)>>
   1433         handle = None  # Needed to break cycles when an exception occurs.
   1434 
   1435     def _set_coroutine_wrapper(self, enabled):
   1436         try:

...........................................................................
/Users/adamdelreal/anaconda3/envs/tensorflow/lib/python3.6/asyncio/events.py in _run(self=<Handle BaseAsyncIOLoop._handle_events(15, 1)>)
    140             self._callback = None
    141             self._args = None
    142 
    143     def _run(self):
    144         try:
--> 145             self._callback(*self._args)
        self._callback = <bound method BaseAsyncIOLoop._handle_events of <tornado.platform.asyncio.AsyncIOMainLoop object>>
        self._args = (15, 1)
    146         except Exception as exc:
    147             cb = _format_callback_source(self._callback, self._args)
    148             msg = 'Exception in callback {}'.format(cb)
    149             context = {

...........................................................................
/Users/adamdelreal/anaconda3/envs/tensorflow/lib/python3.6/site-packages/tornado/platform/asyncio.py in _handle_events(self=<tornado.platform.asyncio.AsyncIOMainLoop object>, fd=15, events=1)
    112             self.writers.remove(fd)
    113         del self.handlers[fd]
    114 
    115     def _handle_events(self, fd, events):
    116         fileobj, handler_func = self.handlers[fd]
--> 117         handler_func(fileobj, events)
        handler_func = <function wrap.<locals>.null_wrapper>
        fileobj = <zmq.sugar.socket.Socket object>
        events = 1
    118 
    119     def start(self):
    120         try:
    121             old_loop = asyncio.get_event_loop()

...........................................................................
/Users/adamdelreal/anaconda3/envs/tensorflow/lib/python3.6/site-packages/tornado/stack_context.py in null_wrapper(*args=(<zmq.sugar.socket.Socket object>, 1), **kwargs={})
    271         # Fast path when there are no active contexts.
    272         def null_wrapper(*args, **kwargs):
    273             try:
    274                 current_state = _state.contexts
    275                 _state.contexts = cap_contexts[0]
--> 276                 return fn(*args, **kwargs)
        args = (<zmq.sugar.socket.Socket object>, 1)
        kwargs = {}
    277             finally:
    278                 _state.contexts = current_state
    279         null_wrapper._wrapped = True
    280         return null_wrapper

...........................................................................
/Users/adamdelreal/anaconda3/envs/tensorflow/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py in _handle_events(self=<zmq.eventloop.zmqstream.ZMQStream object>, fd=<zmq.sugar.socket.Socket object>, events=1)
    445             return
    446         zmq_events = self.socket.EVENTS
    447         try:
    448             # dispatch events:
    449             if zmq_events & zmq.POLLIN and self.receiving():
--> 450                 self._handle_recv()
        self._handle_recv = <bound method ZMQStream._handle_recv of <zmq.eventloop.zmqstream.ZMQStream object>>
    451                 if not self.socket:
    452                     return
    453             if zmq_events & zmq.POLLOUT and self.sending():
    454                 self._handle_send()

...........................................................................
/Users/adamdelreal/anaconda3/envs/tensorflow/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py in _handle_recv(self=<zmq.eventloop.zmqstream.ZMQStream object>)
    475             else:
    476                 raise
    477         else:
    478             if self._recv_callback:
    479                 callback = self._recv_callback
--> 480                 self._run_callback(callback, msg)
        self._run_callback = <bound method ZMQStream._run_callback of <zmq.eventloop.zmqstream.ZMQStream object>>
        callback = <function wrap.<locals>.null_wrapper>
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    481         
    482 
    483     def _handle_send(self):
    484         """Handle a send event."""

...........................................................................
/Users/adamdelreal/anaconda3/envs/tensorflow/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py in _run_callback(self=<zmq.eventloop.zmqstream.ZMQStream object>, callback=<function wrap.<locals>.null_wrapper>, *args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    427         close our socket."""
    428         try:
    429             # Use a NullContext to ensure that all StackContexts are run
    430             # inside our blanket exception handler rather than outside.
    431             with stack_context.NullContext():
--> 432                 callback(*args, **kwargs)
        callback = <function wrap.<locals>.null_wrapper>
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    433         except:
    434             gen_log.error("Uncaught exception in ZMQStream callback",
    435                           exc_info=True)
    436             # Re-raise the exception so that IOLoop.handle_callback_exception

...........................................................................
/Users/adamdelreal/anaconda3/envs/tensorflow/lib/python3.6/site-packages/tornado/stack_context.py in null_wrapper(*args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    271         # Fast path when there are no active contexts.
    272         def null_wrapper(*args, **kwargs):
    273             try:
    274                 current_state = _state.contexts
    275                 _state.contexts = cap_contexts[0]
--> 276                 return fn(*args, **kwargs)
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    277             finally:
    278                 _state.contexts = current_state
    279         null_wrapper._wrapped = True
    280         return null_wrapper

...........................................................................
/Users/adamdelreal/anaconda3/envs/tensorflow/lib/python3.6/site-packages/ipykernel/kernelbase.py in dispatcher(msg=[<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>])
    278         if self.control_stream:
    279             self.control_stream.on_recv(self.dispatch_control, copy=False)
    280 
    281         def make_dispatcher(stream):
    282             def dispatcher(msg):
--> 283                 return self.dispatch_shell(stream, msg)
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    284             return dispatcher
    285 
    286         for s in self.shell_streams:
    287             s.on_recv(make_dispatcher(s), copy=False)

...........................................................................
/Users/adamdelreal/anaconda3/envs/tensorflow/lib/python3.6/site-packages/ipykernel/kernelbase.py in dispatch_shell(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, msg={'buffers': [], 'content': {'allow_stdin': True, 'code': 'rf.fit(X_train_sc, y_train)', 'silent': False, 'stop_on_error': True, 'store_history': True, 'user_expressions': {}}, 'header': {'date': datetime.datetime(2018, 7, 9, 18, 7, 27, 565165, tzinfo=tzutc()), 'msg_id': 'b9b51b5faa2e4a2e82239f61696f3a36', 'msg_type': 'execute_request', 'session': 'd9c573d2d0594f4d86b299a1aad307eb', 'username': 'username', 'version': '5.2'}, 'metadata': {}, 'msg_id': 'b9b51b5faa2e4a2e82239f61696f3a36', 'msg_type': 'execute_request', 'parent_header': {}})
    228             self.log.warn("Unknown message type: %r", msg_type)
    229         else:
    230             self.log.debug("%s: %s", msg_type, msg)
    231             self.pre_handler_hook()
    232             try:
--> 233                 handler(stream, idents, msg)
        handler = <bound method Kernel.execute_request of <ipykernel.ipkernel.IPythonKernel object>>
        stream = <zmq.eventloop.zmqstream.ZMQStream object>
        idents = [b'd9c573d2d0594f4d86b299a1aad307eb']
        msg = {'buffers': [], 'content': {'allow_stdin': True, 'code': 'rf.fit(X_train_sc, y_train)', 'silent': False, 'stop_on_error': True, 'store_history': True, 'user_expressions': {}}, 'header': {'date': datetime.datetime(2018, 7, 9, 18, 7, 27, 565165, tzinfo=tzutc()), 'msg_id': 'b9b51b5faa2e4a2e82239f61696f3a36', 'msg_type': 'execute_request', 'session': 'd9c573d2d0594f4d86b299a1aad307eb', 'username': 'username', 'version': '5.2'}, 'metadata': {}, 'msg_id': 'b9b51b5faa2e4a2e82239f61696f3a36', 'msg_type': 'execute_request', 'parent_header': {}}
    234             except Exception:
    235                 self.log.error("Exception in message handler:", exc_info=True)
    236             finally:
    237                 self.post_handler_hook()

...........................................................................
/Users/adamdelreal/anaconda3/envs/tensorflow/lib/python3.6/site-packages/ipykernel/kernelbase.py in execute_request(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, ident=[b'd9c573d2d0594f4d86b299a1aad307eb'], parent={'buffers': [], 'content': {'allow_stdin': True, 'code': 'rf.fit(X_train_sc, y_train)', 'silent': False, 'stop_on_error': True, 'store_history': True, 'user_expressions': {}}, 'header': {'date': datetime.datetime(2018, 7, 9, 18, 7, 27, 565165, tzinfo=tzutc()), 'msg_id': 'b9b51b5faa2e4a2e82239f61696f3a36', 'msg_type': 'execute_request', 'session': 'd9c573d2d0594f4d86b299a1aad307eb', 'username': 'username', 'version': '5.2'}, 'metadata': {}, 'msg_id': 'b9b51b5faa2e4a2e82239f61696f3a36', 'msg_type': 'execute_request', 'parent_header': {}})
    394         if not silent:
    395             self.execution_count += 1
    396             self._publish_execute_input(code, parent, self.execution_count)
    397 
    398         reply_content = self.do_execute(code, silent, store_history,
--> 399                                         user_expressions, allow_stdin)
        user_expressions = {}
        allow_stdin = True
    400 
    401         # Flush output before sending the reply.
    402         sys.stdout.flush()
    403         sys.stderr.flush()

...........................................................................
/Users/adamdelreal/anaconda3/envs/tensorflow/lib/python3.6/site-packages/ipykernel/ipkernel.py in do_execute(self=<ipykernel.ipkernel.IPythonKernel object>, code='rf.fit(X_train_sc, y_train)', silent=False, store_history=True, user_expressions={}, allow_stdin=True)
    203 
    204         self._forward_input(allow_stdin)
    205 
    206         reply_content = {}
    207         try:
--> 208             res = shell.run_cell(code, store_history=store_history, silent=silent)
        res = undefined
        shell.run_cell = <bound method ZMQInteractiveShell.run_cell of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = 'rf.fit(X_train_sc, y_train)'
        store_history = True
        silent = False
    209         finally:
    210             self._restore_input()
    211 
    212         if res.error_before_exec is not None:

...........................................................................
/Users/adamdelreal/anaconda3/envs/tensorflow/lib/python3.6/site-packages/ipykernel/zmqshell.py in run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, *args=('rf.fit(X_train_sc, y_train)',), **kwargs={'silent': False, 'store_history': True})
    532             )
    533         self.payload_manager.write_payload(payload)
    534 
    535     def run_cell(self, *args, **kwargs):
    536         self._last_traceback = None
--> 537         return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
        self.run_cell = <bound method ZMQInteractiveShell.run_cell of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        args = ('rf.fit(X_train_sc, y_train)',)
        kwargs = {'silent': False, 'store_history': True}
    538 
    539     def _showtraceback(self, etype, evalue, stb):
    540         # try to preserve ordering of tracebacks and print statements
    541         sys.stdout.flush()

...........................................................................
/Users/adamdelreal/anaconda3/envs/tensorflow/lib/python3.6/site-packages/IPython/core/interactiveshell.py in run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, raw_cell='rf.fit(X_train_sc, y_train)', store_history=True, silent=False, shell_futures=True)
   2657         -------
   2658         result : :class:`ExecutionResult`
   2659         """
   2660         try:
   2661             result = self._run_cell(
-> 2662                 raw_cell, store_history, silent, shell_futures)
        raw_cell = 'rf.fit(X_train_sc, y_train)'
        store_history = True
        silent = False
        shell_futures = True
   2663         finally:
   2664             self.events.trigger('post_execute')
   2665             if not silent:
   2666                 self.events.trigger('post_run_cell', result)

...........................................................................
/Users/adamdelreal/anaconda3/envs/tensorflow/lib/python3.6/site-packages/IPython/core/interactiveshell.py in _run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, raw_cell='rf.fit(X_train_sc, y_train)', store_history=True, silent=False, shell_futures=True)
   2780                 self.displayhook.exec_result = result
   2781 
   2782                 # Execute the user code
   2783                 interactivity = 'none' if silent else self.ast_node_interactivity
   2784                 has_raised = self.run_ast_nodes(code_ast.body, cell_name,
-> 2785                    interactivity=interactivity, compiler=compiler, result=result)
        interactivity = 'last_expr'
        compiler = <IPython.core.compilerop.CachingCompiler object>
   2786                 
   2787                 self.last_execution_succeeded = not has_raised
   2788                 self.last_execution_result = result
   2789 

...........................................................................
/Users/adamdelreal/anaconda3/envs/tensorflow/lib/python3.6/site-packages/IPython/core/interactiveshell.py in run_ast_nodes(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, nodelist=[<_ast.Expr object>], cell_name='<ipython-input-140-e3af0ef1b6ae>', interactivity='last', compiler=<IPython.core.compilerop.CachingCompiler object>, result=<ExecutionResult object at 115537ba8, execution_...rue silent=False shell_futures=True> result=None>)
   2904                     return True
   2905 
   2906             for i, node in enumerate(to_run_interactive):
   2907                 mod = ast.Interactive([node])
   2908                 code = compiler(mod, cell_name, "single")
-> 2909                 if self.run_code(code, result):
        self.run_code = <bound method InteractiveShell.run_code of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = <code object <module> at 0x115500930, file "<ipython-input-140-e3af0ef1b6ae>", line 1>
        result = <ExecutionResult object at 115537ba8, execution_...rue silent=False shell_futures=True> result=None>
   2910                     return True
   2911 
   2912             # Flush softspace
   2913             if softspace(sys.stdout, 0):

...........................................................................
/Users/adamdelreal/anaconda3/envs/tensorflow/lib/python3.6/site-packages/IPython/core/interactiveshell.py in run_code(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, code_obj=<code object <module> at 0x115500930, file "<ipython-input-140-e3af0ef1b6ae>", line 1>, result=<ExecutionResult object at 115537ba8, execution_...rue silent=False shell_futures=True> result=None>)
   2958         outflag = True  # happens in more places, so it's easier as default
   2959         try:
   2960             try:
   2961                 self.hooks.pre_run_code_hook()
   2962                 #rprint('Running code', repr(code_obj)) # dbg
-> 2963                 exec(code_obj, self.user_global_ns, self.user_ns)
        code_obj = <code object <module> at 0x115500930, file "<ipython-input-140-e3af0ef1b6ae>", line 1>
        self.user_global_ns = {'In': ['', 'def data_split(dataframe):\n    temp_df = datafra...e)\n    train = temp_df\n    return X_train, X_test', 'train, test = data_split(df_shift)', 'import pandas as pd\nimport numpy as np\nimport ma...onfig\', "InlineBackend.figure_format = \'retina\'")', "company_name = 'Apple'", 'def data_reader(company_name):\n    company_name=...index(inplace=True, ascending=True)\n    return df', "df = data_reader('Apple')", 'df.head(3)', 'def shift_dates(df):\n    shifted_df = pd.DataFra....index, columns=df.columns)\n    return shifted_df', 'def lag_and_shift_data(dataframe):\n    temp_df =...emp_df = shift_dates(temp_df)\n\n    return temp_df', 'df_shift = lag_and_shift_data(df)', 'df_shift.head()', 'df_shift.tail(3)', "# df_shift['2014-07-01':]", "df_shift.drop(df_shift[:'2014-07-01'].index, inplace=True)", 'df_shift.head(3)', 'def data_split(dataframe):\n    temp_df = datafra...e)\n    train = temp_df\n    return X_train, X_test', 'train, test = data_split(df_shift)', 'def data_split(dataframe):\n    temp_df = datafra...=True)\n    train = temp_df\n    return train, test', 'train, test = data_split(df_shift)', ...], 'MinMaxScaler': <class 'sklearn.preprocessing.data.MinMaxScaler'>, 'Out': {7:              Open   High    Low  Close     Volum...010  0.373010  0.371246   0.371246   26432000.0  , 11:              Open   High    Low  Close    Volume...263         2.086528e+07  

[5 rows x 84 columns], 12:               Open    High     Low    Close     ...-27         3.379657e+07  

[3 rows x 84 columns], 15:               Open   High    Low  Close      Vol...-07         6.474376e+07  

[3 rows x 84 columns], 20:               Open   High    Low  Close      Vol...-07         6.474376e+07  

[3 rows x 84 columns], 21:              Open    High    Low  Close      Vol...-07         3.936497e+07  

[3 rows x 84 columns], 22:              Open   High    Low  Close     Volum...536  0.393300  0.391536   0.391536   18362400.0  , 27:              Open   High    Low  Close     Volum...010  0.373010  0.371246   0.371246   26432000.0  , 28:              Open   High    Low  Close     Volum...010  0.373010  0.371246   0.371246   26432000.0  , 30:               Open   High    Low   Close      Vo...2014-07-03  22891800.0  
2014-07-07  56468000.0  , ...}, 'PCA': <class 'sklearn.decomposition.pca.PCA'>, 'Pipeline': <class 'sklearn.pipeline.Pipeline'>, 'RandomForestRegressor': <class 'sklearn.ensemble.forest.RandomForestRegressor'>, 'TimeSeriesSplit': <class 'sklearn.model_selection._split.TimeSeriesSplit'>, 'X_test':                Open      High       Low    Close...9         2.823258e+07  

[184 rows x 84 columns], 'X_test_sc': array([[-0.94723899, -0.9558427 , -0.92449836, .... -0.14502245,
        -0.15189614, -0.80361455]]), 'X_train':                Open     High       Low    Close ...2         3.379657e+07  

[662 rows x 84 columns], ...}
        self.user_ns = {'In': ['', 'def data_split(dataframe):\n    temp_df = datafra...e)\n    train = temp_df\n    return X_train, X_test', 'train, test = data_split(df_shift)', 'import pandas as pd\nimport numpy as np\nimport ma...onfig\', "InlineBackend.figure_format = \'retina\'")', "company_name = 'Apple'", 'def data_reader(company_name):\n    company_name=...index(inplace=True, ascending=True)\n    return df', "df = data_reader('Apple')", 'df.head(3)', 'def shift_dates(df):\n    shifted_df = pd.DataFra....index, columns=df.columns)\n    return shifted_df', 'def lag_and_shift_data(dataframe):\n    temp_df =...emp_df = shift_dates(temp_df)\n\n    return temp_df', 'df_shift = lag_and_shift_data(df)', 'df_shift.head()', 'df_shift.tail(3)', "# df_shift['2014-07-01':]", "df_shift.drop(df_shift[:'2014-07-01'].index, inplace=True)", 'df_shift.head(3)', 'def data_split(dataframe):\n    temp_df = datafra...e)\n    train = temp_df\n    return X_train, X_test', 'train, test = data_split(df_shift)', 'def data_split(dataframe):\n    temp_df = datafra...=True)\n    train = temp_df\n    return train, test', 'train, test = data_split(df_shift)', ...], 'MinMaxScaler': <class 'sklearn.preprocessing.data.MinMaxScaler'>, 'Out': {7:              Open   High    Low  Close     Volum...010  0.373010  0.371246   0.371246   26432000.0  , 11:              Open   High    Low  Close    Volume...263         2.086528e+07  

[5 rows x 84 columns], 12:               Open    High     Low    Close     ...-27         3.379657e+07  

[3 rows x 84 columns], 15:               Open   High    Low  Close      Vol...-07         6.474376e+07  

[3 rows x 84 columns], 20:               Open   High    Low  Close      Vol...-07         6.474376e+07  

[3 rows x 84 columns], 21:              Open    High    Low  Close      Vol...-07         3.936497e+07  

[3 rows x 84 columns], 22:              Open   High    Low  Close     Volum...536  0.393300  0.391536   0.391536   18362400.0  , 27:              Open   High    Low  Close     Volum...010  0.373010  0.371246   0.371246   26432000.0  , 28:              Open   High    Low  Close     Volum...010  0.373010  0.371246   0.371246   26432000.0  , 30:               Open   High    Low   Close      Vo...2014-07-03  22891800.0  
2014-07-07  56468000.0  , ...}, 'PCA': <class 'sklearn.decomposition.pca.PCA'>, 'Pipeline': <class 'sklearn.pipeline.Pipeline'>, 'RandomForestRegressor': <class 'sklearn.ensemble.forest.RandomForestRegressor'>, 'TimeSeriesSplit': <class 'sklearn.model_selection._split.TimeSeriesSplit'>, 'X_test':                Open      High       Low    Close...9         2.823258e+07  

[184 rows x 84 columns], 'X_test_sc': array([[-0.94723899, -0.9558427 , -0.92449836, .... -0.14502245,
        -0.15189614, -0.80361455]]), 'X_train':                Open     High       Low    Close ...2         3.379657e+07  

[662 rows x 84 columns], ...}
   2964             finally:
   2965                 # Reset our crash handler in place
   2966                 sys.excepthook = old_excepthook
   2967         except SystemExit as e:

...........................................................................
/Users/adamdelreal/dsi/projects/capstone/stocks/<ipython-input-140-e3af0ef1b6ae> in <module>()
----> 1 rf.fit(X_train_sc, y_train)

...........................................................................
/Users/adamdelreal/anaconda3/envs/tensorflow/lib/python3.6/site-packages/sklearn/ensemble/forest.py in fit(self=RandomForestRegressor(bootstrap=False, criterion...se, random_state=42, verbose=0, warm_start=False), X=array([[-0.69083387, -0.71123594, -0.684843  , ....        0.9960293 , -0.4830785 ]], dtype=float32), y=array([[108.    ],
       [109.4   ],
       [10...164.94  ],
       [172.77  ],
       [168.34  ]]), sample_weight=None)
    323             trees = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,
    324                              backend="threading")(
    325                 delayed(_parallel_build_trees)(
    326                     t, self, X, y, sample_weight, i, len(trees),
    327                     verbose=self.verbose, class_weight=self.class_weight)
--> 328                 for i, t in enumerate(trees))
        i = 99
    329 
    330             # Collect newly grown trees
    331             self.estimators_.extend(trees)
    332 

...........................................................................
/Users/adamdelreal/anaconda3/envs/tensorflow/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in __call__(self=Parallel(n_jobs=2), iterable=<generator object BaseForest.fit.<locals>.<genexpr>>)
    784             if pre_dispatch == "all" or n_jobs == 1:
    785                 # The iterable was consumed all at once by the above for loop.
    786                 # No need to wait for async callbacks to trigger to
    787                 # consumption.
    788                 self._iterating = False
--> 789             self.retrieve()
        self.retrieve = <bound method Parallel.retrieve of Parallel(n_jobs=2)>
    790             # Make sure that we get a last message telling us we are done
    791             elapsed_time = time.time() - self._start_time
    792             self._print('Done %3i out of %3i | elapsed: %s finished',
    793                         (len(self._output), len(self._output),

---------------------------------------------------------------------------
Sub-process traceback:
---------------------------------------------------------------------------
ValueError                                         Mon Jul  9 11:07:27 2018
PID: 2737Python 3.6.5: /Users/adamdelreal/anaconda3/envs/tensorflow/bin/python
...........................................................................
/Users/adamdelreal/anaconda3/envs/tensorflow/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in __call__(self=<sklearn.externals.joblib.parallel.BatchedCalls object>)
    126     def __init__(self, iterator_slice):
    127         self.items = list(iterator_slice)
    128         self._size = len(self.items)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
        self.items = [(<function _parallel_build_trees>, (DecisionTreeRegressor(criterion='mse', max_depth...=False, random_state=1608637542, splitter='best'), RandomForestRegressor(bootstrap=False, criterion...se, random_state=42, verbose=0, warm_start=False), array([[-0.69083387, -0.71123594, -0.684843  , ....        0.9960293 , -0.4830785 ]], dtype=float32), array([[108.    ],
       [109.4   ],
       [10...164.94  ],
       [172.77  ],
       [168.34  ]]), None, 0, 100), {'class_weight': None, 'verbose': 0})]
    132 
    133     def __len__(self):
    134         return self._size
    135 

...........................................................................
/Users/adamdelreal/anaconda3/envs/tensorflow/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in <listcomp>(.0=<list_iterator object>)
    126     def __init__(self, iterator_slice):
    127         self.items = list(iterator_slice)
    128         self._size = len(self.items)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
        func = <function _parallel_build_trees>
        args = (DecisionTreeRegressor(criterion='mse', max_depth...=False, random_state=1608637542, splitter='best'), RandomForestRegressor(bootstrap=False, criterion...se, random_state=42, verbose=0, warm_start=False), array([[-0.69083387, -0.71123594, -0.684843  , ....        0.9960293 , -0.4830785 ]], dtype=float32), array([[108.    ],
       [109.4   ],
       [10...164.94  ],
       [172.77  ],
       [168.34  ]]), None, 0, 100)
        kwargs = {'class_weight': None, 'verbose': 0}
    132 
    133     def __len__(self):
    134         return self._size
    135 

...........................................................................
/Users/adamdelreal/anaconda3/envs/tensorflow/lib/python3.6/site-packages/sklearn/ensemble/forest.py in _parallel_build_trees(tree=DecisionTreeRegressor(criterion='mse', max_depth...=False, random_state=1608637542, splitter='best'), forest=RandomForestRegressor(bootstrap=False, criterion...se, random_state=42, verbose=0, warm_start=False), X=array([[-0.69083387, -0.71123594, -0.684843  , ....        0.9960293 , -0.4830785 ]], dtype=float32), y=array([[108.    ],
       [109.4   ],
       [10...164.94  ],
       [172.77  ],
       [168.34  ]]), sample_weight=None, tree_idx=0, n_trees=100, verbose=0, class_weight=None)
    118         elif class_weight == 'balanced_subsample':
    119             curr_sample_weight *= compute_sample_weight('balanced', y, indices)
    120 
    121         tree.fit(X, y, sample_weight=curr_sample_weight, check_input=False)
    122     else:
--> 123         tree.fit(X, y, sample_weight=sample_weight, check_input=False)
        tree.fit = <bound method DecisionTreeRegressor.fit of Decis...False, random_state=1608637542, splitter='best')>
        X = array([[-0.69083387, -0.71123594, -0.684843  , ....        0.9960293 , -0.4830785 ]], dtype=float32)
        y = array([[108.    ],
       [109.4   ],
       [10...164.94  ],
       [172.77  ],
       [168.34  ]])
        sample_weight = None
    124 
    125     return tree
    126 
    127 

...........................................................................
/Users/adamdelreal/anaconda3/envs/tensorflow/lib/python3.6/site-packages/sklearn/tree/tree.py in fit(self=DecisionTreeRegressor(criterion='mse', max_depth...=False, random_state=1608637542, splitter='best'), X=array([[-0.69083387, -0.71123594, -0.684843  , ....        0.9960293 , -0.4830785 ]], dtype=float32), y=array([[108.    ],
       [109.4   ],
       [10...164.94  ],
       [172.77  ],
       [168.34  ]]), sample_weight=None, check_input=False, X_idx_sorted=None)
   1119 
   1120         super(DecisionTreeRegressor, self).fit(
   1121             X, y,
   1122             sample_weight=sample_weight,
   1123             check_input=check_input,
-> 1124             X_idx_sorted=X_idx_sorted)
        X_idx_sorted = None
   1125         return self
   1126 
   1127 
   1128 class ExtraTreeClassifier(DecisionTreeClassifier):

...........................................................................
/Users/adamdelreal/anaconda3/envs/tensorflow/lib/python3.6/site-packages/sklearn/tree/tree.py in fit(self=DecisionTreeRegressor(criterion='mse', max_depth...=False, random_state=1608637542, splitter='best'), X=array([[-0.69083387, -0.71123594, -0.684843  , ....        0.9960293 , -0.4830785 ]], dtype=float32), y=array([[108.    ],
       [109.4   ],
       [10...164.94  ],
       [172.77  ],
       [168.34  ]]), sample_weight=None, check_input=False, X_idx_sorted=None)
    231 
    232         self.max_features_ = max_features
    233 
    234         if len(y) != n_samples:
    235             raise ValueError("Number of labels=%d does not match "
--> 236                              "number of samples=%d" % (len(y), n_samples))
        y = array([[108.    ],
       [109.4   ],
       [10...164.94  ],
       [172.77  ],
       [168.34  ]])
        n_samples = 662
    237         if not 0 <= self.min_weight_fraction_leaf <= 0.5:
    238             raise ValueError("min_weight_fraction_leaf must in [0, 0.5]")
    239         if max_depth <= 0:
    240             raise ValueError("max_depth must be greater than zero. ")

ValueError: Number of labels=854 does not match number of samples=662
___________________________________________________________________________

### Scoring on the Training Data:

In [21]:
rf.score(X_train_sc, y_train)

0.9988511480564479

### Scoring on the Testing Data

In [22]:
rf.score(X_test_sc, y_test)

-3.6526599373079107

### Inspecting the Average Prediction:

In [23]:
y_test.mean()

154.62082337662338

In [24]:
rf.predict(X_test_sc).mean()

182.12251210445518

In [25]:
rf.feature_importances_.mean()

0.011904761904761902

In [26]:
# coef_weights = pd.DataFrame(rf.coef_, index=X_train.columns, columns=['weight'])
# coef_weights.sort_values('weight').tail()

-----

## Decomposing Signal Components with Principal Component Analysis (PCA):

### Instantiating the PCA Decomposition:

In [27]:
pca = PCA(n_components=1, random_state=42)

### Fitting and Transforming the Scaled Training Set with PCA Weights:

In [28]:
X_train_pca = pca.fit_transform(X_train_sc)

### Transforming the Scaled Test Set with PCA Weights:

In [29]:
X_test_pca = pca.transform(X_train_sc)

### Fitting the PCA Weighted Training Data on a Random Forest Regressor Model:

In [30]:
rf.fit(X_train_pca, y_train)

RandomForestRegressor(bootstrap=False, criterion='mse', max_depth=15,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_decrease=0.0, min_impurity_split=None,
           min_samples_leaf=3, min_samples_split=2,
           min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=2,
           oob_score=False, random_state=42, verbose=0, warm_start=False)

### Scoring the PCA Weighted Training Data on a Random Forest Regressor Model:

In [31]:
rf.score(X_train_pca, y_train)

0.8824991349411133

### Scoring the PCA Weighted Test Data on a Random Forest Regressor Model:

In [33]:
# rf.score(X_test_sc, y_test)

### Inspecting the Average Prediction Value of the PCA Weighted Training Data on a Random Forest Regressor Model:

In [34]:
rf.predict(X_test_pca).mean()

100.05874912845564

-----

# Grid Searching a Random Forest Regression Model:

In [39]:
from sklearn.model_selection import GridSearchCV

### Creating a Pipeline

In [40]:
pipe = Pipeline([
    ('rf', RandomForestRegressor())
])

### Setting up the Parameters:

In [None]:
# Number of trees in random forest

n_estimators = [x for x in range(8, 14, 2)]

# Number of features to consider at every split
max_features = ['auto', 'log2']

# Maximum number of levels in tree
max_depth = [x for x in range(1, 3)]
max_depth.append(None)

# Minimum number of samples required to split a node
min_samples_split = [x for x in range(1, 3)]

# Minimum number of samples required at each leaf node
min_samples_leaf = [1, 2]

# Method of selecting samples for training each tree
bootstrap = [True, False]

In [None]:
params = {'rf__n_estimators': n_estimators,
           'rf__max_features': max_features,
           'rf__max_depth': max_depth,
           'rf__min_samples_split': min_samples_split,
           'rf__min_samples_leaf': min_samples_leaf,
           'rf__bootstrap': bootstrap}
print(params)

### Gridsearching the Parameters:

In [None]:
rf_search = GridSearchCV(pipe, params, n_jobs=3)

### Fitting the Scaled Data with the Model:

In [None]:
rf_search.fit(X_train_sc, y_train)

### Scoring the Training Data:

In [None]:
rf_search.score(X_train_sc, y_train)

### Scoring the Test Data

In [None]:
rf_search.score(X_test_sc, y_test)

In [None]:
plt.scatter(y_test, rf_search.predict(X_test_sc))
plt.xlabel('Actual')
plt.ylabel('Predicted')

In [None]:
coef_weights = pd.DataFrame(rf_search.coef_, index=X_train.columns, columns=['weight'])
coef_weights.sort_values('weight').tail()

- `'rf__n_estimators': [40, 55, 70, 85, 100],` => `n_estimators=10, `


- `'rf__max_features': ['auto', 'sqrt'],` => `max_features='auto',`


- `'rf__max_depth': [2, 80, None],` => -`max_depth=None,`


- `'rf__min_samples_split': [2, 5, 10],` => `min_samples_split=2,`


- `'rf__min_samples_leaf': [1, 2, 4],` => `min_samples_leaf=1,`


- `'rf__bootstrap': [True, False]` => `bootstrap=True,`

--------

## GridSearching a Random Forest with PCA Decomposition:

In [None]:
pipe_w = Pipeline([
    ('pca', PCA()),
    ('rf', RandomForestRegressor())
])

### Setting the Params

In [None]:
# Number of trees in random forest
n_estimators_w = [x for x in range(8, 14, 2)]

# Number of features to consider at every split
max_features_w = ['auto', 'log2']

# Maximum number of levels in tree
max_depth_w = [x for x in range(1, 3)]
max_depth_w.append(None)

# Minimum number of samples required to split a node
min_samples_split_w = [x for x in range(1, 5)]

# Minimum number of samples required at each leaf node
min_samples_leaf_w = [x for x in range(1, 3)]

# Method of selecting samples for training each tree
bootstrap_w = [True, False]

# pca_n_components= [x for x in range(2, 24, 4)]

# pca_svd_solver = ['auto', 'full', 'arpack', 'randomized']

### Setting up the Parameters with PCA Weights:

In [None]:
params_w = {'rf__n_estimators': n_estimators_w,
          'rf__max_features': max_features_w,
          'rf__max_depth': max_depth_w,
          'rf__min_samples_split': min_samples_split_w,
          'rf__min_samples_leaf': min_samples_leaf_w,
          'rf__bootstrap': bootstrap_w}
print(params_w)

### Gridsearching the Parameters with PCA:

In [None]:
grid = GridSearchCV(pipe_w, params_w, n_jobs=3)

### Fitting the Scaled Data with a Weighted Model:

In [None]:
grid.fit(X_train_sc, y_train)

### Scoring the Training Data:

In [None]:
grid.score(X_train_sc, y_train)

### Scoring the Test Data:

In [None]:
grid.score(X_test_sc, y_test)

In [None]:
plt.scatter(y_test, grid.predict(X_test_sc))
plt.xlabel('Actual')
plt.ylabel('Predicted')