# Apple, Inc. (AAPL) - Linear Regression Models:


### Overview: 
- 1) Importing the data.
- 2) Splitting the data into Training and Testing Sets.
- 3) Normalizing the data.
- 4) Modeling.


## Importing Libraries:

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import seaborn as sns

from datetime import datetime

from keras.models import Sequential
from keras.layers import LSTM, Dense, Activation, Input, Dropout
from keras.activations import relu, softmax

from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.ensemble import BaggingRegressor, GradientBoostingRegressor, AdaBoostRegressor

%matplotlib inline
%config InlineBackend.figure_format = 'retina'

Using TensorFlow backend.


-----

## Company Name

In [2]:
# Company prefix used to build the CSV file paths read by the loader functions below.
company_name = 'Apple'

# Importing the Data

## Importing the Whole Data Frame:

In [19]:
def data_reader(company_name):
    """Load a company's cleaned price history from CSV.

    Reads ``data/{company_name}_Clean.csv``, converts the ``Date`` column to
    datetimes, uses it as the index and sorts the rows in ascending date order.

    Parameter
    -------------------------------------------------------------------------
    company_name : str
        Prefix of the CSV file name, e.g. ``'Apple'`` for
        ``data/Apple_Clean.csv``.

    Returns
    -------------------------------------------------------------------------
    pandas.DataFrame
        The CSV contents indexed by ``Date``, oldest row first.
    """
    df = pd.read_csv(f'data/{company_name}_Clean.csv')
    df['Date'] = pd.to_datetime(df['Date'])
    # Chained, non-mutating calls give the same frame as the former
    # inplace=True pair without relying on in-place mutation.
    return df.set_index('Date').sort_index(ascending=True)

In [20]:
# Load the full cleaned price history for the selected company.
df = data_reader(company_name)

In [21]:
# Preview the first three rows of the loaded frame.
df.head(3)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Ex_Dividend,Split_Ratio,Adj_Open,Adj_High,Adj_Low,Adj_Close,Adj_Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1980-12-12,28.75,28.87,28.75,28.75,2093900.0,0.0,1.0,0.422706,0.42447,0.422706,0.422706,117258400.0
1980-12-15,27.38,27.38,27.25,27.25,785200.0,0.0,1.0,0.402563,0.402563,0.400652,0.400652,43971200.0
1980-12-16,25.37,25.37,25.25,25.25,472000.0,0.0,1.0,0.37301,0.37301,0.371246,0.371246,26432000.0


In [6]:
# X_train, X_test = df_shift[:'2016-12-30'], df_shift['2017-01-03':]

## Importing the Train and Test set:

In [17]:
def _read_dated_csv(path):
    """Read *path* into a DataFrame indexed by its parsed ``Date`` column, sorted ascending."""
    frame = pd.read_csv(path)
    frame['Date'] = pd.to_datetime(frame['Date'])
    return frame.set_index('Date').sort_index(ascending=True)


def import_split_data(company_name):
    """
    Load the pre-split train and test feature sets for a company from CSV.

    Reads ``data/modeling_data/{company_name}_X_Train.csv`` and
    ``data/modeling_data/{company_name}_X_Test.csv``. In both frames the
    ``Date`` column is converted to datetimes, set as the index and sorted in
    ascending order.

    Parameter
    -------------------------------------------------------------------------
    company_name : str
        Prefix of the two CSV file names, e.g. ``'Apple'``.

    Returns
    -------------------------------------------------------------------------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(X_train, X_test)``, each indexed by ``Date``.
    """
    # Both files share the same layout, so one helper loads either of them.
    prefix = f'data/modeling_data/{company_name}'
    X_train = _read_dated_csv(f'{prefix}_X_Train.csv')
    X_test = _read_dated_csv(f'{prefix}_X_Test.csv')
    return X_train, X_test

In [18]:
# Load the pre-split train and test feature frames for the selected company.
X_train, X_test = import_split_data(company_name)

### Inspecting the Training Data Set:

In [10]:
# Preview the first two rows of the training features.
X_train.head(2)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Ex_Dividend,Split_Ratio,Adj_Open,Adj_High,Adj_Low,...,Low_Long_EMA,Close_Long_EMA,Volume_Long_EMA,Ex_Dividend_Long_EMA,Split_Ratio_Long_EMA,Adj_Open_Long_EMA,Adj_High_Long_EMA,Adj_Low_Long_EMA,Adj_Close_Long_EMA,Adj_Volume_Long_EMA
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1981-04-15,27.88,28.0,27.88,27.88,29700.0,0.0,1.0,0.409914,0.411679,0.409914,...,27.100545,27.100545,395955.964908,0.0,1.0,0.399428,0.40078,0.398454,0.398454,22173530.0
1981-04-16,26.63,26.63,26.5,26.5,152000.0,0.0,1.0,0.391536,0.391536,0.389625,...,27.086579,27.086579,390282.570375,0.0,1.0,0.399245,0.400565,0.398249,0.398249,21855820.0


### Inspecting the Test Data Set:

In [14]:
# Preview the first two rows of the test features.
X_test.head(2)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Ex_Dividend,Split_Ratio,Adj_Open,Adj_High,Adj_Low,...,Low_Long_EMA,Close_Long_EMA,Volume_Long_EMA,Ex_Dividend_Long_EMA,Split_Ratio_Long_EMA,Adj_Open_Long_EMA,Adj_High_Long_EMA,Adj_Low_Long_EMA,Adj_Close_Long_EMA,Adj_Volume_Long_EMA
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-01-03,116.65,117.2,115.43,115.82,30586265.0,0.0,1.0,115.209202,115.752409,114.004271,...,110.936682,111.809549,32622640.0,0.006808,1.0,109.969134,110.881942,109.289159,110.149132,32622660.0
2017-01-04,115.8,116.33,114.76,116.15,28781865.0,0.0,1.0,114.369701,114.893155,113.342546,...,111.025596,111.91049,32533320.0,0.00665,1.0,110.071473,110.975226,109.383424,110.255324,32533340.0


-----

# Normalizing the Data with MinMaxScaler

## Instantiating the Scaler:

In [15]:
def mm_scaler(X_train, X_test, dataframe, feature_range=(-1, 1), target_col='Close'):
    """
    Returns scaled X_train & X_test arrays using SK-Learn's MinMaxScaler,
    plus the target values for each set taken from `dataframe`.

    The scaler is fitted on X_train only and then applied to X_test, so the
    test set does not influence the scaling.

    Parameter
    -------------------------------------------------------------------------------------------------------------
    X_train : pandas.DataFrame
        Training features. Its first and last index labels are used as the
        bounds of the target slice, so the index is assumed to be sorted.

    X_test : pandas.DataFrame
        Test features; same assumption about the index as X_train.

    dataframe : pandas.DataFrame
        Frame that contains the target column and shares the index labels of
        X_train / X_test.

    feature_range : tuple of (min, max), default (-1, 1)
        Range passed to MinMaxScaler.

    target_col : str, default 'Close'
        Column of `dataframe` returned as the target.

    Returns
    -------------------------------------------------------------------------------------------------------------
    X_train_sc, X_test_sc : numpy.ndarray
        Scaled feature matrices.

    y_train, y_test : numpy.ndarray
        Values of `target_col` between the first and last index label of the
        corresponding feature set (both ends inclusive).
    """
    scaler = MinMaxScaler(feature_range=feature_range)
    X_train_sc = scaler.fit_transform(X_train.values)
    X_test_sc = scaler.transform(X_test.values)

    # Nothing below mutates `dataframe`, so no defensive copy is needed.
    target = dataframe[target_col]
    # NOTE(review): targets are selected by label range, not by the exact
    # index of X; row counts only line up if `dataframe` holds exactly the
    # same labels inside that range -- confirm against the data.
    y_train = target.loc[X_train.index[0]:X_train.index[-1]].values
    y_test = target.loc[X_test.index[0]:X_test.index[-1]].values
    return X_train_sc, X_test_sc, y_train, y_test

In [16]:
# Scale the features and pull the Close values for the train and test date ranges from df.
X_train_sc, X_test_sc, y_train, y_test = mm_scaler(X_train, X_test, df)

----

# Modeling

------

# Preparing the Data for an LSTM Model

## Turning the Scaled Train Dataset into a 3-Dimensional Shape:

In [None]:
# Append a trailing axis of length 1: (rows, columns) -> (rows, columns, 1).
train_X = np.expand_dims(X_train_sc, axis=2)

### Analyzing the Shape:

In [None]:
# Show the shape of the 3-D training array.
print(train_X.shape)

In [None]:
# Number of columns in the test set (displayed as the cell output).
X_test.shape[1]

## Turning the Scaled Test Dataset into a 3-Dimensional Shape:

In [None]:
# Append a trailing axis of length 1: (rows, columns) -> (rows, columns, 1).
test_X = np.expand_dims(X_test_sc, axis=2)

### Analyzing the Shape:

In [None]:
# Show the shape of the 3-D test array.
print(test_X.shape)

# Time Series Split

`TimeSeriesSplit(n_splits=3, max_train_size=None)`

A Time Series cross-validator providing both a train and test index to split time series data observed at fixed time intervals. During each split, the test indices must be higher (in time) than before; therefore, random shuffling is inappropriate.

A variation of K-Fold: in the $k$-th split, it returns the first $k$ folds as the train set and the $(k+1)$-th fold as the test set. However, unlike the standard cross-validation methods, successive training sets are supersets of those that come before them.

## Scaling the Data

### Fitting and Transforming the Training Set:

In [None]:
# X_train_scaled = scaler.fit_transform(X.values)

### Checking the Shape:

In [None]:
# X_train_scaled.shape

In [None]:
# X_train_scaled

### Transforming the Test Set:

In [None]:
# X_test_transformed = scaler.transform(test.values)

### Checking the Shape:

In [None]:
# X_test_transformed.shape

In [None]:
# X_test_transformed

## Splitting the Data using TimeSeriesSplit:

In [None]:
# TimeSeriesSplit is not among the imports at the top of the notebook, so it
# is imported here; without this the cell raises NameError.
from sklearn.model_selection import TimeSeriesSplit

# Splitter with default settings. The default number of splits depends on the
# installed scikit-learn version.
tss = TimeSeriesSplit()

In [None]:
# Keep a handle on the full target vector: `y_train` is rebound to the
# per-fold slice inside the loop, so indexing it directly would break from the
# second fold onwards.
y_full = y_train

# Guards against re-running this cell after `y_train` has already been
# replaced by a fold slice, and against targets that do not line up with the
# scaled features.
if len(y_full) != len(X_train_sc):
    raise ValueError(
        "y_train does not have one value per row of X_train_sc; "
        "re-run the mm_scaler cell before splitting."
    )

# After the loop, X_train / X_test / y_train / y_test hold the LAST fold only.
for train_index, test_index in tss.split(X_train_sc):
    print("TRAIN:", train_index, "TEST:", test_index)
    X_train, X_test = X_train_sc[train_index], X_train_sc[test_index]
    # Targets come from the target vector, not from the feature matrix.
    y_train, y_test = y_full[train_index], y_full[test_index]

### Inspecting the Shape:

In [None]:
# Report the shapes of the current train/test features and targets.
print(
    'Shapes: ',
    '\nX_train: ', X_train.shape,
    '\nX_test: ', X_test.shape,
    '\n\ny_train: ', y_train.shape,
    '\ny_test: ', y_test.shape,
)

------

## Preparing the Data for an LSTM Model

### Turning the Scaled Train Dataset into a 3-Dimensional Shape:

In [None]:
# Add a trailing axis of length 1 to the training features.
# NOTE(review): if the TimeSeriesSplit loop above has been run, X_train is the
# last fold sliced from X_train_sc, i.e. it is already scaled and does not need
# to be replaced; otherwise it is still the unscaled DataFrame returned by
# import_split_data.
train_X = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1)) 
# Display the resulting shape as the cell output.
train_X.shape

In [None]:
# Add a trailing axis of length 1 to the test features, mirroring train_X.
test_X = np.reshape(X_test, (X_test.shape[0],  X_test.shape[1], 1))
# Display the resulting shape as the cell output.
test_X.shape

In [None]:
# train_y = np.reshape(y_train, (y_train.shape[0], 1, y_train.shape[1])) 
# ## Or should the X_train be replaced with X_train_scaled
# train_y.shape

In [None]:
# test_y = np.reshape(y_test, (y_test.shape[0], 1, y_test.shape[1]))
# test_y.shape

In [None]:
# Print the shapes of the 3-D feature arrays next to their targets, in the
# order train_X, y_train, test_X, y_test.
print(train_X.shape,
y_train.shape,
test_X.shape,
y_test.shape)

## LSTM Model

In [None]:
# NOTE(review): this seeds NumPy only; the backend's own random generator is
# not seeded in this cell.
np.random.seed(42)

# Timesteps per sample are taken from the reshaped training array instead of
# being hard-coded (previously the literal 84), so the input layer always
# matches the data that will be fed to the model.
n_timesteps = train_X.shape[1]

model = Sequential()
# `units` is the Keras 2 name of the former `output_dim` argument.
# return_sequences=True passes the full sequence on to the next LSTM layer.
model.add(LSTM(units=1, input_shape=(n_timesteps, 1), return_sequences=True))
model.add(Dropout(0.5))

model.add(LSTM(84))
model.add(Dropout(0.5))
# NOTE(review): the network ends in 10 ReLU units, while mm_scaler returns a
# single Close value per row as the target -- confirm whether a final
# Dense(1) output layer was intended.
model.add(Dense(10))

model.add(Activation("relu"))
model.compile(loss="mse", optimizer="adam")
model.summary()

In [None]:
# for i in range(84):
#     model.fit(train_X, y_train,  
#           epochs=1, verbose=1, shuffle=False)
#     model.reset_states()