###### Package Imports

In [3]:
#  NumPy and Pandas imports
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

#  Reading time series
from pandas_datareader import data

#  Technical Analysis
import pandas_ta as ta

#  Time stamps
import datetime as datetime

#  Visualization (sns is a visualization library based on matplotlib)
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('darkgrid')
%matplotlib inline


In [None]:
#  Observation window for the study: 10 April 2017 through 10 April 2019
start = datetime.date(year=2017, month=4, day=10)
end = datetime.date(year=2019, month=4, day=10)

#  Download historical quotes for Cisco Systems, Inc. (ticker 'CSCO') from
#  Yahoo! Finance over that window and keep the resulting table in CSCO
CSCO = data.get_data_yahoo('CSCO', start=start, end=end)

In [None]:
#  Show the last row of the downloaded quotes
CSCO.tail(1)

In [None]:
#  Smooth the quotes with an exponentially weighted mean (smoothing factor alpha = 0.13)
CSCO_processed = CSCO.ewm(alpha=0.13).mean()
#  Disabled: expose the index as an explicit 'Date' column
#      CSCO_processed['Date'] = CSCO_processed.index
#      CSCO_processed.set_index('Date')
CSCO_processed.tail(1)

# Feature Extraction
---------
Possibly use pyti, finta?

### Momentum Indicators
Momentum is the measurement of the speed or velocity of price changes. It measures the rate of the rise
or fall in a stock's price or volume, i.e. the rate of change of price or volume movements for
a particular asset.

Relative Strength Index
Stochastic Oscillator
Williams %R
Moving Average Convergence Divergence
Price Rate of Change
On Balance Volume

## Prediction Indicator (to be class label attribute)

In [None]:
#  Build the feature table on a copy, so adding/dropping columns here does not
#  modify CSCO_processed (the indicators below are still computed from it)
CSCO_Features = CSCO_processed.copy()

In [None]:
#  Class label: 1 when the next row's close-to-close return is positive, otherwise 0.
#  'Close' here is the smoothed close copied from CSCO_processed.
#  NOTE(review): the last row has no following row, so its Return is NaN and
#  it is therefore labelled 0 as well.
CSCO_Features['Return'] = (
    CSCO_Features['Close']
    .pct_change(periods=1)
    .shift(periods=-1)
)
CSCO_Features['Target_Return'] = np.where(CSCO_Features['Return'].gt(0), 1, 0)

In [None]:
#  Remove the price/volume columns and the helper 'Return' column from the
#  feature table (in place); indicator columns are added back further below
CSCO_Features.drop(
    columns=['High', 'Low', 'Open', 'Close', 'Volume', 'Adj Close', 'Return'],
    inplace=True,
)

## Relative Strength Index (RSI)
Type of momentum indicator

The RSI is a calculated ratio of the recent upward price movements to the absolute price movement.
The RSI ranges from 0 to 100. It's interpreted as an overbought indicator 
when the value is over 70, and an oversold indicator when the value is below 30.

This will give our categorical attribute three values?  

RSI(Series, Period)
The period is the number of observations the indicator looks back over. The default
that most traders use is 14.



In [None]:
#  RSI of the smoothed close, computed through the pandas_ta DataFrame accessor.
#  No look-back length is passed, so the library default applies
#  (presumably 14 -- TODO confirm against the installed pandas_ta version).
CSCO_Features['RSI'] = CSCO_processed.ta.rsi(close='Close') 
#  Show the last few RSI values
CSCO_Features['RSI'].tail()


In [None]:
#  Plot the RSI with its overbought (70) and oversold (30) thresholds, shading
#  the neutral band and the excursions beyond each threshold.
sns.set(rc={"lines.linewidth": 0.9})
figsize_2 = (16, 8)
figure_2, ax_2 = plt.subplots(figsize=figsize_2)
palette_2 = sns.color_palette('Blues', n_colors=1, desat=.9)
ax_2.set_title('Relative Strength Index')

#  `palette` only takes effect together with `hue`, so the line colour is
#  given through `color`; the target axes are passed explicitly.
RSI_line = sns.lineplot(x=CSCO_Features.index, y=CSCO_Features['RSI'], legend='full',
                        color='b',
                        lw=3,
                        label='RSI',
                        ax=ax_2)
RSI_line.hlines(70, start, end, linestyles='-', colors='r', label='overbought')
RSI_line.hlines(30, start, end, linestyles='-', colors='g', label='oversold')
#  Above 70: shade the area between the RSI curve and the overbought threshold
RSI_line.fill_between(CSCO_Features.index, CSCO_Features['RSI'], 70,
                      where=CSCO_Features['RSI'] > 70,
                      interpolate=True,
                      alpha=0.6,
                      facecolor='r',
                      label='overbought')
#  Neutral band: shade between the two thresholds. The y-bounds must be the
#  threshold values themselves; boolean masks here would be drawn as 0/1.
#  A light alpha keeps the RSI curve readable underneath the band.
RSI_line.fill_between(CSCO_Features.index, 30, 70,
                      alpha=.15,
                      facecolor='b',
                      linewidth=0,
                      label='unidentified')
#  Below 30: shade the area between the RSI curve and the oversold threshold
RSI_line.fill_between(CSCO_Features.index, CSCO_Features['RSI'], 30,
                      where=CSCO_Features['RSI'] < 30,
                      interpolate=True,
                      alpha=0.6,
                      facecolor='g',
                      linewidth=0,
                      label='oversold')


In [None]:
# CSCO_classes = CSCO_processed.copy()
# CSCO_classes.drop(labels=['High', 'Low', 'Open', 'Close', 'Volume', 'Adj Close'], axis=1, inplace=True)
# random=2


Turn RSI into categorical data

In [None]:
# def rsi_categorical(x):
#     if 0 < x <= 30:
#         return 'oversold'
#     elif 30 < x < 70:
#         return 'unknown'
#     return 'overbought'

In [None]:
# CSCO_classes['RSI'] = CSCO_classes['RSI'].apply(rsi_categorical)

In [None]:
# CSCO_classes['RSI'].value_counts()

## Williams Percentage Range (Williams %R)
**overbought:** a security that analysts or traders believe is trading above its intrinsic value
**oversold:** a condition where an asset has traded lower in price and has the potential for
a price bounce. 

Overbought means the price is near the highs of its recent range, oversold means the price is 
at the lower end of its recent range. 

A type of momentum indicator which measures overbought and oversold levels. It moves
between 0 and -100. A reading above -20 is overbought. A reading below -80 is oversold.

Compares a stock's closing price to the high-low range over a specified period, usually 14 days
 
          Highest - Close
    %R = ------------------ * -100
          Highest - Lowest

Highest is the highest price in look back period, 
Close is the most recent closing price, 
Lowest is the lowest price in the look back period


In [None]:
#  Williams %R from the smoothed high/low/close columns via the pandas_ta accessor.
#  No look-back length is passed, so the library default applies
#  (presumably 14 -- TODO confirm against the installed pandas_ta version).
CSCO_Features['W %R'] = CSCO_processed.ta.willr(close='Close', high='High', low='Low')
#  Show the last few Williams %R values
CSCO_Features['W %R'].tail()

In [None]:
#  Plot Williams %R with its overbought (-20) and oversold (-80) thresholds.
sns.set(rc={"lines.linewidth": 0.9})
figsize_3 = (16, 8)
figure_3, ax_3 = plt.subplots(figsize=figsize_3)
palette_3 = sns.color_palette('Blues', n_colors=1, desat=.9)
#  Title names the indicator actually plotted on this figure
ax_3.set_title('Williams %R')

#  `palette` only takes effect together with `hue`, so the line colour is
#  given through `color`; the target axes are passed explicitly.
WPR_line = sns.lineplot(x=CSCO_Features.index, y=CSCO_Features['W %R'], legend='full',
                        color='b',
                        lw=3,
                        label='W %R',
                        ax=ax_3)
WPR_line.hlines(-20, start, end, linestyles='-', colors='r', label='overbought')
WPR_line.hlines(-80, start, end, linestyles='-', colors='g', label='oversold')


In [None]:
# CSCO_classes['W %R'] = CSCO_Features['W %R'].copy()

Turn Williams %R into categorical data

In [None]:
# def wpr_categorical(x):
#     if -20 <= x < 0:
#         return 'overbought'
#     elif -80 < x < -20:
#         return 'unknown'
#     return 'oversold'

In [None]:
# CSCO_classes['W %R'] = CSCO_classes['W %R'].apply(wpr_categorical)

In [None]:
# CSCO_classes['W %R'].value_counts()

## Stochastic Oscillator (%K and %D)
The term stochastic refers to the point of a current price in relation to its price
range over a period of time.

The stochastic oscillator is a momentum indicator that uses support and resistance levels.
Very similar to the Williams %R

A support level is a level where the price finds support as it falls. The price
is more likely to go up from this level than continue down.

A resistance level is the opposite of a support level. The price finds resistance as it rises. 
The price is more likely to drop from this level than continue rising. 

        Close - Lowest
        ---------------- * 100
        Highest - Lowest

Highest is the highest value over a range, Lowest is the lowest value over a range.

A rule in finance is that momentum changes before price changes. This category will 
determine if momentum has changed before price has changed. 

When the %K is lower than %D and %K is above 80, overbought
When the %K is above %D and %K is under 20, oversold

In [None]:
#  Stochastic oscillator from the smoothed high/low/close columns, stored in
#  four new feature columns.
#  NOTE(review): assumes ta.stoch returns exactly four columns in the order
#  fast %K, fast %D, slow %K, slow %D -- this depends on the pandas_ta
#  version; confirm before relying on the names below.
CSCO_Features[['STOCHF_14', 'STOCHF_3', 'STOCH_5', 'STOCH_3']] = \
    CSCO_processed.ta.stoch(close='Close', high='High', low='Low')

In [None]:
#  Give the stochastic columns their conventional display names (in place)
CSCO_Features.rename(
    {
        'STOCHF_14': '%K',
        'STOCHF_3': '%D',
        'STOCH_5': 'Slow %K',
        'STOCH_3': 'Slow %D',
    },
    axis='columns',
    inplace=True,
)

In [None]:
#  Show the last few rows of the four stochastic columns
CSCO_Features.loc[:, ['%K', '%D', 'Slow %K', 'Slow %D']].tail()

In [None]:
#  Draw %K (this call sets the figure size and title), then %D with its own
#  legend entry
CSCO_Features['%K'].plot(
    figsize=(15, 7),
    title='%K and %D',
    label='%K',
    legend=True,
)
CSCO_Features['%D'].plot(label='%D', legend=True)

## Moving Average Convergence Divergence (MACD)
MACD identifies strength, direction, momentum, and duration of a security's overall trend.
The MACD indicator (oscillator) is three time series created from the closing price.
These are the proper MACD series, the moving average of the MACD series, and the difference
between the MACD series and MA(MACD). 

The MACD time series is: (12-Day Exponential Moving Average - 26-Day Exponential Moving Average) <br>
The Signal Line time series is: (9-Day Exponential Moving Average of MACD) <br>
The MACD Histogram time series is: (MACD time series - Signal line time series) <br> 

A fast moving average responds more quickly than a slow moving average to a recent change in a stock's
price. The MACD compares moving averages of different periods to indicate changes in a stock's trend.

In [None]:
#  MACD of the smoothed close, stored in three new feature columns.
#  NOTE(review): assumes ta.macd returns exactly three columns in the order
#  MACD, histogram, signal -- this depends on the pandas_ta version; confirm.
CSCO_Features[['MACD_12_26_9', 'MACDH_12_26_9', 'MACDS_12_26_9']] = CSCO_processed.ta.macd(close='Close')

In [None]:
#  Replace the parameterised MACD column names with readable ones (in place)
CSCO_Features.rename(
    {
        'MACD_12_26_9': 'MACD',
        'MACDH_12_26_9': 'MACD Difference',
        'MACDS_12_26_9': 'MACD Signal',
    },
    axis='columns',
    inplace=True,
)

In [None]:
#  Show the last few rows of the three MACD columns
CSCO_Features.loc[:, ['MACD', 'MACD Difference', 'MACD Signal']].tail()

In [None]:
#  MACD, its signal line and their difference on a single axes, plus a thin
#  horizontal reference line at zero spanning the observation window
figsize_4 = (17, 6)
figure_4, ax_4 = plt.subplots(figsize=figsize_4)
CSCO_Features['MACD'].plot(
    ax=ax_4,
    label='MACD',
    title='Moving Average Convergence Divergence',
    legend=True,
)
CSCO_Features['MACD Signal'].plot(ax=ax_4, label='MACD Signal', legend=True)
CSCO_Features['MACD Difference'].plot(
    ax=ax_4,
    label='MACD Difference',
    linestyle='-',
    legend=True,
)
ax_4.hlines(0, start, end, linewidth=1)

## Price Rate of Change (PROC)
Calculates the percent change in price between periods. When prices are rising PROC levels remain
above the zero line, when they are falling PROC levels are below the zero line.

        Close - Close n days ago
        ------------------------ 
            Close n days ago


In [None]:
#  Price rate of change of the smoothed close over three look-back lengths.
#  The 1-day column is needed by the PROC table/plot cells and the final
#  sanity check, which all read 'PROC 1-Day'.
CSCO_Features['PROC 1-Day'] = CSCO_processed.ta.roc(close='Close', length=1)
CSCO_Features['PROC 7-Day'] = CSCO_processed.ta.roc(close='Close', length=7)
CSCO_Features['PROC 14-Day'] = CSCO_processed.ta.roc(close='Close', length=14)

In [None]:
#  Show the last few rows of the PROC columns.
#  NOTE(review): requires 'PROC 1-Day', 'PROC 7-Day' and 'PROC 14-Day' to
#  already exist in CSCO_Features, otherwise this lookup raises KeyError.
CSCO_Features.loc[:, ['PROC 1-Day', 'PROC 7-Day', 'PROC 14-Day']].tail()

In [None]:
#  Three stacked panels, one per PROC look-back length
figure_5, ax_5 = plt.subplots(nrows=3, ncols=1, figsize=(16, 14))
#  Top panel: 1-day PROC (this panel also carries the title)
CSCO_Features['PROC 1-Day'].plot(
    ax=ax_5[0],
    title='Price Rate of Change Over Different Time Periods',
    color='blue',
    legend=True,
)
#  Middle panel: 7-day PROC
CSCO_Features['PROC 7-Day'].plot(ax=ax_5[1], color='purple', legend=True)
#  Bottom panel: 14-day PROC
CSCO_Features['PROC 14-Day'].plot(ax=ax_5[2], color='red', legend=True)

## On Balance Volume (OBV)
On Balance Volume measures buying and selling pressure. When the volume on up days outpaces the volume 
on down days, the OBV rises. When the volume on down days outpaces the volume on up days, the OBV falls. 


In [None]:
#  On Balance Volume from the smoothed close and volume columns.
#  Uses the OBV indicator (ta.obv); ta.roc would yield a price rate of change
#  instead of a volume-based measure.
CSCO_Features['OBV'] = CSCO_processed.ta.obv(close='Close', volume='Volume')

In [None]:
#  Show the last few values of the 'OBV' column
CSCO_Features['OBV'].tail()

In [None]:
#  Line plot of the 'OBV' feature column over time.
sns.set(rc={"lines.linewidth": 0.9})
figsize_6 = (16, 8)
figure_6, ax_6 = plt.subplots(figsize=figsize_6)
palette_6 = sns.color_palette('Blues', n_colors=1, desat=.9)
ax_6.set_title('On Balance Volume')

#  `palette` only takes effect together with `hue`, so the line colour is
#  given through `color`; the target axes are passed explicitly.
OBV_line = sns.lineplot(x=CSCO_Features.index, y=CSCO_Features['OBV'], legend='full',
                        color='b',
                        lw=2,
                        label='OBV',
                        ax=ax_6)

In [None]:
#  Show the last row of the feature table with every column built so far
CSCO_Features.tail(1)

In [None]:
#  Sanity check: view the class label next to the 1-day PROC for the first 20 rows.
#  NOTE(review): requires a 'PROC 1-Day' column to exist in CSCO_Features.
CSCO_Features.loc[:, ['Target_Return', 'PROC 1-Day']].head(20)

In [None]:
#  List the column names of the finished feature table
CSCO_Features.columns