Check AWS S3 shared data files

In [None]:
# List the objects under the shared community data prefix via the AWS CLI
# (presumably needs credentials with read access to the bucket — confirm).
! aws s3 ls s3://databeanstalk-community/data/

Load local file to folder on left menu

In [None]:
# Long listing of the working directory sorted by modification time, newest last
! ls -ltr

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)


In [None]:
# Load the locally uploaded copy of the price history (path is relative to the
# notebook's working directory).
LocalData = pd.read_csv('Starbucks_stock_history.csv')
# Leave the preview as the cell's last expression so Jupyter renders it as a table
# instead of the plain-text dump produced by print().
LocalData.head()

 *  Task-1 Dataset Load  
 *  Task-2 Data Visualization Charts  
 *  Task-3 Statistical Analysis and Inference  
 *  Task-4 Linear Regression and Forecasting 


<a id=1><h3 >1️⃣ Dataset Loading<br></h3></a>
<a id=2><h3 >2️⃣ Dataset Visualization Using Simple Plots<br></h3></a>
<a id=3><h3 >3️⃣ Dataset Visualization Using MPLFinance Plots<br></h3></a>
<a id=4><h3 >4️⃣ Stock Analysis of the data<br></h3></a>
<a id=4.1><h4 >4️⃣.1️⃣ MACD Forecasting of data using Pandas TA<br></h4></a>
<a id=5><h3 >5️⃣ Let us do a financial ratios calculation using FINTA library <br></h3></a>
<a id=6><h3 >6️⃣ Let us do a financial ratios calculation<br></h3></a>
<a id=7><h3 >7️⃣ Let us do a bit of forecasting using Facebook Prophet Regression <br></h3></a>
<a id=8><h3 >8️⃣ Let us do a Facebook Prophet library<br></h3></a>


In [None]:
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import matplotlib.dates as mdates
import matplotlib.colors as mcolors
from matplotlib import style
import seaborn as sns
# Global plot styling. plt.style.use() runs after sns.set_style(), so the
# "fivethirtyeight" sheet wins for any setting that both of them touch.
sns.set_style('whitegrid')
plt.style.use("fivethirtyeight")
# Render matplotlib figures inline in the notebook output
%matplotlib inline

# For reading stock data from yahoo
from pandas_datareader.data import DataReader

# For time stamps
from datetime import datetime
from math import sqrt

# [1️⃣. Dataset Loading](#1)

In [None]:
# Read the shared copy of the price history straight from S3. `sbux` keeps the default
# integer index and Date as an ordinary column; the chart cells below rely on that.
sbux = pd.read_csv('s3://databeanstalk-community/data/Starbucks_stock_history.csv')
# Last expression -> rendered as a table rather than print()'s plain-text dump.
sbux.head()

# [2️⃣. Dataset Visualization Using Simple Plots](#2)


## Initial simple plots using 
* *Pandas Plotting for opening and closing prices*
* *Plotly OHLC (Open-High-Low-Close) Charts*
* *Simple Moving Average (SMA) Charts*
* *Exponential Moving Average (EMA) Charts*


#### Simple Pandas plots for the price and volume of Starbucks Stock

In [None]:
# Closing price, one point per row of the history
sbux.plot(y="Close")
# Traded volume, one point per row of the history
sbux.plot(y="Volume")

### Compound Annual Growth Rate (CAGR)

In [None]:
# Re-read the history with Date as a parsed DatetimeIndex. header=0 discards the file's
# own header row so that the column names listed here are used instead.
sbux2 = pd.read_csv(
    's3://databeanstalk-community/data/Starbucks_stock_history.csv',
    header=0,
    index_col=0,
    names=['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Dividends', 'Splits'],
    parse_dates=True,
)

# Calendar days between the first and the last observation in `sbux2`
days = (sbux2.index[-1] - sbux2.index[0]).days

# CAGR = (last close / first close) ** (365 / days) - 1.
# .iloc selects by position explicitly; the original read the start price from row 1
# instead of row 0 and relied on positional [] access on a date-indexed Series.
first_close = sbux2['Close'].iloc[0]
last_close = sbux2['Close'].iloc[-1]
cagr = (last_close / first_close) ** (365.0 / days) - 1

# Print CAGR
print("The CAGR (Compound Annual Growth Rate) of Starbucks since IPO is " , round((cagr*100),2), "% per year")

### Moving Windows


In [None]:
# Closing-price series that the moving-window cells work from
adj_close_px = sbux2['Close']

# Rolling mean of the close over 40 rows
moving_avg = adj_close_px.rolling(40).mean()

# Show the ten most recent values of the rolling mean
moving_avg.tail(10)

In [None]:
# Short moving window rolling mean. The column is labelled '42', so average over 42 rows
# (the original used window=40, which did not match the label shown in the legend).
sbux2['42'] = adj_close_px.rolling(window=42).mean()

# Long moving window rolling mean (252 rows)
sbux2['252'] = adj_close_px.rolling(window=252).mean()

# Plot the closing price together with the short and long rolling means
sbux2[['Close', '42', '252']].plot()

plt.show()

#### OHLC Chart

In [None]:
# OHLC plot using Plotly.
# `go` imported here is reused by the SMA and EMA chart cells further down, so this
# cell has to run before them.
import plotly.graph_objects as go

# One OHLC bar per row of `sbux`, positioned on the Date column
fig = go.Figure(data=go.Ohlc(x=sbux['Date'],
        open=sbux['Open'],
        high=sbux['High'],
        low=sbux['Low'],
        close=sbux['Close']))
fig.show()

### Volatility Calculation

In [None]:
# One-column DataFrame holding the close (double brackets keep it a frame, not a Series)
daily_close_px = sbux2[['Close']]
# Calculate the daily percentage change for `daily_close_px`
# (fractional change from the previous row; the first row has no predecessor and is NaN)
daily_pct_change = daily_close_px.pct_change()

# Plot the distribution of the daily changes as a 50-bin histogram
daily_pct_change.hist(bins=50, sharex=True, figsize=(12,8))

# Show the resulting plot
plt.show()

In [None]:
# Define the minimum of periods to consider 
# (despite the name, the value is passed below as the rolling window length)
min_periods = 75 

# Calculate the volatility: rolling standard deviation of the daily change over
# `min_periods` rows, scaled by sqrt(min_periods)
vol = daily_pct_change.rolling(min_periods).std() * np.sqrt(min_periods) 

# Plot the volatility
vol.plot(figsize=(10, 8))

# Show the plot
plt.show()

In [None]:
# Plot a scatter matrix with the `daily_pct_change` data 
# (`daily_pct_change` is built from the single 'Close' column, so the matrix has one
# panel: the kernel-density estimate requested for the diagonal)
pd.plotting.scatter_matrix(daily_pct_change, diagonal='kde', alpha=0.1,figsize=(12,12))

# Show the plot
plt.show()

#### SMA chart plotting for 5, 20, 50, 200, 500 day moving averages

In [None]:
# Rolling simple moving averages of the close; each window length has its own line colour.
sma_line_colors = {5: 'orange', 20: 'green', 50: 'blue', 200: 'violet', 500: 'purple'}
for sma_window in sma_line_colors:
    sbux[f'SMA{sma_window}'] = sbux.Close.rolling(sma_window).mean()

# OHLC bars first, then one thin line per moving average in ascending window order.
sma_traces = [go.Ohlc(x=sbux['Date'],
                      open=sbux['Open'],
                      high=sbux['High'],
                      low=sbux['Low'],
                      close=sbux['Close'], name = "OHLC")]
for sma_window, line_color in sma_line_colors.items():
    sma_label = f'SMA{sma_window}'
    sma_traces.append(go.Scatter(x=sbux.Date, y=sbux[sma_label],
                                 line=dict(color=line_color, width=1), name=sma_label))

fig = go.Figure(data=sma_traces)
fig.show()

#### EMA chart plotting for 5, 20, 50, 200, 500 day moving averages

In [None]:

# Exponentially weighted moving averages of the close for each span
# (adjust=False selects the recursive form of the weighting).
sbux['EMA5'] = sbux.Close.ewm(span=5, adjust=False).mean()
sbux['EMA20'] = sbux.Close.ewm(span=20, adjust=False).mean()
sbux['EMA50'] = sbux.Close.ewm(span=50, adjust=False).mean()
sbux['EMA200'] = sbux.Close.ewm(span=200, adjust=False).mean()
sbux['EMA500'] = sbux.Close.ewm(span=500, adjust=False).mean()

# Each line plots the EMA column that matches its legend name. The original plotted
# the SMA* columns under EMA* names, so the chart duplicated the SMA chart.
fig = go.Figure(data=[go.Ohlc(x=sbux['Date'],
                              open=sbux['Open'],
                              high=sbux['High'],
                              low=sbux['Low'],
                              close=sbux['Close'], name = "OHLC"),
                      go.Scatter(x=sbux.Date, y=sbux.EMA5, line=dict(color='orange', width=1), name="EMA5"),
                      go.Scatter(x=sbux.Date, y=sbux.EMA20, line=dict(color='green', width=1), name="EMA20"),
                      go.Scatter(x=sbux.Date, y=sbux.EMA50, line=dict(color='blue', width=1), name="EMA50"),
                      go.Scatter(x=sbux.Date, y=sbux.EMA200, line=dict(color='violet', width=1), name="EMA200"),
                      go.Scatter(x=sbux.Date, y=sbux.EMA500, line=dict(color='purple', width=1), name="EMA500")])
fig.show()

#### Reset the index on to the Date column

In [None]:
# Displays a copy of the frame indexed by Date. set_index() returns a new frame and the
# result is not assigned, so `sbux` itself keeps its original index and its Date column.
# NOTE(review): the heading suggests the index was meant to change permanently. Confirm
# before assigning the result: the chart cells above read sbux['Date'] as a column and
# would fail on re-run once Date becomes the index.
sbux.set_index('Date')

#### Create a column on daily change percentages for the Stock

In [None]:
# Day-over-day change of the close, in percent (first row is NaN: no previous close)
sbux['daily_change_pct'] = sbux['Close'].pct_change()*100
# Simple daily return as a fraction (0.01 == 1%). The original divided the percentage
# change by the closing price, which is not a return.
sbux['returns'] = sbux['Close'].pct_change()
sbux.head()

#### Create a histogram on the daily changes / percentage

In [None]:
# hist() and describe() both skip NaN, so the first row (which has no previous close)
# needs no special handling. The original called fillna(0) here but discarded the
# result, so that statement had no effect and is dropped.
sbux['daily_change_pct'].hist(bins = 50, figsize = (10,5))
plt.xlabel('Daily Change Percentage')
plt.ylabel('Frequency')
plt.show()
# Summary statistics of the daily change percentage
sbux.daily_change_pct.describe()

#### Plot the 7-day rolling standard deviation (scaled by √7) of the volume of stocks traded

In [None]:
# Rolling standard deviation of traded volume over 7 rows, scaled by sqrt(7)
sbux_vol = sbux['Volume'].rolling(7).std()*np.sqrt(7)
sbux_vol.plot(figsize = (15, 7))

#### Define a daily trend function for usage in aggregation

In [None]:
def daily_trend(x):
    """Map a daily percentage change to its descriptive trend bucket.

    The buckets are contiguous, right-closed intervals: a value equal to a boundary
    belongs to the bucket below it (0.5 -> 'No change', -0.5 -> 'Upto 2% Decrease').

    Args:
        x: Daily change in percent.

    Returns:
        The bucket label, or None when x compares false against every bound
        (for example NaN).
    """
    if x <= -10:
        return '>10% Decrease'

    # Upper edge of every remaining bounded bucket in ascending order; the first
    # edge that x does not exceed identifies its bucket.
    upper_edges = (
        (-5, '5-10% Decrease'),
        (-2, '2-5% Decrease'),
        (-0.5, 'Upto 2% Decrease'),
        (0.5, 'No change'),
        (2, 'Upto 2% Increase'),
        (5, '2-5% Increase'),
        (10, '5-10% Increase'),
    )
    for upper_edge, label in upper_edges:
        if x <= upper_edge:
            return label

    if x > 10:
        return '>10% Increase'
    return None

#### Classify each day's percentage change into a trend category

In [None]:
# Label every day with its trend bucket. daily_trend() yields None for the first row,
# whose percentage change is NaN; fillna() reports it as 'No change'.
# The original replace('None', ...) targeted the *string* 'None' and discarded its
# result, and the np.zeros() pre-fill was overwritten immediately (and depended on the
# column holding exactly one NaN), so both are dropped.
sbux['Trend'] = sbux['daily_change_pct'].apply(daily_trend).fillna('No change')
sbux.head()

#### A Pie & Bar Chart of Daily Percentage change categories

In [None]:
sbux_pie_data = sbux.groupby('Trend')
# Number of days in each trend category, computed once and reused by both charts
trend_counts = sbux_pie_data['Trend'].count()

# Label each wedge with its category; without labels the pie cannot be read.
plt.pie(trend_counts, labels = trend_counts.index,
        autopct = '%1.1f%%', radius = 2 )
plt.show()

# Same counts as a bar chart, most frequent category first
ax=trend_counts.sort_values(ascending=False).plot.bar(rot=90)
plt.show()

# [3️⃣. Dataset Visualization Using MPLFinance Plots](#3)


#### Another Candle Stick Plot with Moving Averages and show of non trading days

In [None]:
# Preview the frame with the columns added so far.
# NOTE(review): the heading above announces a candlestick plot, but this cell only
# shows head() — confirm whether the plotting cell is missing.
sbux.head()

# [4️⃣. Stock Data Analysis using standard techniques](#4)


###### Following is a reuse from the kernel https://www.kaggle.com/datapple/eda-tesla-stock-price-financial-analysis and modified for the analysis

#### Identify the days where the SMA20 is greater than SMA50 and tag the dates.These are target investment dates

In [None]:
# 1 on days where SMA20 is above SMA50 (hold one share), otherwise 0.
# A comparison involving NaN is False, so the moving-average warm-up rows get 0 exactly
# as before; the vectorised comparison replaces a per-row .loc lookup.
sbux['Shares'] = (sbux['SMA20'] > sbux['SMA50']).astype(int)

### Tag the dates as profit when the closing price is greater than the preceding date 

#### Plot the dates of wealth generation over the years

In [None]:
# Next row's close, aligned with the current row (NaN on the last row)
sbux['Close1'] = sbux['Close'].shift(-1)
# One-day price change earned on days a share is held, 0 on all other days.
# Series.where keeps the difference where the condition holds and substitutes 0
# elsewhere, replacing the original per-row .loc lookups.
sbux['Profit'] = (sbux['Close1'] - sbux['Close']).where(sbux['Shares'] == 1, 0)
sbux['Profit'].plot()
# Zero line separating gaining days from losing days
plt.axhline(y=0, color='red')

#### Calculate the cumulated wealth on the stock

In [None]:
# Running total of the per-day profit. A NaN Profit (possible on the last row, which
# has no next-day close) stays NaN at that position in the running total.
sbux['wealth'] = sbux['Profit'].cumsum()
sbux.tail()

In [None]:
# Cumulative profit over the whole history
sbux['wealth'].plot()
# The title reports the wealth at the second-to-last row (index[-2]), presumably because
# the last row has no next-day close and its value can be NaN.
plt.title('Total money made by Starbucks Stock (number of times ): {}'.format(round((sbux.loc[sbux.index[-2], 'wealth']),1)))

#### Calculate the probability of better investment

In [None]:
# One-day-ahead log return: log(close[t+1]) - log(close[t]); the last row is NaN
sbux['LogReturn'] = np.log(sbux['Close']).shift(-1) - np.log(sbux['Close'])
print(sbux['LogReturn'])

#### Build a probability distribution function plot using scipy stats package

#### Plot a histogram on logarithm of returns

In [None]:
from scipy.stats import norm

# Parameters of the normal distribution fitted to the daily log returns
mu = sbux['LogReturn'].mean()
sigma = sbux['LogReturn'].std(ddof=1)

# Evaluate the fitted pdf on a grid that extends slightly beyond the observed range
density = pd.DataFrame()
density['x'] = np.arange(sbux['LogReturn'].min()-0.01, sbux['LogReturn'].max()+0.01, 0.001)
density['pdf'] = norm.pdf(density['x'], mu, sigma)

# density=True normalises the histogram to unit area so its bars share the y-scale of
# the pdf; the original drew raw counts, against which the pdf curve is not comparable.
sbux['LogReturn'].hist(bins=50, figsize=(15, 8), density=True)
plt.plot(density['x'], density['pdf'], color='red')
plt.show()

#### Check for the probability of drops and jumps for a range

In [None]:
# Scale the daily mean and standard deviation of the log return to a longer horizon.
# NOTE(review): mu and sigma come from per-row log returns, yet they are scaled by 365
# while the variable names say 220 — confirm the intended number of periods per year.
mu220 = 365*mu
sigma220 = (365**0.5) * sigma
for i in range(-5, 6, 1):
    # Lower tail: probability that the log return is at or below -(i*5)%
    drop = norm.cdf((-i)*0.05, mu220, sigma220)
    # Upper tail: probability that the log return is at or above +(i*5)%.
    # The original used the lower-tail cdf with a negated label, which simply repeated
    # the drop probability under the name "jump".
    jump = norm.sf(i*0.05, mu220, sigma220)
    print('The probability of %d percent drop is = %f & The probability of %d percent jump is = %f' % (i*5, round(drop,2), i*5, round(jump,2)))

#### What is the Value At Risk for the Stock?

In [None]:
# 5% quantile of the normal distribution with the daily log-return mean `mu` and
# standard deviation `sigma`
VaR = norm.ppf(0.05, mu, sigma)
print('Single day value at risk is: ', VaR)

#### What are the 5% and 95% quantiles of the daily log return?

In [None]:
# Quantiles of the normal distribution with mean `mu` and standard deviation `sigma`
# 5% quantile (the same value as the single-day VaR computed above)
print('5% quantile ', norm.ppf(0.05, mu, sigma))
# 95% quantile
print('95% quantile ', norm.ppf(0.95, mu, sigma))

#### What is the confidence level at various levels?

In [None]:
# One-day-ahead log return (the last row is NaN because it has no next close)
sbux['logReturn'] = np.log(sbux['Close'].shift(-1)) - np.log(sbux['Close'])
# count() ignores NaN, matching the observations that mean() and std() actually use;
# the original took shape[0], which also counted the trailing NaN row.
sample_size = sbux['logReturn'].count()
sample_mean = sbux['logReturn'].mean()
# Standard error of the mean
sample_std = sbux['logReturn'].std(ddof=1) / sample_size**0.5
# Two-sided intervals: cutting 5*i percent from each tail leaves 100 - 10*i percent
for i in range(1,10):
    z_left = norm.ppf(0.05*i)
    z_right = norm.ppf((1-0.05*i))
    interval_left = sample_mean+z_left*sample_std
    interval_right = sample_mean+z_right*sample_std
    print('%d percent confidence interval is ' % (100-i*2*5), ( round(interval_left,6), round(interval_right,6)))

# [4️⃣.1️⃣ MACD Forecasting of data using Pandas TA](#4.1)

### Let us do a bit of MACD based Trading indicators


##### Inspiration from Alpharithms -  https://www.alpharithms.com/calculate-macd-python-272222/

#### Created MACD Values

In [None]:

# Get the 12-period EMA of the closing price (fast line).
# The original comments on this line and the next were swapped.
k = sbux['Close'].ewm(span=12, adjust=False, min_periods=12).mean()
# Get the 26-period EMA of the closing price (slow line)
d = sbux['Close'].ewm(span=26, adjust=False, min_periods=26).mean()
# Subtract the 26-period EMA from the 12-period EMA to get the MACD
macd = k - d
# Get the 9-period EMA of the MACD for the trigger (signal) line
macd_s = macd.ewm(span=9, adjust=False, min_periods=9).mean()
# Difference between the MACD and its trigger line: the convergence/divergence value
macd_h = macd - macd_s
# The three series share sbux's index, so they can be assigned directly; the original
# went through sbux.index.map(...) to reach the same values.
sbux['macd'] = macd
sbux['macd_h'] = macd_h
sbux['macd_s'] = macd_s
# View our data with every column shown
pd.set_option("display.max_columns", None)
print(sbux)

#### Calculate MACD through Pandas Technical Analysis Library

In [None]:
try: 
    import pandas_ta as ta
except:
    !pip install pandas_ta
    import pandas_ta as ta



In [None]:

# Calculate MACD values using the pandas_ta library; append=True adds the result
# columns to sbux2 itself.
# NOTE(review): sbux2's column is named 'Close' at this point while 'close' is passed
# here — presumably pandas_ta resolves the name case-insensitively; confirm.
sbux2.ta.macd(close='close', fast=12, slow=26, signal=9, append=True)
# View result
pd.set_option("display.max_columns", None)  # show all columns
print(sbux2)

### Let us do some plotting

In [None]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Calculate the MACD columns only when they are not already present. The previous cell
# appends them and this cell lower-cases all column names below; recomputing on a
# re-run would presumably append differently-cased copies that collapse into duplicate
# column names after lower-casing.
existing_columns = {str(col).lower() for col in sbux2.columns}
if 'macd_12_26_9' not in existing_columns:
    sbux2.ta.macd(close='close', fast=12, slow=26, append=True)
# Force lowercase so the columns can be addressed as 'open', 'macd_12_26_9', ...
sbux2.columns = [x.lower() for x in sbux2.columns]

# Construct a 2 x 1 Plotly figure: prices on top, MACD below.
# add_trace(..., row=, col=) replaces the deprecated append_trace().
fig = make_subplots(rows=2, cols=1)
# Price line
fig.add_trace(
    go.Scatter(
        x=sbux2.index,
        y=sbux2['open'],
        line=dict(color='lawngreen', width=1),
        name='open',
        # showlegend=False,
        legendgroup='1',
    ), row=1, col=1
)
# Candlestick chart for pricing
fig.add_trace(
    go.Candlestick(
        x=sbux2.index,
        open=sbux2['open'],
        high=sbux2['high'],
        low=sbux2['low'],
        close=sbux2['close'],
        increasing_line_color='lawngreen',
        decreasing_line_color='black',
        showlegend=False
    ), row=1, col=1
)
# MACD line
fig.add_trace(
    go.Scatter(
        x=sbux2.index,
        y=sbux2['macd_12_26_9'],
        line=dict(color='lawngreen', width=2),
        name='macd',
        # showlegend=False,
        legendgroup='2',
    ), row=2, col=1
)
# Signal line
fig.add_trace(
    go.Scatter(
        x=sbux2.index,
        y=sbux2['macds_12_26_9'],
        line=dict(color='mediumblue', width=2),
        # showlegend=False,
        legendgroup='2',
        name='signal'
    ), row=2, col=1
)
# Colorize the histogram values: black below zero, green otherwise
colors = np.where(sbux2['macdh_12_26_9'] < 0, '#000', 'lawngreen')
# Plot the histogram
fig.add_trace(
    go.Bar(
        x=sbux2.index,
        y=sbux2['macdh_12_26_9'],
        name='histogram',
        marker_color=colors,
    ), row=2, col=1
)
# Make it pretty
layout = go.Layout(
    plot_bgcolor='linen',
    # Font Families
    font_family='Monospace',
    font_color='mediumblue',
    font_size=20,
    xaxis=dict(
        rangeslider=dict(
            visible=False
        )
    )
)
# Update options and show plot
fig.update_layout(layout)
fig.update_layout(height=800,width=1200,dragmode='lasso')
fig.show()

# [5️⃣ Let us do a financial ratios calculation using FINTA library](#5)

* Simple Moving Average 'SMA'
* Simple Moving Median 'SMM'
* Smoothed Simple Moving Average 'SSMA'
* Exponential Moving Average 'EMA'
* Double Exponential Moving Average 'DEMA'
* Triple Exponential Moving Average 'TEMA'
* Triangular Moving Average 'TRIMA'
* Triple Exponential Moving Average Oscillator 'TRIX'
* Volume Adjusted Moving Average 'VAMA'
* Kaufman Efficiency Indicator 'ER'
* Kaufman's Adaptive Moving Average 'KAMA'
* Zero Lag Exponential Moving Average 'ZLEMA'
* Weighted Moving Average 'WMA'
* Hull Moving Average 'HMA'
* Elastic Volume Moving Average 'EVWMA'
* Volume Weighted Average Price 'VWAP'
* Smoothed Moving Average 'SMMA'
* Fractal Adaptive Moving Average 'FRAMA'
* Moving Average Convergence Divergence 'MACD'
* Percentage Price Oscillator 'PPO'
* Volume-Weighted MACD 'VW_MACD'
* Elastic-Volume weighted MACD 'EV_MACD'
* Market Momentum 'MOM'
* Rate-of-Change 'ROC'
* Relative Strength Index 'RSI'
* Inverse Fisher Transform RSI 'IFT_RSI'
* True Range 'TR'
* Average True Range 'ATR'
* Stop-and-Reverse 'SAR'
* Bollinger Bands 'BBANDS'
* Bollinger Bands Width 'BBWIDTH'
* Momentum Breakout Bands 'MOBO'
* Percent B 'PERCENT_B'
* Keltner Channels 'KC'
* Donchian Channel 'DO'
* Directional Movement Indicator 'DMI'
* Average Directional Index 'ADX'
* Pivot Points 'PIVOT'
* Fibonacci Pivot Points 'PIVOT_FIB'
* Stochastic Oscillator %K 'STOCH'
* Stochastic oscillator %D 'STOCHD'
* Stochastic RSI 'STOCHRSI'
* Williams %R 'WILLIAMS'
* Ultimate Oscillator 'UO'
* Awesome Oscillator 'AO'
* Mass Index 'MI'
* Vortex Indicator 'VORTEX'
* Know Sure Thing 'KST'
* True Strength Index 'TSI'
* Typical Price 'TP'
* Accumulation-Distribution Line 'ADL'
* Chaikin Oscillator 'CHAIKIN'
* Money Flow Index 'MFI'
* On Balance Volume 'OBV'
* Weighted OBV 'WOBV'
* Volume Zone Oscillator 'VZO'
* Price Zone Oscillator 'PZO'
* Elder's Force Index 'EFI'
* Cumulative Force Index 'CFI'
* Bull power and Bear Power 'EBBP'
* Ease of Movement 'EMV'
* Commodity Channel Index 'CCI'
* Coppock Curve 'COPP'
* Buy and Sell Pressure 'BASP'
* Normalized BASP 'BASPN'
* Chande Momentum Oscillator 'CMO'
* Chandelier Exit 'CHANDELIER'
* Qstick 'QSTICK'
* Twiggs Money Index 'TMF'
* Wave Trend Oscillator 'WTO'
* Fisher Transform 'FISH'
* Ichimoku Cloud 'ICHIMOKU'
* Adaptive Price Zone 'APZ'
* Squeeze Momentum Indicator 'SQZMI'
* Volume Price Trend 'VPT'
* Finite Volume Element 'FVE'
* Volume Flow Indicator 'VFI'
* Moving Standard deviation 'MSD'
* Schaff Trend Cycle 'STC'


In [None]:
try:
    from finta import TA
    from backtesting import Backtest, Strategy
    from backtesting.lib import crossover
except:
    !pip install finta backtesting
    from finta import TA
    from backtesting import Backtest, Strategy
    from backtesting.lib import crossover


In [None]:
# Load another copy of the history, indexed by the parsed Date column, for FINTA
fin_sbux = pd.read_csv('s3://databeanstalk-community/data/Starbucks_stock_history.csv', index_col="Date", parse_dates=True)
print(fin_sbux.head())
# `ohlc` is a second name for the same frame (no copy); the backtesting cells reuse it
ohlc=fin_sbux
# Simple moving average with 42 passed as the period argument
print(TA.SMA(ohlc, 42))
# Will return a pandas Series object with "Awesome oscillator" values
# (the result is neither assigned nor printed here, so it is discarded)
TA.AO(ohlc)
# Expects a ["volume"] column as input
print(TA.OBV(ohlc))
# Will return the Bollinger Bands columns [BB_UPPER, BB_LOWER]
print(TA.BBANDS(ohlc))
# Will return the calculated BBANDS values but will use KAMA instead of MA for the calculation; other types of moving averages are allowed as well.
print(TA.BBANDS(ohlc, MA=TA.KAMA(ohlc, 20)))

## Use backtesting library to do a simple data backtesting

In [None]:
# Defining DEMA cross strategy
class DemaCross(Strategy):
    """Crossover strategy on a fast and a slow double exponential moving average (DEMA).

    Buys when the fast DEMA crosses above the slow DEMA and sells when the slow DEMA
    crosses above the fast one.

    Attributes:
        fast: Period of the fast DEMA (default 10).
        slow: Period of the slow DEMA (default 20).
    """

    # Class-level attributes are backtesting.py strategy parameters, so the periods can
    # be overridden per run without editing the class. The defaults reproduce the
    # previously hard-coded 10/20.
    fast = 10
    slow = 20

    def init(self):
        """Pre-compute both DEMA indicators on the data handed to Backtest."""
        # Use the frame that Backtest was constructed with instead of the
        # notebook-global `ohlc`, so the indicators always match the traded data.
        price_frame = self.data.df
        self.ma1 = self.I(TA.DEMA, price_frame, self.fast)
        self.ma2 = self.I(TA.DEMA, price_frame, self.slow)

    def next(self):
        """Place an order on the bar where the two averages cross."""
        if crossover(self.ma1, self.ma2):
            self.buy()
        elif crossover(self.ma2, self.ma1):
            self.sell()

In [None]:
# Set up a backtest of the DEMA crossover strategy on the FINTA price frame with 10000
# in starting cash.
# NOTE(review): commission=0.025 is presumably a 2.5% fee per trade — confirm that this
# is the intended rate.
bt = Backtest(ohlc, DemaCross,
              cash=10000, commission=0.025)

### Let us check the Backtesting summary

In [None]:
# Run the backtest; the value returned by run() is displayed as the cell output
bt.run()

## It is very evident that, albeit with some volatility, it is a pretty solid long-term stock with an outstanding run over the last 5 years.

> 

# [6️⃣ Let us do a financial ratios calculation](#6)


### Let us try Sharpe Ratio

In [None]:
def sharpe_ratio(return_series, N, rf):
    """Annualised Sharpe ratio of a series of per-period returns.

    Args:
        return_series: Per-period returns (pandas Series).
        N: Number of periods per year used for annualising.
        rf: Annual risk-free rate, as a fraction.

    Returns:
        (mean * N - rf) / (std * sqrt(N)).
    """
    annual_excess_return = N * return_series.mean() - rf
    annual_volatility = np.sqrt(N) * return_series.std()
    return annual_excess_return / annual_volatility

N = 255 #255 trading days in a year
rf =0.018 #1.8% risk free rate
# The Sharpe ratio is defined on returns. The original passed the raw closing prices,
# so the printed figure was a statistic of price levels, not a risk-adjusted return.
temp_df = sbux[['Close']].pct_change().dropna()
sharpes = temp_df.apply(sharpe_ratio, args=(N,rf,))
# .iloc[0] picks the single ('Close') entry by position explicitly
print("The Sharpe Ratio for the stock is =" , round(sharpes.iloc[0],2))

In [None]:
def calculate_sortino(series, N, rf):
    """Annualised Sortino-style ratio of a series of per-period returns.

    The original ignored N and subtracted the annual risk-free rate from a per-period
    mean. Both the excess return and the downside deviation are now annualised with N,
    matching the convention of sharpe_ratio().

    Args:
        series: Per-period returns (pandas Series, list or array); NaN values are ignored.
        N: Number of periods per year used for annualising.
        rf: Annual risk-free rate, as a fraction.

    Returns:
        (mean * N - rf) / (downside deviation * sqrt(N)), rounded to 2 decimals. The
        downside deviation is the population standard deviation of the returns that
        fall below the average return. The result is NaN when no return is below it.
    """
    returns = np.asarray(series, dtype=float)
    returns = returns[~np.isnan(returns)]
    expected_return = returns.mean()
    # Downside risk only looks at the below-average returns
    below_avg = returns[returns < expected_return]
    annual_excess_return = expected_return * N - rf
    annual_downside_dev = np.std(below_avg) * np.sqrt(N)
    sortino = annual_excess_return / annual_downside_dev
    return round(sortino, 2)

# The ratio is defined on returns, so derive them from the close here instead of
# relying on a shared frame of price levels.
close_returns = sbux[['Close']].pct_change().dropna()
sortinos = close_returns.apply(calculate_sortino, args=(N,rf,))
# .iloc[0] picks the single ('Close') entry by position explicitly
print("The Sortino Ratio for the stock is =" , round(sortinos.iloc[0],2))

In [None]:
def max_drawdown(return_series):
    """Largest peak-to-trough decline of the compounded return path.

    Args:
        return_series: Per-period returns as fractions (pandas Series).

    Returns:
        The minimum of (compounded value / running peak) - 1, i.e. a number <= 0.
    """
    growth_path = (1 + return_series).cumprod()
    running_peak = growth_path.expanding(min_periods=1).max()
    drawdown = growth_path / running_peak - 1
    return drawdown.min()


# max_drawdown() compounds (1 + return), so it must be fed returns; the original
# passed raw closing prices.
close_returns = sbux[['Close']].pct_change().dropna()
max_drawdowns = close_returns.apply(max_drawdown,axis=0)
# .iloc[0] picks the single ('Close') entry by position explicitly
print("The max_drawdowns Ratio for the stock is =" , round(max_drawdowns.iloc[0],2))

## Try Bollinger Bands Price

In [None]:
def bollinger_bands_price(price, timeperiod=20, nbdevup=2, nbdevdn=2):
    """Position of the close inside its Bollinger Bands (%B).

    The original called an undefined `BBANDS` function on an undefined `close` variable
    and raised NameError when used; the bands are now computed with pandas.

    Args:
        price: DataFrame with a 'Close' column.
        timeperiod: Rolling window length of the middle band.
        nbdevup: Number of standard deviations from the middle to the upper band.
        nbdevdn: Number of standard deviations from the middle to the lower band.

    Returns:
        Series of (close - lower) / (upper - lower): 0 on the lower band, 1 on the
        upper band. The first timeperiod - 1 rows are NaN.
    """
    close = price['Close']
    window = close.rolling(timeperiod)
    mid = window.mean()
    # Population standard deviation (ddof=0), presumably what the BBANDS routine the
    # original referred to uses — confirm if exact parity is required.
    band_std = window.std(ddof=0)
    up = mid + nbdevup * band_std
    low = mid - nbdevdn * band_std
    return (close - low) / (up - low)

# [7️⃣ Let us do a bit of forecasting using Regression ](#7)

 Some inspiration from the following post as well
*  [MLQ.AI](https://www.mlq.ai/price-prediction-with-linear-regression/)


In [None]:
# Histogram of the daily price change percent of 'Close' price
sbux['Close'].pct_change().plot.hist(bins=50)
plt.title('Daily Price: 1-Day Percent Change')
plt.show() 

In [None]:
# Create 5-day % changes of Last for the current day, and 5 days in the future
sbux['5d_future_close'] = sbux['Close'].shift(-5)
sbux['5d_close_future_pct'] = sbux['5d_future_close'].pct_change(5)
sbux['5d_close_pct'] = sbux['Close'].pct_change(5)

In [None]:
# Calculate the correlation matrix between the 5d close pecentage changes (current and future)
# Correlation matrix between the current and the future 5-day percentage change.
# The result is only stored here; it is not displayed by this cell.
corr = sbux[['5d_close_pct', '5d_close_future_pct']].corr()

In [None]:
# Scatter the current 5-day percent change vs the future 5-day percent change
plt.scatter(sbux['5d_close_pct'], sbux['5d_close_future_pct'])
plt.title('Current vs. Future 5-Day % Change')
plt.show()

In [None]:
# a list of the feature names for later
# Columns used as regression inputs by the cells below; a list so that more features
# can be added in one place
feature_names = ['5d_close_pct']  

In [None]:
# Drop all na values
sbux = sbux.dropna()

# Create features and targets
# use feature_names for features; 5d_close_future_pct for targets
features = sbux[feature_names]
targets = sbux['5d_close_future_pct']

# Create DataFrame from target column and feature columns
feat_targ_df = sbux[['5d_close_future_pct'] + feature_names]

# Calculate correlation matrix
corr = feat_targ_df.corr()
print(corr)

In [None]:
# seaborn is already imported at the top of the notebook; importing it again is harmless
import seaborn as sns
# Plot heatmap of the correlation matrix with the coefficient written in each cell
sns.heatmap(corr, annot=True)
plt.yticks(rotation=0); plt.xticks(rotation=90) # fix ticklabel directions
plt.show() # show the plot

In [None]:
import statsmodels.api as sm

# Add a constant column to the features
linear_features = sm.add_constant(features)

# Create a size for the training set that is 80% of the total number of rows
# .shape[0] gives the number of rows in the data; the product is truncated to an int
train_size = int(0.8 * features.shape[0])

# Split features and targets by position: the first 80% of the rows are used for
# training and the remaining rows for testing (no shuffling)
train_features = linear_features[:train_size]
train_targets = targets[:train_size]
test_features = linear_features[train_size:]
test_targets = targets[train_size:]
print(linear_features.shape, train_features.shape, test_features.shape)

In [None]:
# Create the linear model and complete the least squares fit
model = sm.OLS(train_targets, train_features)
results = model.fit()  # fit the model
print(results.summary())

# examine pvalues
# Features with p <= 0.05 are typically considered significantly different from 0
print(results.pvalues)

# Make predictions from our model for train and test sets
train_predictions = results.predict(train_features)
test_predictions = results.predict(test_features)

# [8️⃣ Let us do a Forecasting using FB Prophet ](#8)


# Use of Facebook Prophet for Stock Forecasting
## Starbucks Stock forecast 
* This kernel uses the Facebook Prophet (which is built on Stan) and Neural Prophet (which uses PyTorch) libraries to predict stocks. This kernel splits the NASDAQ stocks into ten folds and creates 10 output files each for the stocks in a PDF Format

###  Yahoo! DataReader Download
<div class="alert alert-block alert-info">
<b>Step-1:</b> Download data using Yahoo! Finance Data Reader</div>

###  Facebook Prophet Models
<div class="alert alert-block alert-info">
<b>Step-2:</b> Build Facebook Prophet Input dataset</div>
<div class="alert alert-block alert-info">
<b>Step-3:</b> Run Prophet Model</div>
<div class="alert alert-block alert-info">
<b>Step-4:</b> Generate Model Visualization </div>
<div class="alert alert-block alert-info">
<b>Step-5:</b> Generate the Image Files </div>
<div class="alert alert-block alert-info">
<b>Step-6:</b> Consolidate Image Files for Facebook Prophet Model </div>

###  Neural Prophet Models
<div class="alert alert-block alert-warning">
<b>Step-7-11:</b> Repeat Steps 2-6 for Neural Prophet Model </div>



In [None]:
def plot_close_val(data_frame, column, stock):
    """Draw one column of a price frame as a line chart against the frame's index.

    Args:
        data_frame: Frame holding the price history.
        column: Name of the column to plot; also used in the title and y-axis label.
        stock: Display name of the stock for the title and y-axis label.
    """
    fig, ax = plt.subplots(figsize=(16,6))
    ax.set_title(column + ' Price History for ' + stock )
    ax.plot(data_frame[column])
    ax.set_xlabel('Date', fontsize=18)
    ax.set_ylabel(column + ' Price USD ($) for ' + stock, fontsize=18)
    plt.show()

In [None]:
def fb_prophet_plot(df, param, stock, periods=50, freq='MS'):
    """Fit a Prophet model to one column of `df` and plot the fit, forecast and components.

    Args:
        df: Frame whose index holds the observation dates (datetime-like, or strings
            that pandas can parse).
        param: Name of the column to forecast.
        stock: Display name of the stock, used in the plot titles.
        periods: Number of future steps to forecast (default 50, the value that was
            previously hard-coded in two places).
        freq: Pandas frequency string of the future steps (default 'MS', as before).
    """
    # Prophet expects a frame with the dates in 'ds' and the values in 'y'.
    # Dates are parsed without a fixed format: the original forced format='%Y-%m',
    # which rejects daily date strings such as '2020-01-31'.
    df2 = pd.DataFrame({'ds': pd.to_datetime(df.index), 'y': df[param].values})

    # Create and fit the model.
    # NOTE(review): `Prophet` is not imported in the visible part of this notebook —
    # confirm that an import exists in a cell that runs before this function is called.
    model = Prophet(interval_width=0.95, yearly_seasonality=True)
    model.fit(df2)

    # Predict over the history plus `periods` future steps
    future = model.make_future_dataframe(periods=periods, freq=freq)
    forecast = model.predict(future)
    # Drop the `periods` future rows to keep the in-sample predictions. Positional
    # slicing also works for periods=0, where the original [:-periods] form would
    # return an empty selection.
    preds = forecast['yhat'].iloc[:len(forecast) - periods]

    # In-sample predictions against the actual values
    title = 'FB Prophet predictions for ==> ' + stock
    plt.figure(figsize=(16,8))
    plt.plot(df2.ds, preds, color='#555555', label=' Predictions')
    plt.plot(df2.ds, df2.y, color='#1155FF', label='Actual')
    plt.title(title)
    plt.legend()
    plt.show()

    # Prophet's own forecast plot, followed by the component plots
    figure = model.plot(forecast, xlabel='Date', ylabel='Price')
    figure.set_size_inches(16,8)
    plt.title(title)
    model.plot_components(forecast)
    plt.show()

In [None]:
# The x-axis is whatever index `sbux` has at this point; Date is not set as the index
# by any cell above — NOTE(review): confirm that a date axis is not expected here.
plot_close_val(sbux, 'Close', 'sbux') # Plot the Closing Price