###### Package Imports

In [3]:
#  NumPy and Pandas imports
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

#  Reading time series
from pandas_datareader import data

#  Time stamps
import datetime as datetime

#  statistics package
from scipy import stats

#  Visualization (sns is a visualization library based on matplotlib)
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('darkgrid')
%matplotlib inline

###### Extract Data

In [4]:
#  Ticker symbols of the technology stocks to download
ticker_index_data = ['AMD', 'CSCO', 'INTC', 'MU', 'ORCL', 'QCOM']

#  First and last calendar dates of the retrieval window
start, end = datetime.date(2017, 10, 10), datetime.date(2019, 4, 10)

#  Download the history of every ticker from the 'yahoo' source,
#  then keep only the 'Close' prices
price_history = data.DataReader(name=ticker_index_data, data_source='yahoo',
                                start=start, end=end)
stock_closing_prices = price_history['Close']


# Part 2: Exploring the Data
------


## Daily Return
------
To quantify the daily return, the **percentage change** is calculated as the
difference between the current and prior element, divided by the prior element.
Applying this calculation to the 'Close' attribute gives the daily
return for each technology stock.

In [6]:
#  Daily return: relative change of each closing price versus the previous row
tech_daily_return = stock_closing_prices.pct_change(periods=1)
 

### Describe Daily Return
* The sample statistics show: 
    
    * The average daily return over the observation period. 
             
             - All stocks averaged a positive daily return
    * The lowest daily return change (biggest price drop) and the maximum
      return (biggest price increase) for each stock. 
            
    * The volatility of each stock is described with standard deviation. 
            
            - AMD had daily returns with the most dispersion from its mean 
    
    * The descriptive statistics show CSCO, MU, ORCL, and QCOM have a median (50%) above the mean, so more of their daily returns lie above the mean than below it
            
            - The mean is therefore likely pulled down by a small number of large negative returns.

In [8]:
#  Summary statistics of the daily returns, one column per stock
daily_return_summary = tech_daily_return.describe()
daily_return_summary


Symbols,AMD,CSCO,INTC,MU,ORCL,QCOM
count,377.0,377.0,377.0,377.0,377.0,377.0
mean,0.002654,0.001453,0.001102,0.000476,0.000395,0.000272
std,0.039583,0.015026,0.019838,0.03025,0.014349,0.018691
min,-0.154454,-0.052529,-0.08589,-0.098708,-0.094321,-0.081633
25%,-0.016918,-0.005587,-0.008835,-0.016053,-0.005285,-0.008312
50%,0.002024,0.001663,0.000755,0.000892,0.001622,0.000609
75%,0.02267,0.008961,0.011899,0.018915,0.007929,0.009129
max,0.199481,0.055042,0.105519,0.113706,0.044507,0.127097


### Plot Daily Return

In [None]:
#  One line chart per stock (subplots=True) of its daily percentage change
daily_return_axes = tech_daily_return.plot(
    subplots=True,
    figsize=(18, 10),
    title='Daily Percentage Change',
    legend=True,
)
daily_return_axes


### Test Correlation between Daily Percentage Change of each stock
##### Correlation function for Pairplots

In [None]:
def corrfunc(x, y, **kws):
    """Write the Pearson correlation of ``x`` and ``y`` on the current axes.

    The coefficient is formatted to four decimal places and placed near the
    top-left corner of the axes (axes-fraction coordinates 0.1, 0.9).

    Parameters
    ----------
    x, y : array-like
        The two samples passed to ``scipy.stats.pearsonr``.
    **kws
        Accepted so the function can be used as a grid mapping callback;
        the keyword arguments are not used.
    """
    #  pearsonr returns (coefficient, p-value); only the coefficient is shown
    pearson_r = stats.pearsonr(x, y)[0]
    label = "r = {:.4f}".format(pearson_r)

    current_axes = plt.gca()
    current_axes.annotate(label, xy=(.1, .9), xycoords=current_axes.transAxes)

##### Pair plot 
The diagonal shows the distribution of a single variable, while the upper and
lower triangles show the relationship between two variables.

Linear regression models are fitted to the scatter plots in the upper triangle.

Kernel density estimates are drawn for the univariate plots on the diagonal and as bivariate plots in the lower triangle.


In [None]:
#  Grid of pairwise plots over the daily returns, rows containing NaN removed
change_pairplot = sns.PairGrid(tech_daily_return.dropna())

#  Upper triangle: scatter plot with a fitted regression line drawn in red
regression_options = {'fit_reg': True, 'line_kws': {'color': 'red'}}
change_pairplot.map_upper(sns.regplot, **regression_options)

#  Lower triangle: kernel density estimate of each pair of stocks
density_options = {'cmap': "Blues_d"}
change_pairplot.map_lower(sns.kdeplot, **density_options)

#  Diagonal: distribution of each single stock
change_pairplot.map_diag(sns.distplot)

#  Second pass over the upper triangle: annotate each panel with Pearson's r
change_pairplot.map_upper(corrfunc)


##### Correlation Matrix of Daily Percentage Change

In [None]:
#  Pairwise Pearson correlation of the daily returns
daily_returns_corr = tech_daily_return.corr(method='pearson')

#  Shade each cell with the 'coolwarm' colormap and display 3 decimal places
styled_returns_corr = daily_returns_corr.style.background_gradient(cmap='coolwarm')
styled_returns_corr.set_precision(3)

##### Cluster Map of Daily Percentage Change  

In [None]:
#  z_score=0 standardizes each row of daily returns with a z-score
#  metric="correlation" is the distance used for the clustering
#  Rows are not clustered (row_cluster=False), so the dendrogram groups the stock columns
returns_cluster_options = dict(
    metric="correlation",
    method='single',
    z_score=0,
    row_cluster=False,
    cmap='coolwarm',
    yticklabels=False,
    figsize=(8, 14),
)
graph_returns = sns.clustermap(tech_daily_return.dropna(), **returns_cluster_options)

##### Cluster Map of Monthly Percentage Change
Convert Data into months

In [None]:
#  Work on a copy so that replacing the index below does not also modify
#  tech_daily_return (a plain assignment would only alias the same DataFrame)
tech_month_change = tech_daily_return.copy()
tech_month_change.index = pd.to_datetime(tech_month_change.index)

#  Monthly percentage change = compounded daily returns within each calendar
#  month: prod(1 + r) - 1.  Taking .last() of the daily returns would only
#  give the return of the final trading day of each month.
#  min_count=1 leaves a month with no valid daily return as NaN instead of 0.
tech_month_change = (1 + tech_month_change).resample('M').prod(min_count=1) - 1

Display cluster map 

In [None]:
#  z_score=0 standardizes each row of the monthly data with a z-score
#  metric="correlation" is the distance used for the clustering
#  Dendrograms display the results of the cluster analysis
monthly_cluster_options = dict(
    metric="correlation",
    method='single',
    z_score=0,
    col_cluster=True,
    cmap='coolwarm',
    figsize=(8, 14),
)
graph_returns_month = sns.clustermap(tech_month_change.dropna(), **monthly_cluster_options)

## Daily Cumulative Return
-------

In [None]:
#  Cumulative return: running product of (1 + daily return) for each stock
tech_cum_daily_return = tech_daily_return.add(1).cumprod()

##### Total Cumulative Stock Return

In [None]:
#  Last row of the cumulative return table, i.e. the value at the final date
tech_cum_daily_return.iloc[-1:]

### Summary of Daily Cumulative Returns

In [None]:
#  Summary statistics of the daily cumulative returns, one column per stock
cum_return_summary = tech_cum_daily_return.describe()
cum_return_summary

### Visualizing Daily Cumulative Return
The Univariate Daily Cumulative Return

In [None]:
#  All stocks on a single chart of daily cumulative return
cum_return_axes = tech_cum_daily_return.plot(
    figsize=(10, 12),
    title='Daily Cumulative Return',
    legend=True,
)
cum_return_axes


### Test Correlation between the Daily Cumulative Return of each stock

##### Pair plot 
The diagonal shows the distribution of a single variable, while the upper and
lower triangles show the relationship between two variables.

Linear regression models are fitted to the scatter plots in the upper triangle.

Kernel density estimates are drawn for the univariate plots on the diagonal and as bivariate plots in the lower triangle.


In [None]:
#  Grid of pairwise plots over the cumulative returns, rows containing NaN removed
cum_returns_pairplot = sns.PairGrid(tech_cum_daily_return.dropna())

#  Upper triangle: scatter plot with a fitted regression line drawn in red
upper_options = {'fit_reg': True, 'line_kws': {'color': 'red'}}
cum_returns_pairplot.map_upper(sns.regplot, **upper_options)

#  Lower triangle: kernel density estimate of each pair of stocks
lower_options = {'cmap': "Blues_d"}
cum_returns_pairplot.map_lower(sns.kdeplot, **lower_options)

#  Diagonal: distribution of each single stock
cum_returns_pairplot.map_diag(sns.distplot)

#  Second pass over the upper triangle: annotate each panel with Pearson's r
cum_returns_pairplot.map_upper(corrfunc)


##### Correlation Matrix of Cumulative Daily Returns

In [None]:
#  Pairwise Pearson correlation of the cumulative daily returns (NaN rows removed)
complete_cum_returns = tech_cum_daily_return.dropna()
cumu_returns_corr = complete_cum_returns.corr(method='pearson')

#  Shade each cell with the 'coolwarm' colormap and display 3 decimal places
styled_cumu_corr = cumu_returns_corr.style.background_gradient(cmap='coolwarm')
styled_cumu_corr.set_precision(3)

##### Cluster Map of Cumulative Daily Returns  

In [None]:
#  z_score=0 standardizes each row of cumulative returns with a z-score
#  metric="correlation" is the distance used for the clustering
#  Rows are not clustered (row_cluster=False), so the dendrogram groups the stock columns
cum_cluster_options = dict(
    metric="correlation",
    method='single',
    z_score=0,
    row_cluster=False,
    cmap='coolwarm',
    yticklabels=False,
    figsize=(8, 14),
)
graph_cum_returns = sns.clustermap(tech_cum_daily_return.dropna(), **cum_cluster_options)

## Quantify Risk of Stock's Daily Return
-------
Quantify risk by comparing expected return with the standard deviation of returns

In [None]:
#  Expected return (mean) and risk (standard deviation) of each stock's daily
#  return, one row per stock.
#  No dropna() is needed first: mean() and std() skip NaN values by default,
#  and a bare tech_daily_return.dropna() whose result is discarded has no effect.
quantify_risk = pd.DataFrame({
    'Expected Return': tech_daily_return.mean(),
    'Risk': tech_daily_return.std(),
})
quantify_risk

In [None]:
sns.set_style("darkgrid")

#  Risk versus expected return, one point per stock.
#  hue and size both receive the ticker list; sizes=(70,70) sets the smallest
#  and largest marker size to the same value.
#  NOTE(review): assumes ticker_index_data is in the same order as the rows
#  of quantify_risk - confirm.
risk_return_axes = sns.scatterplot(
    data=quantify_risk,
    x='Expected Return',
    y='Risk',
    hue=ticker_index_data,
    size=ticker_index_data,
    sizes=(70, 70),
    legend='full',
)
risk_return_axes

## Test Correlation Between Trading Volume (log scaled) of each stock

In [None]:
#  Download the trading volume of every ticker over the same date range
trade_volume = data.DataReader(ticker_index_data, 'yahoo', start, end)['Volume']

#  Natural-log scale the volumes
log_volume = np.log(trade_volume)

##### Pairplot

In [None]:
#  Grid of pairwise plots over the log trading volumes, rows containing NaN removed
trade_volume_pairplot = sns.PairGrid(log_volume.dropna())

#  Upper triangle: scatter plot with a fitted regression line drawn in red
scatter_fit_options = {'fit_reg': True, 'line_kws': {'color': 'red'}}
trade_volume_pairplot.map_upper(sns.regplot, **scatter_fit_options)

#  Lower triangle: kernel density estimate of each pair of stocks
kde_options = {'cmap': "Blues_d"}
trade_volume_pairplot.map_lower(sns.kdeplot, **kde_options)

#  Diagonal: distribution of each single stock
trade_volume_pairplot.map_diag(sns.distplot)

#  Second pass over the upper triangle: annotate each panel with Pearson's r
trade_volume_pairplot.map_upper(corrfunc)

##### Correlation Matrix of Trading Volume

In [None]:
#  Pairwise Pearson correlation of the log trading volumes
trading_volume_corr = log_volume.corr(method='pearson')

#  Shade each cell with the 'coolwarm' colormap and display 3 decimal places
styled_volume_corr = trading_volume_corr.style.background_gradient(cmap='coolwarm')
styled_volume_corr.set_precision(3)

###### Cluster Map of Trading Volume 

In [None]:
#  z_score=0 standardizes each row of log trading volume with a z-score
#  metric="correlation" is the distance used for the clustering
#  Rows are not clustered (row_cluster=False), so the dendrogram groups the stock columns
volume_cluster_options = dict(
    metric="correlation",
    method='single',
    z_score=0,
    row_cluster=False,
    cmap='coolwarm',
    yticklabels=False,
    figsize=(8, 14),
)
graph_volume = sns.clustermap(log_volume.dropna(), **volume_cluster_options)
