# Data Project - Stock Market Analysis 

The objective of this data project is to analyze share market data for Chubb and its competitors for 2018 in order to answer the questions below.

We'll be answering the following questions along the way:

    1.) What was the change in price of the stock over time?
    2.) What was the daily return of the stock on average?
    3.) What was the moving average of the various stocks?
    4.) What was the correlation between different stocks' closing prices?
    5.) What was the correlation between different stocks' daily returns?
    6.) How much value do we put at risk by investing in a particular stock?
    7.) How can we attempt to predict future stock behavior?

In [None]:
#Importing necessary libraries
import pandas as pd
from pandas import Series,DataFrame
import numpy as np

# For Visualization
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')
%matplotlib inline

# For reading stock data from yahoo
from pandas_datareader import DataReader

# For time stamps
from datetime import datetime


Using Yahoo and pandas to grab some data for some insurance stocks.

In [None]:
# Tickers of the insurance stocks compared in this analysis
insur_list = ['CB', 'TRV', 'ALL', 'PGR']

# Date window for the download: calendar year 2018
start, end = datetime(2018, 1, 1), datetime(2018, 12, 31)

# Download each ticker's history from Yahoo and bind the resulting DataFrame
# to a module-level variable named after the ticker (CB, TRV, ALL, PGR)
for stock in insur_list:
    globals()[stock] = DataReader(name=stock, data_source='yahoo', start=start, end=end)

In [None]:
# Summary statistics for the columns of the Chubb (CB) DataFrame
CB.describe()

In [None]:
# General information about the columns of the Chubb (CB) DataFrame
CB.info()

Plotting the closing price and the traded volume of the stock


In [None]:
# Historical view of Chubb's adjusted closing price over the downloaded period
CB['Adj Close'].plot(legend=True,figsize=(10,4))

In [None]:
# Volume of Chubb stock traded each day over the downloaded period
CB['Volume'].plot(legend=True,figsize=(10,4))

Calculating moving average 

In [None]:
# Moving averages via pandas' rolling-window mean: one new column per window length

ma_day = [10, 20, 50]

for ma in ma_day:
    column_name = "MA for %s days" % ma
    CB[column_name] = CB['Adj Close'].rolling(window=ma).mean()

In [None]:
# Overlay the adjusted close and its 10-, 20- and 50-day moving averages on a single axis
CB[['Adj Close','MA for 10 days','MA for 20 days','MA for 50 days']].plot(subplots=False,figsize=(10,4))

Analyzing the Daily returns of the Stock

In [None]:
# Daily return = percent change of the adjusted close from one row to the next,
# stored as a new 'Daily Return' column on the Chubb DataFrame
CB['Daily Return'] = CB['Adj Close'].pct_change()
# Plot the daily return as a dashed line with a marker on every observation
CB['Daily Return'].plot(figsize=(12,4),legend=True,linestyle='--',marker='o')

In [None]:
# Distribution of Chubb's daily returns drawn with seaborn (100 bins);
# missing values are dropped before plotting
sns.distplot((CB['Daily Return'].dropna()),bins=100,color='purple')


In [None]:
# Cross-check the chart above with pandas' built-in histogram
CB['Daily Return'].hist()

Analyzing the returns of all the top insurers' stocks

In [None]:
# Adjusted closing prices of every ticker in insur_list, one column per ticker
closing_df = DataReader(insur_list, 'yahoo', start, end)['Adj Close']

In [None]:
# Preview the first rows of the combined closing-price table
closing_df.head()

In [None]:
# Calculating the row-over-row percentage change of every stock's closing price
insur_rets = closing_df.pct_change()

In [None]:
# Comparing Chubb to itself should show a perfectly linear relationship.
# x, y and data are passed by keyword: recent seaborn releases no longer
# accept them positionally, while the keyword form works on old and new versions.
sns.jointplot(x='CB', y='CB', data=insur_rets, kind='scatter', color='seagreen')

Checking whether Chubb's returns are correlated with those of the other insurance stocks

In [None]:
# Chubb vs. Allstate daily returns.
# x, y and data are passed by keyword: recent seaborn releases no longer
# accept them positionally, while the keyword form works on old and new versions.
sns.jointplot(x='CB', y='ALL', data=insur_rets, kind='scatter')

In [None]:
# Pairwise plots of the daily returns of every stock against every other stock
sns.pairplot(insur_rets.dropna())

In [None]:
# Set up our figure by naming it returns_fig: a PairGrid over the daily returns (rows with missing values dropped)
returns_fig = sns.PairGrid(insur_rets.dropna())

# Using map_upper we can specify what the upper triangle will look like: purple scatter plots
returns_fig.map_upper(plt.scatter,color='purple')

# We can also define the lower triangle in the figure, including the plot type (kde) and the color map ('cool_d')
returns_fig.map_lower(sns.kdeplot,cmap='cool_d')

# Finally we'll define the diagonal as a series of histogram plots of the daily return
returns_fig.map_diag(plt.hist,bins=30)

Analyzing the correlation of closing prices of top competitors 

In [None]:
# Same grid layout as for the daily returns, but built on the closing prices
# (the variable is still called returns_fig, so it replaces the earlier grid object)
returns_fig = sns.PairGrid(closing_df)

# Using map_upper we can specify what the upper triangle will look like: purple scatter plots
returns_fig.map_upper(plt.scatter,color='purple')

# We can also define the lower triangle in the figure, including the plot type (kde) and the color map ('cool_d')
returns_fig.map_lower(sns.kdeplot,cmap='cool_d')

# Finally we'll define the diagonal as a series of histogram plots of the closing price
returns_fig.map_diag(plt.hist,bins=30)

Finding the correlation value between stocks

In [None]:
def halfHeatMap(df, mirror):
    """Plot an annotated heat map of the pairwise correlations of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Data whose column-by-column correlation (``df.corr()``) is plotted.
    mirror : bool
        If true, draw the full (mirrored) correlation matrix. Otherwise hide
        the diagonal and the upper triangle so every pair appears only once.

    The figure is displayed exactly once, whichever mode is chosen.
    """
    # Correlation matrix of the columns
    corr = df.corr()
    n_cols = len(corr.columns)

    # Square 10x10 inch figure; the heat map is drawn on this explicit axis
    _, ax = plt.subplots(figsize=(10, 10))

    # Diverging colour map
    colormap = sns.diverging_palette(220, 10, as_cmap=True)

    # Boolean mask marking the cells to hide: nothing when mirrored, otherwise
    # the diagonal (self-correlations) and everything above it
    mask = None if mirror else np.triu(np.ones(corr.shape, dtype=bool))

    # Heat map with each correlation printed to two decimals
    sns.heatmap(corr, cmap=colormap, annot=True, fmt=".2f", mask=mask, ax=ax)

    # Heat map cell i spans [i, i + 1], so labels go at the cell centres
    centres = np.arange(n_cols) + 0.5
    ax.set_xticks(centres)
    ax.set_xticklabels(corr.columns)
    ax.set_yticks(centres)
    ax.set_yticklabels(corr.columns)

    plt.show()
    
# Correlation heat map of the daily returns, non-mirrored (mirror=False)
halfHeatMap(insur_rets.dropna(), False)

<b> Risk Analysis </b>

Comparing expected return with std of daily return to quantify the risk of stock


In [None]:
rets = insur_rets.dropna()

# Marker size used for every point of the scatter plot
area = np.pi*30

# Per-stock mean (expected return) and standard deviation (risk) of the daily returns
expected_returns = rets.mean()
risk = rets.std()

plt.scatter(expected_returns, risk, alpha=0.5, s=area)

# Axis limits
plt.xlim([-0.002, 0.002])
plt.ylim([0.008, 0.020])

# Axis titles
plt.xlabel('Expected returns')
plt.ylabel('Risk')

# Label every point with its ticker, connected by a curved arrow
for label, x, y in zip(rets.columns, expected_returns, risk):
    plt.annotate(
        label,
        xy=(x, y),
        xytext=(40, 20),
        textcoords='offset points',
        ha='right',
        va='bottom',
        arrowprops=dict(arrowstyle='->', connectionstyle="arc3,rad=-0.3", color='b'),
    )

<b>Calculating Value at Risk Parameter</b>

In [None]:
# Distribution of Chubb's daily returns again, as the starting point for the value-at-risk estimate
sns.distplot((CB['Daily Return'].dropna()),bins=100,color='purple')

In [None]:
# The 0.05 empirical quantile of Chubb's daily returns:
# 5% of the observed daily returns lie below this value
rets['CB'].quantile(0.05)

The 0.05 empirical quantile of daily returns is at -0.019. That means that with 95% confidence, our worst daily loss for CB will not exceed 1.9%. If we have a 1 million dollar investment, our one-day 5% VaR is 0.019 * 1,000,000 = $19,000.

<b> Calculating Value at Risk using the Monte Carlo method </b>

We use the Monte Carlo method to run many trials with random market conditions and calculate the portfolio loss for each trial. We then aggregate all of these simulations to establish how risky the stock is.

In [None]:
# Set up time horizon for market simulation (number of simulated steps)
days = 365

# Time step: one step expressed as a fraction of the whole horizon
dt = 1/days

# Mean of Chubb's daily returns
# NOTE(review): mu and sigma are per-day statistics, yet the simulation scales them
# by dt = 1/365 as if they were per-horizon figures. Confirm this is intended.
mu = rets.mean()['CB']

# Standard deviation of Chubb's daily returns
sigma = rets.std()['CB']

In [None]:
def stock_monte_carlo(start_price,days,mu,sigma):
    """Simulate one random price path and return it as an array.

    Every step multiplies the previous price by ``1 + drift + shock`` where,
    with ``dt = 1 / days``:

    * ``drift = mu * dt``
    * ``shock`` is drawn from a normal distribution with mean ``mu * dt``
      and standard deviation ``sigma * sqrt(dt)``.

    Parameters
    ----------
    start_price : float
        Price at index 0 of the returned path.
    days : int
        Number of points in the path (must be at least 1). It also defines
        the time step ``dt = 1 / days``.
    mu : float
        Mean return used for the drift and as the centre of the shock.
    sigma : float
        Return standard deviation used to scale the shock.

    Returns
    -------
    numpy.ndarray
        Array of length ``days`` whose first element is ``start_price``.

    Raises
    ------
    ValueError
        If ``days`` is smaller than 1.
    """
    if days < 1:
        raise ValueError("days must be at least 1")

    # Derive the time step from the horizon passed in, so the function does
    # not depend on a module-level `dt` that may be stale or undefined.
    dt = 1 / days

    # Drift is the same for every step
    drift = mu * dt

    # NOTE(review): the shock is centred on mu * dt and the drift adds mu * dt
    # again, so the expected step return is 2 * mu * dt. Kept as-is to preserve
    # the notebook's published results; confirm whether loc=0 was intended.
    shock = np.random.normal(loc=mu * dt, scale=sigma * np.sqrt(dt), size=days - 1)

    # price[x] = price[x-1] * (1 + drift + shock[x]) is a running product
    price = np.empty(days)
    price[0] = start_price
    price[1:] = start_price * np.cumprod(1 + drift + shock)

    return price

In [None]:
# Preview the first rows of the Chubb data (used to read off the starting price)
CB.head()


In [None]:
# Starting price of every simulation, hard-coded to the adjusted closing price of 02-01-2018
start_price = 139.20

# Draw 100 independently simulated price paths on the same chart
for run in range(100):
    plt.plot(stock_monte_carlo(start_price,days,mu,sigma))
plt.xlabel("Days")
plt.ylabel("Price")  
plt.title('Monte Carlo Analysis for Chubb')

In [None]:
# Run a much larger number of simulations and keep only the final price of each path
runs = 10000

# Abbreviate the printed form of large arrays
np.set_printoptions(threshold=5)

# Final simulated price of every run
simulations = np.array(
    [stock_monte_carlo(start_price, days, mu, sigma)[-1] for _ in range(runs)]
)

Plotting a histogram of the simulation results and marking the 1% empirical quantile

In [None]:
# 1st percentile of the simulated final prices
q = np.percentile(simulations, 1)
# Histogram of the final price of every simulation
plt.hist(simulations,bins=200)
# Figure annotations: starting price, mean final price, value at risk
# (starting price minus the 1st-percentile price) and the 1st-percentile price itself
plt.figtext(0.6, 0.8, s="Start price: $%.2f" %start_price)
plt.figtext(0.6, 0.7, "Mean final price: $%.2f" % simulations.mean())
plt.figtext(0.6, 0.6, "VaR(0.99): $%.2f" % (start_price - q,))
plt.figtext(0.15, 0.6, "q(0.99): $%.2f" % q)
# Red vertical line at the 1st-percentile price
plt.axvline(x=q, linewidth=4, color='r')
plt.title(u"Final price distribution for Chubb Stock after %s days" % days, weight='bold');


The estimated Value at Risk for Chubb at the 1% empirical quantile is about USD 3.91. This means that, for every share purchased, the loss stayed within about $3.91 in 99% of the simulated outcomes. This estimate was obtained using Monte Carlo simulation.