# Setting up the notebook

In [1]:
# necessary imports for the workbook
import pandas as pd
import pandas.io.data
import numpy as np
import datetime
import matplotlib.pyplot as plt

# Configure how pandas renders frames in this notebook
# plain-text output instead of HTML tables
pd.set_option('display.notebook_repr_html', False)
# truncate wide/long frames so the printed output stays compact
pd.set_option('display.max_columns', 6)
pd.set_option('display.max_rows', 10)
pd.set_option('display.width', 78)
# use the fully qualified key: the bare 'precision' pattern matches more
# than one option in newer pandas releases and raises an OptionError
pd.set_option('display.precision', 4)

# do all our graphics inline
%matplotlib inline

The pandas.io.data module is moved to a separate package (pandas-datareader) and will be removed from pandas in a future version.
After installing the pandas-datareader package (https://github.com/pydata/pandas-datareader), you can change the import ``from pandas.io import data, wb`` to ``from pandas_datareader import data, wb``.


# Obtaining and organizing stock data from Yahoo!

In [2]:
def getStockData(ticker, start, end):
    """Download the price history of a single stock from Yahoo! Finance.

    ticker: stock symbol to request
    start, end: date range, handed unchanged to the data reader

    Returns the frame produced by the reader with its 'Adj Close' column
    renamed to 'AdjClose' and a 'Ticker' column, holding `ticker` on every
    row, inserted as the first column.

    NOTE: the warning printed by the notebook's import cell says
    pandas.io.data is moving to the separate pandas-datareader package.
    """
    quotes = pd.io.data.DataReader(ticker, "yahoo", start, end)
    # a column name without a space is easier to work with later on
    quotes = quotes.rename(columns={'Adj Close': 'AdjClose'})
    # tag every row with its symbol so frames can be combined afterwards
    quotes.insert(0, "Ticker", ticker)
    return quotes

In [3]:
# date range used for every download: 2012-01-01 through 2014-12-31
start = datetime.datetime(year=2012, month=1, day=1)
end = datetime.datetime(year=2014, month=12, day=31)
# fetch MSFT for that range and show the first rows
getStockData("MSFT", start, end).head()

           Ticker   Open   High    ...     Close    Volume  AdjClose
Date                               ...                              
2012-01-03   MSFT  26.55  26.96    ...     26.77  64731500   23.3043
2012-01-04   MSFT  26.82  27.47    ...     27.40  80516100   23.8528
2012-01-05   MSFT  27.38  27.73    ...     27.68  56081400   24.0965
2012-01-06   MSFT  27.53  28.19    ...     28.11  99455500   24.4708
2012-01-09   MSFT  28.05  28.10    ...     27.74  59706800   24.1487

[5 rows x 7 columns]

In [4]:
def getDataForMultipleStocks(tickers, start, end):
    """Fetch the price history of several stocks.

    tickers: iterable of stock symbols to fetch
    start, end: start and end dates, passed unchanged to getStockData

    Returns a dict mapping each symbol to the frame getStockData returned
    for it.
    """
    return {symbol: getStockData(symbol, start, end) for symbol in tickers}

In [5]:
# download the history of every stock used in the analysis,
# collected in a dictionary keyed by ticker
raw = getDataForMultipleStocks(
    tickers=["MSFT", "AAPL", "GE", "IBM", "AA", "DAL", "UAL", "PEP", "KO"],
    start=start,
    end=end)

OSError: after 3 tries, Yahoo! did not return a 200 for url 'http://ichart.finance.yahoo.com/table.csv?s=AA&a=0&b=1&c=2012&d=11&e=31&f=2014&g=d&ignore=.csv'

In [None]:
# first five rows of the MSFT frame
raw['MSFT'].head(5)

In [None]:
def pivotTickersToColumns(raw, column):
    """Pivot one column of every per-ticker frame into a single wide frame.

    raw: dict of DataFrames; every frame must contain a "Ticker" column
         and `column`, and carry an index named "Date"
    column: name of the column whose values fill the result

    Returns a DataFrame indexed by "Date" with one column per distinct
    "Ticker" value, holding the values of `column`.
    """
    # keep only the ticker label and the requested column of each frame
    items = [frame[["Ticker", column]] for frame in raw.values()]
    # stack the frames and expose the date index as a regular column
    combined = pd.concat(items).reset_index()
    # keyword arguments are required here: DataFrame.pivot no longer
    # accepts index/columns/values positionally in pandas 2.x
    return combined.pivot(index="Date", columns="Ticker", values=column)

In [None]:
# adjusted close prices, one column per ticker
close_px = pivotTickersToColumns(raw, column="AdjClose")
# show the first five rows
close_px.head(5)

## Plotting time-series prices

In [None]:
# line chart of the AAPL adjusted close
close_px.loc[:, 'AAPL'].plot();

In [None]:
# line chart of the MSFT adjusted close
close_px.loc[:, 'MSFT'].plot();

In [None]:
# MSFT and AAPL adjusted close on one set of axes
close_px.loc[:, ['MSFT', 'AAPL']].plot();

## Plotting volume series data

In [None]:
# traded volume, one column per ticker
volumes = pivotTickersToColumns(raw, column="Volume")
# show the last five rows
volumes.tail(5)

In [None]:
# bar chart of the MSFT volume
# msftV stays a one-column DataFrame; a later cell reuses it
msftV = volumes.loc[:, ["MSFT"]]
plt.bar(msftV.index, msftV.loc[:, "MSFT"])
plt.gcf().set_size_inches((15, 8))

In [None]:
# price history in the top three quarters of a 4x4 grid
top = plt.subplot2grid((4,4), (0, 0), rowspan=3, colspan=4)
top.plot(close_px['MSFT'].index, close_px['MSFT'], label='MSFT Adjusted Close')
# title and legend are set on the axes object itself rather than on
# whichever axes pyplot currently considers active
top.set_title('Microsoft Adjusted Close Price 2012 - 2014')
top.legend(loc=2)

# traded volume in the bottom row of the grid
bottom = plt.subplot2grid((4,4), (3,0), rowspan=1, colspan=4)
bottom.bar(msftV.index, msftV['MSFT'])
bottom.set_title('Microsoft Trading Volume 2012 - 2014')

# leave vertical room between the panels so the lower title is not clipped
plt.subplots_adjust(hspace=0.75)
plt.gcf().set_size_inches(15,8)

## Calculating simple daily percentage change

In [None]:
# daily percentage change: today's price over the previous row's price, minus one
daily_pc = close_px.div(close_px.shift(1)).sub(1)
daily_pc.head(5)

In [None]:
# check the AAPL percentage change on 2012-01-05 by hand
# (.loc replaces the .ix indexer, which has been removed from pandas)
close_px.loc['2012-01-05', 'AAPL'] / close_px.loc['2012-01-04', 'AAPL'] - 1

In [None]:
# line chart of the AAPL daily percentage change
daily_pc.loc[:, "AAPL"].plot();

## Calculating simple daily cumulative returns

In [None]:
# daily cumulative return: running product of (1 + daily change)
daily_cr = daily_pc.add(1).cumprod()
daily_cr.head(5)

In [None]:
# cumulative returns of every stock on one chart, to compare
# their relative performance
daily_cr.plot(figsize=(8, 6))
plt.legend(loc='upper left');

## Resampling data from daily to monthly returns

In [None]:
# resample to (business) month end and forward fill values
# an explicit offset object replaces the legacy 'EOM' alias, which current
# pandas no longer recognises; in the old alias table 'EOM' stood for
# business month end
monthly = close_px.asfreq(pd.offsets.BMonthEnd(), method="ffill")
monthly[:5]

In [None]:
# monthly percentage change: each row over the previous row, minus one
monthly_pc = monthly.div(monthly.shift(1)).sub(1)
monthly_pc.head(5)

In [None]:
# monthly cumulative return: running product of (1 + monthly change)
monthly_cr = monthly_pc.add(1).cumprod()
monthly_cr.head(5)

In [None]:
# monthly cumulative returns of every stock on one chart
monthly_cr.plot(figsize=(12, 6))
plt.legend(loc='upper left');

## Analyzing distribution of returns

In [None]:
# distribution of the AAPL daily percentage change, in 50 bins
aapl = daily_pc.loc[:, 'AAPL']
aapl.hist(bins=50);

In [None]:
# descriptive statistics of the AAPL daily percentage changes
# (`aapl` is the series selected from daily_pc in the histogram cell)
aapl.describe()

In [None]:
# grid of histograms, one per column of daily_pc (i.e. one per stock),
# each showing the daily % changes in 50 bins
daily_pc.hist(bins=50, figsize=(8,6));

## Performing moving average calculation

In [None]:
# extract just the MSFT close as a Series
msft_close = close_px['MSFT']
# calculate the 30 and 90 row rolling means; Series.rolling replaces the
# pd.stats.moments.rolling_mean function, which has been removed from pandas
ma_30 = msft_close.rolling(window=30).mean()
ma_90 = msft_close.rolling(window=90).mean()
# compose into a DataFrame that can be plotted
result = pd.DataFrame({'Close': msft_close,
                       '30_MA_Close': ma_30,
                       '90_MA_Close': ma_90})
# plot all the series against each other
result.plot(title="MSFT Close Price")
plt.gcf().set_size_inches(12,8)

## Comparison of average daily returns across stocks

In [None]:
# scatter of the daily percentage change: MSFT on x, AAPL on y
plt.scatter(x=daily_pc['MSFT'], y=daily_pc['AAPL'])
plt.ylabel('AAPL')
plt.xlabel('MSFT');

In [None]:
# a series plotted against itself: what perfect correlation looks like
plt.scatter(x=daily_pc['MSFT'], y=daily_pc['MSFT']);

In [None]:
# plot the scatter of daily price changes for ALL stocks, with a kernel
# density estimate on the diagonal; scatter_matrix lives in pd.plotting
# (the top-level pd.scatter_matrix has been removed from pandas)
pd.plotting.scatter_matrix(daily_pc, diagonal='kde', figsize=(12,12));

## Correlation of stocks based upon daily percentage change of closing price

In [None]:
# pairwise (Pearson) correlation of the daily percentage
# changes of all the stocks
corrs = daily_pc.corr(method='pearson')
corrs

In [None]:
# draw the correlation matrix as a heatmap
plt.imshow(corrs, cmap='hot', interpolation='none')
plt.colorbar()
# one tick per row/column, labelled with the stock symbols
plt.xticks(range(corrs.shape[0]), corrs.columns)
plt.yticks(range(corrs.shape[0]), corrs.columns)
plt.gcf().set_size_inches((8, 8))

# Volatility

In [None]:
# 75 period rolling window (despite its name, this value is used as the
# window size of the rolling calculation below)
min_periods = 75
# calculate the volatility: rolling standard deviation of the daily
# changes scaled by the square root of the window length.
# DataFrame.rolling replaces the pd.stats.moments.rolling_std function,
# which has been removed from pandas
vol = daily_pc.rolling(window=min_periods).std() * np.sqrt(min_periods)
# plot it
vol.plot(figsize=(10, 8));

# Determining risk relative to expected returns

In [None]:
# mean and standard deviation of the daily % change, per stock
avg_change = daily_pc.mean()
std_change = daily_pc.std()

# one dot per stock: mean on x, standard deviation on y
plt.scatter(avg_change, std_change)
plt.xlabel('Expected returns')
plt.ylabel('Risk')

# label every dot with its ticker in a rounded yellow box,
# connected to the dot by a straight arrow
for ticker, mu, sigma in zip(daily_pc.columns, avg_change, std_change):
    plt.annotate(
        ticker,
        xy=(mu, sigma),
        xytext=(30, -30),
        textcoords='offset points',
        ha='right',
        va='bottom',
        bbox={'boxstyle': 'round,pad=0.5', 'fc': 'yellow', 'alpha': 0.5},
        arrowprops={'arrowstyle': '->', 'connectionstyle': 'arc3,rad=0'})

# fixed axis ranges for the presentation
plt.xlim(-0.001, 0.003)
plt.ylim(0.005, 0.0275)

# square 8x8 inch figure
plt.gcf().set_size_inches((8, 8))