# Financial time series with Python


In [21]:
# Import relevant packages
import pandas as pd
import numpy as np
import os, itertools, sys
import matplotlib.pylab as plt
from matplotlib.gridspec import GridSpec

from pandas.io.data import DataReader
from datetime import datetime

Now we are going to load stock data using Yahoo Finance.  
We load Microsoft's and Apple's adjusted close.

>Adjusted close price : A stock's closing price on any given day of trading that has been amended to include any distributions and corporate actions that occurred at any time prior to the next day's open. The adjusted closing price is often used when examining historical returns or performing a detailed analysis on historical returns.



In [22]:
# DataReader takes (stock_symbol, data_source):
#   'AAPL' + 'yahoo' -> Apple stock prices from Yahoo Finance
#   'MSFT' + 'yahoo' -> Microsoft stock prices from Yahoo Finance
# Start and end dates can also be passed to restrict the period;
# none are given here.

apple, MSFT = (DataReader(ticker, 'yahoo') for ticker in ('AAPL', 'MSFT'))

#### Let's have a look at this data frame

In [23]:
# Preview the Apple data frame: first 10 rows, all columns
print(apple.head(10))

                  Open        High         Low       Close     Volume  \
Date                                                                    
2010-01-04  213.429998  214.499996  212.380001  214.009998  123432400   
2010-01-05  214.599998  215.589994  213.249994  214.379993  150476200   
2010-01-06  214.379993  215.230000  210.750004  210.969995  138040000   
2010-01-07  211.750000  212.000006  209.050005  210.580000  119282800   
2010-01-08  210.299994  212.000006  209.060005  211.980005  111902700   
2010-01-11  212.799997  213.000002  208.450005  210.110003  115557400   
2010-01-12  209.189995  209.769995  206.419998  207.720001  148614900   
2010-01-13  207.870005  210.929995  204.099998  210.650002  151473000   
2010-01-14  210.110003  210.459997  209.020004  209.430000  108223500   
2010-01-15  210.929995  211.599997  205.869999  205.930000  148516900   

            Adj Close  
Date                   
2010-01-04  28.466830  
2010-01-05  28.516046  
2010-01-06  28.062460  
201

We see that we do not have a Date column: the Date is the index.
Let's rearrange the data frame, it will be easier :

- We'll move the current index to a new column called "Date"
- And we'll reset the existing index to 0, 1, ..., number of days - 1

This can be done in a single pandas command :

In [24]:
# Move the "Date" index of both data frames into a regular column; the rows
# are then indexed 0, 1, 2, ...
# The guard makes this cell safe to re-run: once "Date" is already a column,
# a second reset_index would add a spurious "index" column instead.
for frame in (apple, MSFT):
    if "Date" not in frame.columns:
        # inplace means we do not create a new data frame object
        frame.reset_index(level=0, inplace=True)

#### Let's check the results :

In [25]:
# First 10 rows of the Apple frame after the index reset
print(apple.head(10))

# Number of rows in each frame, then the first 10 rows of the Microsoft frame
print("%d %d" % (len(apple), len(MSFT)))
print(MSFT.head(10))

# The time difference between contiguous points can be obtained as well;
# the commented line below shows how:
#print apple["Date"].diff().astype('timedelta64[D]')

        Date        Open        High         Low       Close     Volume  \
0 2010-01-04  213.429998  214.499996  212.380001  214.009998  123432400   
1 2010-01-05  214.599998  215.589994  213.249994  214.379993  150476200   
2 2010-01-06  214.379993  215.230000  210.750004  210.969995  138040000   
3 2010-01-07  211.750000  212.000006  209.050005  210.580000  119282800   
4 2010-01-08  210.299994  212.000006  209.060005  211.980005  111902700   
5 2010-01-11  212.799997  213.000002  208.450005  210.110003  115557400   
6 2010-01-12  209.189995  209.769995  206.419998  207.720001  148614900   
7 2010-01-13  207.870005  210.929995  204.099998  210.650002  151473000   
8 2010-01-14  210.110003  210.459997  209.020004  209.430000  108223500   
9 2010-01-15  210.929995  211.599997  205.869999  205.930000  148516900   

   Adj Close  
0  28.466830  
1  28.516046  
2  28.062460  
3  28.010584  
4  28.196808  
5  27.948067  
6  27.630158  
7  28.019896  
8  27.857616  
9  27.392058  
1519 1519

Success !  

Now we are going to monitor the evolution of the adjusted closing price over time for both companies.  
So let's merge both data sets.

Our code will do the following operation :

- Rename Adj Close in the Apple dataframe to Adj CloseAAPL
- Rename Adj Close in the MSFT dataframe to Adj CloseMSFT
- Check that the Date columns in both dataframes are equal
- Create a new dataframe with columns Date, Adj CloseAAPL and Adj CloseMSFT


In [26]:
# Check 1: both series must have the same number of rows
assert len(apple) == len(MSFT)

# Check 2: both series must be sampled on the same dates.
# Element-wise comparison of the two Date columns (True where they agree)
arr = apple["Date"].values == MSFT["Date"].values

# Row positions where the dates disagree; empty when the frames are aligned
l_uneq = np.where(~arr)[0]
print(l_uneq)

assert len(l_uneq) == 0

[]


So far, so good, the data frames are sampled at the same points in time !  
Let's carry on with the merge.

In [28]:
# Relabel the columns so the two adjusted-close series can be told apart:
# the first six labels are shared, only the last one carries the ticker.
ohlcv_columns = ["Date", "Open", "High", "Low", "Close", "Volume"]
apple.columns = ohlcv_columns + ["Adj CloseAAPL"]
MSFT.columns = ohlcv_columns + ["Adj CloseMSFT"]

# The two Date columns were shown to be identical, so only Apple's is kept.
apple_part = apple.loc[:, ["Date", "Adj CloseAAPL"]]
msft_part = MSFT.loc[:, ["Adj CloseMSFT"]]

# axis=1 places the pieces side by side (column-wise concatenation)
stocks = pd.concat([apple_part, msft_part], axis=1)

# Inspect the first 10 rows of the merged frame
print(stocks.head(10))

        Date  Adj CloseAAPL  Adj CloseMSFT
0 2010-01-04      28.466830      26.415914
1 2010-01-05      28.516046      26.424448
2 2010-01-06      28.062460      26.262284
3 2010-01-07      28.010584      25.989163
4 2010-01-08      28.196808      26.168398
5 2010-01-11      27.948067      25.835532
6 2010-01-12      27.630158      25.664831
7 2010-01-13      28.019896      25.903812
8 2010-01-14      27.857616      26.424448
9 2010-01-15      27.392058      26.339099


Now let's plot the adjusted closing price of each stock:

In [30]:
# Create the figure and a 2-row grid: one panel per stock
fig = plt.figure(figsize=(15, 15))
gs = GridSpec(2, 1, bottom=0.18, left=0.18, right=0.88)

axAPPLE = fig.add_subplot(gs[0])
axMSFT = fig.add_subplot(gs[1])

# Time axis: the Date column of the merged frame
x_day = stocks["Date"]
# Adjusted close values for both stocks
y_APPL, y_MSFT = stocks["Adj CloseAAPL"].values, stocks["Adj CloseMSFT"].values

# Pair every axis with the series it is named after, so that axAPPLE shows
# Apple and axMSFT shows Microsoft.
panels = [
    (axAPPLE, y_APPL, "r", "APPLE Adj Close"),
    (axMSFT, y_MSFT, "k", "MSFT Adj Close"),
]

for ax, y_values, line_format, legend_label in panels:
    # Plot the adjusted close against time
    ax.plot(x_day, y_values, line_format, label=legend_label, linewidth=1.5)

    # Figure cosmetics: axis labels and legend.
    # The y axis is the adjusted close, not the opening price.
    ax.set_xlabel("time", fontsize=22)
    ax.set_ylabel("Adj Close", fontsize=22)
    ax.legend(loc="best", fontsize=22)

plt.show()

Now I'm going to show screenshots of the same type of data from Yahoo Finance:

Apple:

![AAPL](AAPL.png)

Microsoft :

![MSFT](MSFT.png)

We see that we get the same curves (up to some smoothing) !