In [1]:
# Render matplotlib figures inline, in the output area of the cell that creates them.
%matplotlib inline
# Ask the inline backend for the 'retina' (high-resolution) figure format.
%config InlineBackend.figure_format = 'retina'

In [3]:
import matplotlib.pyplot as plt
import warnings

# Matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8*'
# and later releases removed the old names, so use whichever name this
# installation provides instead of hard-coding the legacy one.
seaborn_style = 'seaborn' if 'seaborn' in plt.style.available else 'seaborn-v0_8'
plt.style.use(seaborn_style)
# alternative: 'seaborn-colorblind' ('seaborn-v0_8-colorblind' on Matplotlib >= 3.6)
# plt.rcParams['figure.figsize'] = [16, 9]
# Render figures at 300 dots per inch.
plt.rcParams['figure.dpi'] = 300
# Suppress FutureWarning messages raised by the libraries used below.
warnings.simplefilter(action='ignore', category=FutureWarning)

# Financial Data and Preprocessing 

## Getting data from Yahoo Finance

1. Import the libraries:

In [1]:
import pandas as pd 
import yfinance as yf

2. Download the data:

In [2]:
# Fetch AAPL prices from Yahoo Finance for the requested date window.
# auto_adjust is pinned to False because newer yfinance releases default it
# to True, which replaces Close with adjusted prices and drops the separate
# 'Adj Close' column that the inspection step below displays.
df_yahoo = yf.download('AAPL',
                       start='2000-01-01',
                       end='2010-12-31',
                       auto_adjust=False,
                       progress=False)

3. Inspect the data:

In [3]:
# Report the number of downloaded rows, then preview the first five.
# The bare last expression is what the notebook renders as a table.
n_rows = len(df_yahoo.index)
print(f'Downloaded {n_rows} rows of data.')

df_yahoo.head(5)

Downloaded 2767 rows of data.


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1999-12-31,3.604911,3.674107,3.553571,3.671875,3.194901,40952800
2000-01-03,3.745536,4.017857,3.631696,3.997768,3.478462,133949200
2000-01-04,3.866071,3.950893,3.613839,3.660714,3.185191,128094400
2000-01-05,3.705357,3.948661,3.678571,3.714286,3.231803,194580400
2000-01-06,3.790179,3.821429,3.392857,3.392857,2.952128,191993200


## Getting data from Quandl 

1. Import the libraries:

In [4]:
import pandas as pd 
import quandl

2. Authenticate using your personal API key:

In [34]:
import os

# Prefer a key supplied through the QUANDL_KEY environment variable so the
# real secret never has to be typed into (and saved with) the notebook.
# If the variable is not set, the placeholder below is used instead:
# replace {key} with your own API key.
QUANDL_KEY = os.environ.get('QUANDL_KEY', '{key}')
quandl.ApiConfig.api_key = QUANDL_KEY

3. Download the data:

In [11]:
# Request the 'WIKI/AAPL' dataset from Quandl, limited to the
# 2000-01-01 .. 2010-12-31 window.
df_quandl = quandl.get(
    dataset='WIKI/AAPL',
    start_date='2000-01-01',
    end_date='2010-12-31',
)

4. Inspect the data:

In [12]:
# Report the number of downloaded rows, then preview the first five.
# The bare last expression is what the notebook renders as a table.
n_rows = len(df_quandl.index)
print(f'Downloaded {n_rows} rows of data.')

df_quandl.head(5)

Downloaded 2767 rows of data.


Unnamed: 0_level_0,Open,High,Low,Close,Volume,Ex-Dividend,Split Ratio,Adj. Open,Adj. High,Adj. Low,Adj. Close,Adj. Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2000-01-03,104.87,112.5,101.69,111.94,4783900.0,0.0,1.0,3.369314,3.614454,3.267146,3.596463,133949200.0
2000-01-04,108.25,110.62,101.19,102.5,4574800.0,0.0,1.0,3.477908,3.554053,3.251081,3.29317,128094400.0
2000-01-05,103.75,110.56,103.0,104.0,6949300.0,0.0,1.0,3.33333,3.552125,3.309234,3.341362,194580400.0
2000-01-06,106.12,107.0,95.0,95.0,6856900.0,0.0,1.0,3.409475,3.437748,3.052206,3.052206,191993200.0
2000-01-07,96.5,101.0,95.5,99.5,4113700.0,0.0,1.0,3.100399,3.244977,3.06827,3.196784,115183600.0


## Getting data from Intrinio

1. Import the libraries:

In [5]:
import intrinio_sdk
import pandas as pd

2. Authenticate using your personal API key and select the API:

In [6]:
import os

# Prefer a key supplied through the INTRINIO_KEY environment variable so the
# real secret never has to be typed into (and saved with) the notebook.
# If the variable is not set, the placeholder below is used instead:
# replace {key} with your own API key.
INTRINIO_KEY = os.environ.get('INTRINIO_KEY', '{key}')
intrinio_sdk.ApiClient().configuration.api_key['api_key'] = INTRINIO_KEY
# Client for the security endpoints, used for the price request below.
security_api = intrinio_sdk.SecurityApi()

3. Request the data:

In [7]:
# Ask Intrinio for daily AAPL stock prices in the 2000-01-01 .. 2010-12-31
# window, requesting up to 10000 records in a single page.
r = security_api.get_security_stock_prices(
    identifier='AAPL',
    start_date='2000-01-01',
    end_date='2010-12-31',
    frequency='daily',
    page_size=10000,
)

4. Convert the results into a DataFrame:

In [8]:
# Turn every returned price object into a plain dict, one per row.
response_list = [price.to_dict() for price in r.stock_prices]

# Build the frame, order it by 'date', and use 'date' as the index.
df_intrinio = (
    pd.DataFrame(response_list)
    .sort_values('date')
    .set_index('date')
)

5. Inspect the data:

In [9]:
# Report the number of downloaded rows, then preview the first five.
# The bare last expression is what the notebook renders as a table.
n_rows = len(df_intrinio.index)
print(f'Downloaded {n_rows} rows of data.')

df_intrinio.head(5)

Downloaded 2771 rows of data.


Unnamed: 0_level_0,intraperiod,frequency,open,high,low,close,volume,adj_open,adj_high,adj_low,adj_close,adj_volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2000-01-03,False,daily,104.87,112.5,101.69,111.94,4783900.0,3.258837,3.49594,3.160019,3.478538,133949200.0
2000-01-04,False,daily,108.25,110.62,101.19,102.5,4574800.0,3.363871,3.437519,3.144481,3.18519,128094400.0
2000-01-05,False,daily,103.75,110.56,103.0,104.0,6949300.0,3.224033,3.435654,3.200727,3.231802,194580400.0
2000-01-06,False,daily,106.12,107.0,95.0,95.0,6856900.0,3.297681,3.325027,2.952127,2.952127,191993200.0
2000-01-07,False,daily,96.5,101.0,95.5,99.5,4113700.0,2.998739,3.138577,2.967664,3.091965,115183600.0
