In [7]:
import pandas as pd
import numpy as np
import matplotlib as mpl
mpl.get_backend()
import matplotlib.pyplot as plt

# To look at stock data we import pandas_datareader, which can read data from remote
# sources such as Google, World Bank, and Yahoo, and yfinance, which the download
# function below uses to fetch price history.

import pandas_datareader as pdr
import yfinance as yf
import datetime

In [8]:
# Write a function to download stocks
def get(tickers, startdate, enddate):
    """Download price history for several tickers into one stacked DataFrame.

    Each symbol is fetched separately with
    ``yf.download(ticker, start=startdate, end=enddate)`` and the per-symbol
    frames are concatenated under an outer index level keyed by the symbol.

    Parameters
    ----------
    tickers : str or iterable of str
        One symbol, or any iterable of symbols (list, tuple, generator, ...).
    startdate, enddate
        Passed through unchanged as ``start`` / ``end`` to ``yf.download``.

    Returns
    -------
    pandas.DataFrame
        The downloaded frames stacked vertically. The index levels are named
        'Ticker' (the symbol) and 'Date' (the index of each downloaded frame).

    Raises
    ------
    ValueError
        If ``tickers`` is empty.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(tickers, str):
        tickers = [tickers]
    else:
        # Materialise once: the symbols are needed twice (to download and as
        # concat keys), which would exhaust a generator or other one-shot iterable.
        tickers = list(tickers)

    if not tickers:
        raise ValueError("tickers must contain at least one symbol")

    frames = [yf.download(ticker, start=startdate, end=enddate) for ticker in tickers]

    return pd.concat(frames, keys=tickers, names=['Ticker', 'Date'])

# Symbols to download; '^GSPC' is used for the S&P 500.
tickers = ['AAPL', 'MSFT', 'IBM', 'GOOG', '^GSPC']

# The date range is given as ISO 'YYYY-MM-DD' strings instead of
# datetime.datetime(...) objects. A later cell runs `from datetime import datetime`,
# which rebinds the name `datetime` to the class; re-running this cell after that
# would make `datetime.datetime(...)` raise AttributeError. Strings keep the cell
# re-runnable regardless of execution order.
stocks = get(tickers, '2006-01-01', '2016-01-01')

stocks.head()

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Unnamed: 1_level_0,Open,High,Low,Close,Adj Close,Volume
Ticker,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
AAPL,2006-01-03,2.585,2.669643,2.580357,2.669643,2.269043,807234400
AAPL,2006-01-04,2.683214,2.713571,2.660714,2.6775,2.275722,619603600
AAPL,2006-01-05,2.6725,2.675,2.633929,2.656429,2.257813,449422400
AAPL,2006-01-06,2.6875,2.739286,2.6625,2.725,2.316093,704457600
AAPL,2006-01-09,2.740357,2.757143,2.705,2.716071,2.308504,675040800


In [11]:
# Move the 'Ticker' and 'Date' index levels into regular columns.
# Guarded so that re-running this cell is a no-op: calling reset_index() again on
# the already-flattened frame would insert the integer index as a spurious
# 'index' column.
if 'Ticker' in stocks.index.names:
    stocks = stocks.reset_index()
stocks.head()

Unnamed: 0,Ticker,Date,Open,High,Low,Close,Adj Close,Volume
0,AAPL,2006-01-03,2.585,2.669643,2.580357,2.669643,2.269043,807234400
1,AAPL,2006-01-04,2.683214,2.713571,2.660714,2.6775,2.275722,619603600
2,AAPL,2006-01-05,2.6725,2.675,2.633929,2.656429,2.257813,449422400
3,AAPL,2006-01-06,2.6875,2.739286,2.6625,2.725,2.316093,704457600
4,AAPL,2006-01-09,2.740357,2.757143,2.705,2.716071,2.308504,675040800


In [13]:
# The date handling in this cell is done with pandas (pd.to_datetime and the .dt accessor).
# NOTE(review): the import below rebinds the name `datetime` from the module imported
# at the top of the notebook to the `datetime.datetime` class, so expressions of the
# form `datetime.datetime(...)` stop working once this cell has run.
from datetime import datetime

# Parse the Date column once and store the parsed values back on the frame
date_values = pd.to_datetime(stocks['Date'])
stocks['Date'] = date_values

# New column holding the four-digit year of each row, formatted as a string
stocks['Year'] = date_values.dt.strftime('%Y')

# Company is a plain copy of the Ticker column (the symbols are not translated to names)
stocks['Company'] = stocks['Ticker']

stocks.head()

Unnamed: 0,Ticker,Date,Open,High,Low,Close,Adj Close,Volume,Year,Company
0,AAPL,2006-01-03,2.585,2.669643,2.580357,2.669643,2.269043,807234400,2006,AAPL
1,AAPL,2006-01-04,2.683214,2.713571,2.660714,2.6775,2.275722,619603600,2006,AAPL
2,AAPL,2006-01-05,2.6725,2.675,2.633929,2.656429,2.257813,449422400,2006,AAPL
3,AAPL,2006-01-06,2.6875,2.739286,2.6625,2.725,2.316093,704457600,2006,AAPL
4,AAPL,2006-01-09,2.740357,2.757143,2.705,2.716071,2.308504,675040800,2006,AAPL


In [14]:
# Pull out the Company, Year and High stock price value for each company.
# .copy() makes stockshigh an explicitly independent frame, so later edits to it
# neither trigger SettingWithCopyWarning nor depend on pandas' copy/view rules.

stockshigh = stocks[['Company', 'Year', 'High']].copy()

stockshigh.head()

Unnamed: 0,Company,Year,High
0,AAPL,2006,2.669643
1,AAPL,2006,2.713571
2,AAPL,2006,2.675
3,AAPL,2006,2.739286
4,AAPL,2006,2.757143
