In [1]:
# Import libraries

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import pandas_datareader.data as web

import yfinance as yf

### Acquire the stock data from Yahoo Finance
- `yf.Ticker().history()`
- `yf.download()`
- pandas-datareader

**Through `yf.Ticker().history()`**

In [12]:
# Get the Walmart stock information

wmt = yf.Ticker("WMT") # yf.Ticker(...) returns a Ticker object (see the output below), not a dictionary
wmt # Display the Ticker object

yfinance.Ticker object <WMT>

In [6]:
# Get the market data for the past 5 days from the WMT Ticker object

hist = wmt.history(period="5d")
# Display the first rows of the returned price history
hist.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2020-11-30,151.600006,152.949997,150.100006,152.789993,10898500,0,0
2020-12-01,153.600006,153.660004,151.660004,152.639999,7647100,0,0
2020-12-02,152.0,152.619995,149.529999,150.520004,7849000,0,0
2020-12-03,150.279999,150.279999,148.389999,149.300003,8571900,0,0
2020-12-04,149.509995,149.453705,147.589996,147.919998,2794480,0,0


**Takeaways**
- The dtype of the index is datetime.
- The result does not include the adjusted close price.
- It doesn't support fetching data for multiple tickers.

**Fetching data for multiple tickers using `yf.download()`**

In [33]:
# Space-separated string of the tickers to download
tickers = 'AAPL WMT TSLA GE AMZN DB'

# Acquire the adjusted closing price.
# auto_adjust=False is passed explicitly: the 'Adj Close' column is only
# returned when prices are not auto-adjusted, and newer yfinance releases
# turn auto-adjustment on by default, which would make the ['Adj Close']
# lookup fail with a KeyError.
# The dates are passed by keyword so it is clear which one is the start and
# which one is the end of the requested range.

data = yf.download(tickers, start='2020-12-01', end='2020-12-04', auto_adjust=False)['Adj Close']
data

[*********************100%***********************]  6 of 6 completed


Unnamed: 0_level_0,AAPL,AMZN,DB,GE,TSLA,WMT
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-12-01,122.720001,3220.080078,11.45,10.15,584.76001,152.639999
2020-12-02,123.080002,3203.530029,11.58,10.43,568.820007,150.520004
2020-12-03,122.940002,3186.72998,11.77,10.6,593.380005,149.300003


**Takeaways**
- The order of the stocks changes to alphabetical order.
- The last day in the downloaded data is the day before the end date passed to `yf.download`, i.e. the end date is exclusive.

**Use pandas datareader to read stock data from yahoo finance**

In [34]:
# Create a list of the stocks you are interested in
stocks = ['AAPL', 'WMT', 'TSLA', 'GE', 'AMZN', 'DB']

# Specify the start date and end date
# (the output below includes the row for end_date itself)

start_date = '2020-12-01'
end_date = '2020-12-03'

# Acquire the data and keep only the adjusted closing prices
# NOTE(review): the 'yahoo' data source of pandas-datareader may not work with
# every library version - confirm it still returns data in your environment.
data = web.DataReader(stocks, data_source='yahoo', start=start_date, end=end_date)['Adj Close']
data

Symbols,AAPL,WMT,TSLA,GE,AMZN,DB
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-12-01,122.720001,152.639999,584.76001,10.15,3220.080078,11.45
2020-12-02,123.080002,150.520004,568.820007,10.43,3203.530029,11.58
2020-12-03,122.940002,149.300003,593.380005,10.6,3186.72998,11.77


In [35]:
# Drop the 'Symbols' label from the column axis and order the columns as in `stocks`.
# The columns are selected by label rather than overwritten by position:
# assigning `data.columns = stocks` would silently attach the wrong ticker to
# a price series whenever the column order of `data` differs from `stocks`
# (for example the alphabetical order returned by yf.download).
# A ticker missing from `data` raises a KeyError instead of being mislabeled.

data = data.rename_axis(columns=None)[stocks]
data

Unnamed: 0_level_0,AAPL,WMT,TSLA,GE,AMZN,DB
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-12-01,122.720001,152.639999,584.76001,10.15,3220.080078,11.45
2020-12-02,123.080002,150.520004,568.820007,10.43,3203.530029,11.58
2020-12-03,122.940002,149.300003,593.380005,10.6,3186.72998,11.77


**Takeaways**
- Takes a little longer than `yf.download`.
- The order of the stocks remains the same in the dataframe. 

**Build the helper function to fetch the data**

### Acquire metadata about the anonymized features

In [4]:
# Load the metadata about the features from the CSV file
df_features = pd.read_csv("Database/features.csv")
# Summarize the columns: names, non-null counts and dtypes
df_features.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 130 entries, 0 to 129
Data columns (total 30 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   feature  130 non-null    object
 1   tag_0    130 non-null    bool  
 2   tag_1    130 non-null    bool  
 3   tag_2    130 non-null    bool  
 4   tag_3    130 non-null    bool  
 5   tag_4    130 non-null    bool  
 6   tag_5    130 non-null    bool  
 7   tag_6    130 non-null    bool  
 8   tag_7    130 non-null    bool  
 9   tag_8    130 non-null    bool  
 10  tag_9    130 non-null    bool  
 11  tag_10   130 non-null    bool  
 12  tag_11   130 non-null    bool  
 13  tag_12   130 non-null    bool  
 14  tag_13   130 non-null    bool  
 15  tag_14   130 non-null    bool  
 16  tag_15   130 non-null    bool  
 17  tag_16   130 non-null    bool  
 18  tag_17   130 non-null    bool  
 19  tag_18   130 non-null    bool  
 20  tag_19   130 non-null    bool  
 21  tag_20   130 non-null    bool  
 22  ta

In [5]:
# Display the first 5 rows of the feature metadata
df_features.head()

Unnamed: 0,feature,tag_0,tag_1,tag_2,tag_3,tag_4,tag_5,tag_6,tag_7,tag_8,...,tag_19,tag_20,tag_21,tag_22,tag_23,tag_24,tag_25,tag_26,tag_27,tag_28
0,feature_0,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,feature_1,False,False,False,False,False,False,True,True,False,...,False,False,False,False,False,False,False,False,False,False
2,feature_2,False,False,False,False,False,False,True,True,False,...,False,False,False,False,False,False,False,False,False,False
3,feature_3,False,False,False,False,False,False,True,False,True,...,False,False,False,False,False,False,False,False,False,False
4,feature_4,False,False,False,False,False,False,True,False,True,...,False,False,False,False,False,False,False,False,False,False
