# Fetch Data
This notebook fetches the historical data of multiple REIT ETFs from Yahoo Finance and combines it into a single dataframe.

In [1]:
import pandas as pd
import numpy as np
# import matplotlib.pyplot as plt

import yfinance as yf

In [2]:
# Load the table of ticker symbols from the local CSV file and preview the first rows
ticker_name = pd.read_csv('data/ticker_name.csv')
ticker_name.head()

Unnamed: 0,Symbol
0,VNQ
1,IYR
2,SCHH
3,XLRE
4,ICF


In [3]:
# Pull the 'Symbol' column out as a plain Python list and show its first ten entries
top28 = ticker_name['Symbol'].tolist()
top28[:10]

['VNQ', 'IYR', 'SCHH', 'XLRE', 'ICF', 'USRT', 'RWR', 'FREL', 'REM', 'BBRE']

## Instruction
1. Function 1
    - Selects the REIT ETFs that have at least a certain number of years of historical data
2. Function 2
    - Fetches the historical data of a ticker from Yahoo Finance
3. Function 3
    - Combines the data of multiple tickers into a single dataframe

In [4]:
# Function 1
def select_ticker(symbols, year):
    '''
    Select the tickers whose fund inception year is on or before `year`.
    ex. 2011 => the fund started in 2011 or earlier, i.e. it has at least
        2021-2011 = 10 years of history when checked in 2021
    
    -*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
    
    Inputs: 
    - symbols: iterable of ticker symbols
    - year: int
        ex. 2010
    
    Return:
    - list of the symbols that fulfilled the requirement, in input order.
      Symbols whose Yahoo Finance info has no 'fundInceptionDate' are
      skipped (a message is printed) instead of raising an error.
    '''
    
    # Map each ticker to its inception year; a dict also collapses
    # duplicated symbols into a single entry.
    incep_year = {}
    
    for symbol in symbols:
        # Acquire the inception date (unix epoch, in seconds) for each ticker
        timestamp = yf.Ticker(symbol).info.get('fundInceptionDate')
        
        # pd.to_datetime(None) returns None, so reading `.year` from it would
        # raise AttributeError and abort the whole selection. Skip instead.
        if pd.isna(timestamp):
            print(f'No inception date:{symbol}')
            continue
        
        # Convert epoch unix to a readable date and keep only the year
        incep_year[symbol] = pd.to_datetime(timestamp, unit='s').year
    
    # Select desired tickers based on inception year
    return [symbol for symbol, start_year in incep_year.items() if start_year <= year]
                                                     
                                                     
                                                     
# Function 2                                                     
def get_stock_data(symbols):
    '''
    Download the historical market data of one ticker from Yahoo Finance.
    
    -*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
    
    Input:
    - symbols: a single ticker symbol, passed straight to yf.Ticker
    
    Return:
    - The result of the ticker's history(period='max') call
    
    '''
    
    # Build the ticker handle and request its history with period='max'
    return yf.Ticker(symbols).history(period='max')



# Function 3
def get_data(symbols):
    '''
    Acquire the historical data from Yahoo Finance by calling get_stock_data
    once per ticker, and put all of it in one dataframe.
    
    -*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
    
    Input:
    - symbols: iterable of ticker symbols
    
    Return:
    - Pandas dataframe with the rows of every ticker stacked on top of each
      other, plus a 'Ticker' column naming the ticker each row belongs to.
      An empty dataframe is returned when no ticker could be fetched.
      Tickers that raise an error are reported with a printed message and
      skipped.
    '''
    
    # Collect the per-ticker frames and concatenate once at the end;
    # concatenating inside the loop re-copies all earlier rows every time.
    frames = []
    
    for symbol in symbols:
        try:
            # Get all historical market data for this ticker
            df_extra = get_stock_data(symbol)
            
            # Add an extra column to label the ticker
            df_extra['Ticker'] = symbol
            
            frames.append(df_extra)
        
        # Only ordinary errors are skipped; KeyboardInterrupt / SystemExit
        # still propagate so a long download can be interrupted.
        except Exception:
            print(f'Ticker error:{symbol}')
    
    # pd.concat raises ValueError on an empty list, so keep returning a
    # blank dataframe when there is nothing to combine.
    if not frames:
        return pd.DataFrame()
    
    return pd.concat(frames)

In [5]:
# This cell is slow: select_ticker looks up every ticker on Yahoo Finance one by one.
# Keep only the REIT ETFs whose inception year is 2011 or earlier
# (at least 10 years of history, counted from 2021).
ticker_list = select_ticker(top28, 2011)

In [6]:
# Number of tickers that passed the inception-year filter
len(ticker_list)

15

In [7]:
# Fetch the history of every selected ticker and stack it into one dataframe
data = get_data(ticker_list)
data

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits,Ticker
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2004-09-29,23.794544,23.861371,23.789772,23.794544,205800,0.0,0.0,VNQ
2004-09-30,23.866143,24.023660,23.861370,23.985474,27900,0.0,0.0,VNQ
2004-10-01,24.033207,24.462799,23.985476,24.429386,129800,0.0,0.0,VNQ
2004-10-04,24.529629,24.606000,24.529629,24.534403,8100,0.0,0.0,VNQ
2004-10-05,24.534403,24.582135,24.510535,24.529629,11900,0.0,0.0,VNQ
...,...,...,...,...,...,...,...,...
2021-06-17,9.280000,9.290000,9.220000,9.220000,5200,0.0,0.0,REK
2021-06-18,9.330000,9.370000,9.270000,9.370000,22800,0.0,0.0,REK
2021-06-21,9.320000,9.320000,9.170000,9.170000,22900,0.0,0.0,REK
2021-06-22,9.160000,9.230000,9.160000,9.220000,6200,0.0,0.0,REK


In [8]:
# Count the distinct tickers present in the combined data
data['Ticker'].nunique()

15

In [9]:
# Save the combined dataframe to a CSV file inside the data folder
data.to_csv('./data/REIT_ETFs.csv')