# Forecasting
______________________________________________________

### Import Relevant Packages

In [11]:
# Package Importation
import os
import pickle
import sys
from datetime import date, timedelta
from os import listdir

import bs4 as bs
import matplotlib
import numpy as np
import pandas as pd
import pandas_datareader as pdr
import quandl
import requests

'''
# Input functions from other notebook
import import_ipynb
from functions_1 import *
'''

# Report interpreter and library versions so a run can be reproduced later
print(f'python: {sys.version}')
print(f'pandas: {pd.__version__}')
print(f'numpy: {np.__version__}')
print(f'pandas_datareader: {pdr.__version__}')
print(f'requests: {requests.__version__}')
print(f'bs4: {bs.__version__}')

%matplotlib inline

# Quandl API Configuration
# SECURITY: the key used to be hard-coded on this line. It is now read from the
# QUANDL_API_KEY environment variable so the credential is not stored in (or
# shared with) the notebook. The previously committed key should be rotated.
quandl.ApiConfig.api_key = os.environ.get('QUANDL_API_KEY')
if not quandl.ApiConfig.api_key:
    print('Warning: QUANDL_API_KEY is not set; Quandl requests may be rejected or rate-limited.')

python: 3.7.3 (default, Apr 24 2019, 15:29:51) [MSC v.1915 64 bit (AMD64)]
pandas: 0.25.1
numpy: 1.16.5
pandas_datareader: 0.8.1
requests: 2.22.0
bs4: 4.8.0


### Functions to Import Ticker and S&P 500 Price/Volume Data

In [7]:
# Pull Tickers
def sp500_tickers():
    """Scrape the S&P 500 ticker symbols from Wikipedia and cache them locally.

    Downloads the "List of S&P 500 companies" page, takes the first
    ``wikitable sortable`` table, and reads the text of the first cell of
    every row after the header row. The resulting list is pickled to
    ``sp500_tickers.pickle`` inside the ``Forecasting_Exp_1_Data`` directory
    (created if it does not exist).

    Returns:
        list of str: ticker symbols in the order they appear on the page.

    Raises:
        requests.HTTPError: if the page request returns an error status.
        ValueError: if the expected table is not present in the page.
    """
    resp = requests.get(
        'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies',
        timeout=30,  # requests has no default timeout; avoid hanging forever
    )
    # Fail loudly rather than trying to parse an error page
    resp.raise_for_status()

    soup = bs.BeautifulSoup(resp.text, 'lxml')
    table = soup.find('table', {'class': 'wikitable sortable'})
    if table is None:
        raise ValueError("Could not find the 'wikitable sortable' table on the Wikipedia page")

    tickers = []
    for row in table.find_all('tr')[1:]:
        cells = row.find_all('td')
        if not cells:
            # Rows made only of <th> cells carry no ticker
            continue
        tickers.append(cells[0].text.strip())

    # Build the path with os.path.join: the old "dir\file" literal relied on the
    # invalid escape sequence "\s" and only addressed the directory on Windows.
    data_dir = 'Forecasting_Exp_1_Data'
    os.makedirs(data_dir, exist_ok=True)
    with open(os.path.join(data_dir, 'sp500_tickers.pickle'), "wb") as f:
        pickle.dump(tickers, f)

    return tickers

def sp500_yahoo(tickers, start, end):
    """Download Yahoo data for each ticker and cache the combined table.

    Calls ``pdr.get_data_yahoo`` once per ticker, tags each result with a
    ``Ticker`` column, and stacks the results into one DataFrame. The frame is
    written to ``Forecasting_Exp_1_Data`` (created if missing) both as a pickle
    and as an Excel workbook, with the date range embedded in the file name.

    Args:
        tickers: sized iterable of ticker symbols (``len()`` is used for progress).
        start: start of the date range; passed to the reader and put into the
            file name via ``str(start)``.
        end: end of the date range; handled like ``start``.

    Returns:
        pandas.DataFrame: rows for every ticker that downloaded successfully;
        an empty DataFrame if none did.
    """
    data_dir = 'Forecasting_Exp_1_Data'
    date_string = '_from_' + str(start) + '_to_' + str(end)

    # Collect per-ticker frames and concatenate once: growing a DataFrame with
    # .append() inside the loop copies all prior rows on every iteration.
    frames = []
    total = len(tickers)
    for i, ticker in enumerate(tickers):
        print("Collecting Yahoo data ", str(i+1), " of ", str(total), ": ", ticker)
        try:
            ticker_data = pdr.get_data_yahoo(symbols=ticker, start=start, end=end)
            ticker_data['Ticker'] = ticker
        except Exception as exc:
            # Best effort: skip tickers that fail, but report why. Catching
            # Exception (not a bare except) lets Ctrl-C still stop the download.
            print("Exception", ticker, repr(exc))
            continue
        frames.append(ticker_data)

    # pd.concat raises on an empty list, so fall back to an empty frame
    yahoo_df = pd.concat(frames) if frames else pd.DataFrame()

    # os.path.join replaces the old "dir\file" literals, which relied on the
    # invalid escape sequence "\s" and only worked as paths on Windows.
    os.makedirs(data_dir, exist_ok=True)
    pickle_save_path = os.path.join(data_dir, 'sp500_yahoo' + date_string + '.pickle')
    with open(pickle_save_path, "wb") as f:
        pickle.dump(yahoo_df, f)

    excel_save_path = os.path.join(data_dir, 'sp500_yahoo' + date_string + '.xlsx')
    yahoo_df.to_excel(excel_save_path)

    return yahoo_df


###  Functions to Import Quandl Data

In [21]:
def get_sentiment(start, end):
    """Download the Quandl ``AAII/AAII_SENTIMENT`` dataset and cache it locally.

    The data is fetched once, written to ``Forecasting_Exp_1_Data`` (created if
    missing) as both a pickle and an Excel workbook with the date range in the
    file name, and then returned.

    Args:
        start: passed to ``quandl.get`` as ``start_date`` and put into the
            file name via ``str(start)``.
        end: passed to ``quandl.get`` as ``end_date``; handled like ``start``.

    Returns:
        pandas.DataFrame: the same frame that was written to disk.
    """
    data_dir = 'Forecasting_Exp_1_Data'
    date_string = '_from_' + str(start) + '_to_' + str(end)

    sent_df = pd.DataFrame(quandl.get('AAII/AAII_SENTIMENT', start_date=start, end_date=end))

    # os.path.join replaces the old "dir\file" literals, which relied on the
    # invalid escape sequence "\s" and only worked as paths on Windows.
    os.makedirs(data_dir, exist_ok=True)
    pickle_save_path = os.path.join(data_dir, 'sentiment' + date_string + '.pickle')
    with open(pickle_save_path, "wb") as f:
        pickle.dump(sent_df, f)

    excel_save_path = os.path.join(data_dir, 'sentiment' + date_string + '.xlsx')
    sent_df.to_excel(excel_save_path)

    # Return the frame already downloaded; the previous version issued a second,
    # redundant API request here instead of reusing it.
    return sent_df


### Functions to Update Data Dictionary (Local Datasets)

In [22]:
# Update local datasets
def _load_or_fetch(filename, stored_files, use_cache, load_msg, fetch_msg, fetch):
    """Return the pickled object ``filename`` from the data directory, or call ``fetch``."""
    if use_cache and filename in stored_files:
        print(load_msg)
        # NOTE: pickle.load can execute arbitrary code; only load files that
        # were written by this notebook.
        with open(os.path.join('Forecasting_Exp_1_Data', filename), 'rb') as f:
            return pickle.load(f)
    print(fetch_msg)
    return fetch()


def update_data(start, end, overwrite_all = 0):
    '''
    Build the dictionary of datasets, using local pickles when available.

    For each dataset the function looks for its pickle file in the
    ``Forecasting_Exp_1_Data`` directory (created if missing). If the file is
    present and ``overwrite_all`` is 0 it is loaded; otherwise the matching
    download function is called.

    Input:
        start, end: date range; used (via ``str()``) in the cache file names
            and passed on to the download functions.
        overwrite_all: 0 (default) to reuse cached files, any other value to
            download everything again.
    Returns: dict with keys 'sp500_tickers', 'sp500_yahoo' and 'sentiment'.
    '''
    data_dir = 'Forecasting_Exp_1_Data'
    date_string = '_from_' + str(start) + '_to_' + str(end)

    # A missing cache directory simply means nothing is cached yet
    os.makedirs(data_dir, exist_ok=True)
    stored_files = os.listdir(data_dir)
    print("Stored files: ", stored_files)

    use_cache = (overwrite_all == 0)
    data_dict = {}

    # Add sp500 tickers
    data_dict['sp500_tickers'] = _load_or_fetch(
        'sp500_tickers.pickle', stored_files, use_cache,
        'Loading Tickers from file', 'Pulling Tickers from web',
        lambda: sp500_tickers(),
    )

    # Add yahoo data (needs the tickers loaded above)
    data_dict['sp500_yahoo'] = _load_or_fetch(
        "sp500_yahoo" + date_string + ".pickle", stored_files, use_cache,
        'Loading Yahoo price data from file', 'Pulling Yahoo price data from web',
        lambda: sp500_yahoo(data_dict['sp500_tickers'], start, end),
    )

    # Add sentiment Data
    data_dict['sentiment'] = _load_or_fetch(
        "sentiment" + date_string + ".pickle", stored_files, use_cache,
        'Loading sentiment data from file', 'Pulling sentiment data from web',
        lambda: get_sentiment(start, end),
    )

    return data_dict

### Main Commands

In [24]:
# Date window: the 365 days ending today
today = date.today()
year_ago = today - timedelta(days=365)

# Load every dataset for that window (from local files when they exist)
print("Pulling data from ", year_ago, " to ", today)

data_dict = update_data(start=year_ago, end=today, overwrite_all=0)


Pulling data from  2018-12-14  to  2019-12-14
Stored files:  ['sentiment_from_2018-12-14_to_2019-12-14.pickle', 'sentiment_from_2018-12-14_to_2019-12-14.xlsx', 'sp500_tickers.pickle', 'sp500_yahoo_from_2018-12-14_to_2019-12-14.pickle', 'sp500_yahoo_from_2018-12-14_to_2019-12-14.xlsx']
Loading Tickers from file
Loading Yahoo data from file
Loading sentiment data from file
