### Development Plan
- For all stocks in the S&P 500
    - Download updated ticker price data (last year)
    - Download sentiment data


In [1]:
# Package Importation
import os
import pickle
import sys
from datetime import date, timedelta
from os import listdir

import bs4 as bs
import matplotlib
import numpy as np
import pandas as pd
import pandas_datareader as pdr
import requests

'''
# Input functions from other notebook
import import_ipynb
from functions_1 import *
'''

# Report the interpreter and library versions this notebook was run with
version_report = [
    ('python: {}', sys.version),
    ('pandas: {}', pd.__version__),
    ('numpy: {}', np.__version__),
    ('pandas_datareader: {}', pdr.__version__),
    ('requests: {}', requests.__version__),
    ('bs4: {}', bs.__version__),
]
for template, version in version_report:
    print(template.format(version))

%matplotlib inline

# Quandl API
# The key is read from the QUANDL_API_KEY environment variable so the secret is
# never stored in (or committed with) the notebook. It falls back to an empty
# string when the variable is not set.
# NOTE(review): the key that used to be hardcoded here should be treated as
# leaked and rotated.
quandl_api_key = os.environ.get('QUANDL_API_KEY', '')

python: 3.7.3 (default, Apr 24 2019, 15:29:51) [MSC v.1915 64 bit (AMD64)]
pandas: 0.25.1
numpy: 1.16.5
pandas_datareader: 0.8.1
requests: 2.22.0
bs4: 4.8.0


In [2]:
# Pull Tickers
def sp500_tickers():
    '''
    Scrape the S&P 500 ticker symbols from Wikipedia and cache them locally.

    The first 'wikitable sortable' table on the page is read; the text of the
    first <td> cell of every row after the header row is taken as the ticker.

    Returns: list of ticker strings, in table order. The same list is pickled
             to Forecasting_Exp_1_Data/sp500_tickers.pickle.
    Raises:  requests.HTTPError if the page request does not succeed,
             ValueError if the expected table is not found on the page.
    '''
    data_dir = 'Forecasting_Exp_1_Data'

    resp = requests.get('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies', timeout=30)
    # Fail loudly on an error page instead of parsing it as if it were the list
    resp.raise_for_status()

    soup = bs.BeautifulSoup(resp.text, 'lxml')
    table = soup.find('table', {'class': 'wikitable sortable'})
    if table is None:
        raise ValueError("Could not find the 'wikitable sortable' table on the S&P 500 page")

    tickers = []
    for row in table.findAll('tr')[1:]:
        cells = row.findAll('td')
        # Rows made only of <th> cells carry no ticker; skip them rather than crash
        if not cells:
            continue
        tickers.append(cells[0].text.strip())

    # os.path.join keeps the path valid on every OS (a literal backslash is Windows-only)
    os.makedirs(data_dir, exist_ok=True)
    with open(os.path.join(data_dir, 'sp500_tickers.pickle'), "wb") as f:
        pickle.dump(tickers, f)

    return tickers

def sp500_yahoo(tickers, start, end):
    '''
    Download Yahoo price data for each ticker and store the combined result.

    Input:   tickers - sequence of ticker symbols
             start, end - date bounds passed to pdr.get_data_yahoo; their str()
                          form is also used in the output file names
    Returns: DataFrame with the rows of every successfully downloaded ticker
             stacked together, plus a 'Ticker' column. Empty if nothing was
             downloaded.

    Side effects: writes the DataFrame to
    Forecasting_Exp_1_Data/sp500_yahoo_from_<start>_to_<end>.pickle and .xlsx.
    A ticker whose download fails is reported and skipped (best effort).
    '''
    data_dir = 'Forecasting_Exp_1_Data'
    date_string = '_from_' + str(start) + '_to_' + str(end)
    total = len(tickers)

    # Collect the per-ticker frames and concatenate once at the end; appending
    # to a DataFrame inside the loop re-copies all accumulated rows every time.
    frames = []
    for i, ticker in enumerate(tickers):
        print("Collecting Yahoo data ", str(i+1), " of ", str(total), ": ", ticker)
        try:
            ticker_data = pdr.get_data_yahoo(symbols=ticker, start=start, end=end)
            ticker_data['Ticker'] = ticker
            frames.append(ticker_data)
        except Exception as exc:
            # Only catch Exception so Ctrl-C still stops the loop, and say
            # which ticker failed and why.
            print("Exception", ticker, repr(exc))
            continue

    # pd.concat raises on an empty list, so fall back to an empty frame
    yahoo_df = pd.concat(frames) if frames else pd.DataFrame()

    # os.path.join keeps the paths valid on every OS (a literal backslash is Windows-only)
    os.makedirs(data_dir, exist_ok=True)
    pickle_save_path = os.path.join(data_dir, 'sp500_yahoo' + date_string + '.pickle')
    with open(pickle_save_path, "wb") as f:
        pickle.dump(yahoo_df, f)

    excel_save_path = os.path.join(data_dir, 'sp500_yahoo' + date_string + '.xlsx')
    yahoo_df.to_excel(excel_save_path)

    print(yahoo_df.head())
    return yahoo_df


# Update local datasets
def update_data(start, end, overwrite_all = 0):
    '''
    Load the S&P 500 tickers and Yahoo price data, reusing local files when possible.

    Input:   start, end - date bounds for the Yahoo data; their str() form
                          identifies the cached Yahoo file
             overwrite_all - 0 (default) loads a dataset from its pickle in
                             Forecasting_Exp_1_Data when that file exists;
                             any other value re-pulls everything from the web
    Returns: dict with keys 'sp500_tickers' and 'sp500_yahoo'
    '''
    data_dir = 'Forecasting_Exp_1_Data'
    date_string = '_from_' + str(start) + '_to_' + str(end)

    # Create the folder on first use so listdir does not fail
    os.makedirs(data_dir, exist_ok=True)
    stored_files = listdir(data_dir)
    print("Stored files: ", stored_files)

    data_dict = {}

    # Add sp500 tickers
    tickers_file_name = 'sp500_tickers.pickle'
    if (tickers_file_name in stored_files) and (overwrite_all == 0):
        print('Loading Tickers from file')
        # NOTE: pickle.load executes arbitrary code; only load files this notebook wrote
        with open(os.path.join(data_dir, tickers_file_name), 'rb') as f:
            data_dict['sp500_tickers'] = pickle.load(f)
    else:
        print('Pulling Tickers from web')
        data_dict['sp500_tickers'] = sp500_tickers()

    # Add yahoo data
    # listdir returns bare file names, so the membership test must use the file
    # name alone; the directory is only joined on when opening the file.
    yahoo_file_name = 'sp500_yahoo' + date_string + '.pickle'
    if (yahoo_file_name in stored_files) and (overwrite_all == 0):
        print('Loading Yahoo data from file')
        with open(os.path.join(data_dir, yahoo_file_name), 'rb') as f:
            data_dict['sp500_yahoo'] = pickle.load(f)
    else:
        print('Pulling Yahoo data from web')
        data_dict['sp500_yahoo'] = sp500_yahoo(data_dict['sp500_tickers'], start, end)

    return data_dict


In [3]:
# Date window: the 365 days ending today
lookback = timedelta(days=365)
today = date.today()
year_ago = today - lookback
date_window = dict(start=year_ago, end=today)

# Announce the window, then pull every dataset for it
print("Pulling data from ", year_ago, " to ", today)

data_dict = update_data(overwrite_all=1, **date_window)


Pulling data from  2018-12-14  to  2019-12-14
Stored files:  ['sp500_tickers.pickle', 'sp500_yahoo_from_2018-12-14_to_2019-12-14.pickle', 'sp500_yahoo_from_2018-12-14_to_2019-12-14.xlsx']
Pulling Tickers from web
Pulling Yahoo data from web
Collecting Yahoo data  1  of  505 :  MMM
Collecting Yahoo data  2  of  505 :  ABT
Collecting Yahoo data  3  of  505 :  ABBV
Collecting Yahoo data  4  of  505 :  ABMD
Collecting Yahoo data  5  of  505 :  ACN
Collecting Yahoo data  6  of  505 :  ATVI
Collecting Yahoo data  7  of  505 :  ADBE
Collecting Yahoo data  8  of  505 :  AMD
Collecting Yahoo data  9  of  505 :  AAP
Collecting Yahoo data  10  of  505 :  AES
Collecting Yahoo data  11  of  505 :  AMG
Collecting Yahoo data  12  of  505 :  AFL
Collecting Yahoo data  13  of  505 :  A
Collecting Yahoo data  14  of  505 :  APD
Collecting Yahoo data  15  of  505 :  AKAM
Collecting Yahoo data  16  of  505 :  ALK
Collecting Yahoo data  17  of  505 :  ALB
Collecting Yahoo data  18  of  505 :  ARE
Collectin

Collecting Yahoo data  188  of  505 :  FFIV
Collecting Yahoo data  189  of  505 :  FB
Collecting Yahoo data  190  of  505 :  FAST
Collecting Yahoo data  191  of  505 :  FRT
Collecting Yahoo data  192  of  505 :  FDX
Collecting Yahoo data  193  of  505 :  FIS
Collecting Yahoo data  194  of  505 :  FITB
Collecting Yahoo data  195  of  505 :  FE
Collecting Yahoo data  196  of  505 :  FRC
Collecting Yahoo data  197  of  505 :  FISV
Collecting Yahoo data  198  of  505 :  FLT
Collecting Yahoo data  199  of  505 :  FLIR
Collecting Yahoo data  200  of  505 :  FLS
Collecting Yahoo data  201  of  505 :  FMC
Collecting Yahoo data  202  of  505 :  F
Collecting Yahoo data  203  of  505 :  FTNT
Collecting Yahoo data  204  of  505 :  FTV
Collecting Yahoo data  205  of  505 :  FBHS
Collecting Yahoo data  206  of  505 :  FOXA
Collecting Yahoo data  207  of  505 :  FOX
Collecting Yahoo data  208  of  505 :  BEN
Collecting Yahoo data  209  of  505 :  FCX
Collecting Yahoo data  210  of  505 :  GPS
Collect

Collecting Yahoo data  378  of  505 :  PNW
Collecting Yahoo data  379  of  505 :  PXD
Collecting Yahoo data  380  of  505 :  PNC
Collecting Yahoo data  381  of  505 :  PPG
Collecting Yahoo data  382  of  505 :  PPL
Collecting Yahoo data  383  of  505 :  PFG
Collecting Yahoo data  384  of  505 :  PG
Collecting Yahoo data  385  of  505 :  PGR
Collecting Yahoo data  386  of  505 :  PLD
Collecting Yahoo data  387  of  505 :  PRU
Collecting Yahoo data  388  of  505 :  PEG
Collecting Yahoo data  389  of  505 :  PSA
Collecting Yahoo data  390  of  505 :  PHM
Collecting Yahoo data  391  of  505 :  PVH
Collecting Yahoo data  392  of  505 :  QRVO
Collecting Yahoo data  393  of  505 :  PWR
Collecting Yahoo data  394  of  505 :  QCOM
Collecting Yahoo data  395  of  505 :  DGX
Collecting Yahoo data  396  of  505 :  RL
Collecting Yahoo data  397  of  505 :  RJF
Collecting Yahoo data  398  of  505 :  RTN
Collecting Yahoo data  399  of  505 :  O
Collecting Yahoo data  400  of  505 :  REG
Collecting Ya

In [4]:
#print(data_dict['sp500_tickers'])
#print(pdr.get_data_yahoo(symbols='AAPL', start=year_ago, end=today).tail())