## Description:

This notebook will scrape `http://www.nasdaq.com/earnings/earnings-calendar.aspx` in order to obtain data regarding upcoming earnings announcements. <br>

http://www.nasdaq.com/earnings/earnings-calendar.aspx?date=2018-Jan-17 <br>

Notice how the date is formatted at the end of the URL (`yyyy-Mmm-dd`, e.g. `2018-Jan-17`).


In [135]:
# import libs
import datetime
import os
import urllib.request

import numpy as np
import pandas as pd
import quandl
from bs4 import BeautifulSoup

# get the ticker within the parentheses
def get_ticker( s ):
    """
    Return the text enclosed by the first "(" in s and the next ")" after it.

    s is a string such as 'Acuity Brands Inc (AYI)', for which the result
    is 'AYI'.

    An empty string is returned when s has no "(" or no ")" following it,
    rather than a slice built from the -1 that str.find reports for
    "not found".
    """

    start = s.find("(")
    if start == -1:
        return ""

    # look for the closing parenthesis only after the opening one
    end = s.find(")", start + 1)
    if end == -1:
        return ""

    return s[start + 1:end]
    

# date must be: yyyy-Mmm-dd (e.g. 2018-Jan-16)
def scrape_build_df( date ):
    """
    Scrape the NASDAQ earnings calendar for one day and build the universe.

    date is a string formatted as yyyy-Mmm-dd (e.g. '2018-Jan-16'); it is
    appended as-is to the calendar URL.

    Returns a tuple (df, universe, final_universe):
      df             -- the calendar table indexed by ticker, restricted to
                        rows with no_Est >= 6 and sorted by no_Est descending
      universe       -- list of the tickers in df, in row order
      final_universe -- list of (ticker, EPS_Exp, no_Est) tuples, one per
                        row of df, in row order
    """

    url = "http://www.nasdaq.com/earnings/earnings-calendar.aspx?date=" + date

    # the calendar is read from the table at position 3 of the parsed page;
    # that position is tied to the page layout
    df = pd.read_html(url)[3]
    df.columns = ['Time','Company/MktCap','ExpReportDate','QuarterEnd','EPS_Exp',
                  'no_Est','LastYearReportDate','LastYearEPS','pct_surprise']

    df = df.drop(['Time', 'LastYearReportDate', 'LastYearEPS', 'pct_surprise'],
                 axis=1)

    # Series.map also handles a table with zero rows, where np.vectorize
    # raises because it cannot infer the output type
    df['Ticker'] = df['Company/MktCap'].map(get_ticker)
    df = df.set_index('Ticker')

    df = df.sort_values("no_Est", ascending=False)
    df = df[df.no_Est >= 6]

    universe = df.index.tolist()

    # walk the columns in row order instead of df.loc[ticker][column]: a label
    # lookup returns several rows (not scalars) when a ticker appears twice
    final_universe = list(zip(universe, df['EPS_Exp'], df['no_Est']))

    return df, universe, final_universe

# use Quandl to retrieve stock data
# NOTE: an API key written into the source is visible to everyone who can
# read this file. Set the QUANDL_API_KEY environment variable to supply a
# private key; the literal is kept only as the fallback so existing runs
# behave exactly as before when the variable is not set.
quandl.ApiConfig.api_key = os.environ.get('QUANDL_API_KEY', 'j9T_hnwjKBytmt7sEdV6')
def quandl_stocks( symbol, start_date=(2000, 1, 1), end_date=None ):
    """
    Request columns 1 through 12 of a stock's WIKI dataset from Quandl.

    symbol     -- stock symbol string, e.g. 'AAPL'
    start_date -- (year, month, day) tuple of integers
    end_date   -- (year, month, day) tuple of integers; today's date is
                  used when it is None

    Returns the result of quandl.get for those dataset codes, requested as
    'pandas' with daily collapse and ascending order.
    """

    # one dataset code per column: 'WIKI/<symbol>.1' ... 'WIKI/<symbol>.12'
    code_prefix = 'WIKI' + '/' + symbol + '.'
    query_list = [code_prefix + str(column) for column in range(1, 13)]

    first_day = datetime.date(*start_date)
    last_day = datetime.date(*end_date) if end_date else datetime.date.today()

    request_options = dict(
        returns='pandas',
        start_date=first_day,
        end_date=last_day,
        collapse='daily',
        order='asc',
    )
    return quandl.get(query_list, **request_options)

# function to handle stock data retrieval errors
def stock_handler( stock, start_date=(2016, 7, 9) ):
    """
    Fetch the adjusted close prices for one stock, tolerating lookup errors.

    stock      -- stock symbol string, e.g. 'AAPL'
    start_date -- (year, month, day) tuple of integers passed on to
                  quandl_stocks as the first day of the price history

    Returns the one-column ('close_price') DataFrame produced by adjust_df,
    or an empty DataFrame when the retrieval raises ValueError.
    """

    try:
        # pass the caller's start_date through instead of a fixed date, so
        # the parameter actually controls the requested history
        stock_df = quandl_stocks(stock, start_date=start_date)
        return adjust_df( stock, stock_df )

    except ValueError:
        # callers test the result with .empty to skip unavailable stocks
        return pd.DataFrame()
    
# reduce "stock_df" from the above to a one-column
# DataFrame holding only the Adj. Close prices
def adjust_df( ticker, stock_df ):
    """
    Extract the adjusted close prices of ticker from stock_df.

    ticker   -- stock symbol string used to build the column label
                'WIKI/<ticker> - Adj. Close'
    stock_df -- DataFrame containing that column

    Returns a new DataFrame whose single column is named 'close_price'.
    """

    source_column = 'WIKI/' + ticker + ' - Adj. Close'

    prices = pd.DataFrame(stock_df[source_column])
    prices.columns = ['close_price']

    return prices
    
    
# build master dictionary: ticker -> [close-price DataFrame, EPS_Exp, no_Est]
def get_master_dict( universe ):
    """
    Collect price data for every stock in universe that has any.

    universe is an iterable of indexable records whose element 0 is the
    ticker; elements 1 and 2 are carried through unchanged.

    Returns a dict mapping ticker -> [price DataFrame, elm[1], elm[2]].
    Tickers for which stock_handler returns an empty DataFrame are omitted.
    """

    master = {}

    for elm in universe:
        # fetch once per ticker and reuse the result for both the emptiness
        # check and the stored value; each stock_handler call is a remote
        # request
        prices = stock_handler(elm[0], start_date=(2016, 7, 9))

        if not prices.empty:
            master[elm[0]] = [prices, elm[1], elm[2]]

    return master
    
# main function
def driver( date ):
    """
    Run the whole pipeline for one calendar date.

    date is passed to scrape_build_df; the third item of its result is fed
    to get_master_dict, whose return value is returned.
    """

    scraped = scrape_build_df(date)
    final_universe = scraped[2]

    return get_master_dict(final_universe)

In [136]:
# Testing all of the above functions: build the master dictionary for a
# single calendar date. The date string is appended to the calendar URL,
# so it must follow the yyyy-Mmm-dd format.
date = "2017-Jan-09"
master_dict = driver(date)


In [137]:
# tickers that ended up with a non-empty price DataFrame
master_dict.keys()

dict_keys(['GPN', 'AYI', 'CUDA', 'CMC'])

In [143]:
def get_bs_params( cp_df ):
    """
    Summarise the close_price column of cp_df.

    Returns a two-item list: [mean of close_price, std of close_price].
    """

    prices = cp_df.close_price
    average = prices.mean()
    spread = prices.std()

    return [average, spread]

In [144]:
def set_param_dict( data_dict ):
    """
    Compute the close-price parameters for every entry of data_dict.

    data_dict maps a key to a sequence whose element 0 is a DataFrame
    accepted by get_bs_params.

    Returns a dict mapping each key to the list get_bs_params returns for
    that DataFrame.
    """

    params = {}

    for ticker in data_dict.keys():
        price_frame = data_dict[ticker][0]
        params[ticker] = get_bs_params(price_frame)

    return params
    
    

In [145]:
# per-ticker [mean, std] of the close prices held in master_dict
p_dict = set_param_dict(master_dict)

In [146]:
# show the computed parameters: ticker -> [mean close price, std of close price]
p_dict

{'AYI': [206.29040553700034, 37.059024441712687],
 'CMC': [18.837813596875787, 2.3596742316822463],
 'CUDA': [23.404140624999997, 1.7366861066164176],
 'GPN': [85.068847557255467, 10.920117020850642]}