## Description:

This notebook will scrape `http://www.nasdaq.com/earnings/earnings-calendar.aspx` in order to obtain data regarding upcoming earnings announcements. <br>

http://www.nasdaq.com/earnings/earnings-calendar.aspx?date=2018-Jan-17 <br>

Notice how the date is formatted at the end of the URL (`yyyy-Mmm-dd`, e.g. `2018-Jan-17`).


In [83]:
# import libs
import datetime
import os
import urllib.request

import numpy as np
import pandas as pd
import quandl
from bs4 import BeautifulSoup

# get the ticker within the parentheses
def get_ticker( s ):
    """
    Return the text between the first "(" and the first ")" in ``s``.

    e.g. "Apple Inc. (AAPL) Market Cap: $900B" -> "AAPL"

    Note: ``str.find`` returns -1 when a character is absent, so a string
    without parentheses yields ``s[0:-1]`` rather than an empty string.
    """
    open_pos = s.find("(") + 1
    close_pos = s.find(")")

    return s[open_pos:close_pos]
    

# date must be: yyyy-mmm-dd (i.e. 2018-Jan-16)
def scrape_build_df( date, min_estimates=6 ):
    """
    Scrape the NASDAQ earnings calendar for ``date`` and build the stock universe.

    Parameters
    ----------
    date : str
        Announcement date formatted as yyyy-Mmm-dd (e.g. "2018-Jan-16"); it is
        appended verbatim to the calendar URL.
    min_estimates : int, optional
        Keep only rows whose ``no_Est`` value is at least this large (default 6).

    Returns
    -------
    (df, universe, final_universe)
        df : DataFrame indexed by ticker with the columns 'Company/MktCap',
            'ExpReportDate', 'QuarterEnd', 'EPS_Exp' and 'no_Est', sorted by
            'no_Est' in descending order.
        universe : list of tickers, in the same order as ``df``.
        final_universe : list of ``(ticker, EPS_Exp, no_Est)`` tuples, one per row.
    """
    url = "http://www.nasdaq.com/earnings/earnings-calendar.aspx?date=" + date

    # NOTE(review): the calendar is taken to be the 4th table on the page;
    # this position is hard-coded and should be confirmed against the live page.
    df = pd.read_html(url)[3]
    df.columns = ['Time','Company/MktCap','ExpReportDate','QuarterEnd','EPS_Exp',
                  'no_Est','LastYearReportDate','LastYearEPS','pct_surprise']

    df = df.drop(columns=['Time', 'LastYearReportDate', 'LastYearEPS', 'pct_surprise'])

    # Series.map also works on an empty table, whereas np.vectorize without
    # ``otypes`` raises on size-0 input.
    df['Ticker'] = df['Company/MktCap'].map(get_ticker)
    df = df.set_index('Ticker')

    df = df.sort_values("no_Est", ascending=False)
    df = df[df.no_Est >= min_estimates]

    universe = df.index.tolist()
    # Read the columns row by row instead of looking each ticker up with .loc:
    # a duplicated ticker would make .loc return several rows instead of scalars.
    final_universe = list(zip(universe, df['EPS_Exp'].tolist(), df['no_Est'].tolist()))

    return df, universe, final_universe

# use quandl to retrieve stock data
# Credentials should not live in the notebook: set QUANDL_API_KEY in the
# environment. The literal below is kept only as a backward-compatible
# fallback and should be rotated and removed.
quandl.ApiConfig.api_key = os.environ.get('QUANDL_API_KEY', 'j9T_hnwjKBytmt7sEdV6')
def quandl_stocks( symbol, start_date=(2000, 1, 1), end_date=None ):
    """
    Fetch columns 1 through 12 of the WIKI dataset for one stock symbol.

    symbol is a string representing a stock symbol, e.g. 'AAPL'

    start_date and end_date are (year, month, day) tuples of integers

    end_date falls back to today's date when it is None (or otherwise falsy)

    Returns whatever quandl.get returns for the request.
    """
    # One dataset code per column: 'WIKI/<symbol>.1' ... 'WIKI/<symbol>.12'
    prefix = 'WIKI' + '/' + symbol + '.'
    dataset_codes = [prefix + str(column) for column in range(1, 13)]

    first_day = datetime.date(*start_date)
    last_day = datetime.date(*end_date) if end_date else datetime.date.today()

    request_options = {
        'returns': 'pandas',
        'start_date': first_day,
        'end_date': last_day,
        'collapse': 'daily',
        'order': 'asc',
    }

    return quandl.get(dataset_codes, **request_options)

# function to handle stock data retrieval errors
def stock_handler( stock, start_date=(2016, 7, 9) ):
    """
    Fetch price history for ``stock`` and reduce it to adjusted-close data.

    Parameters
    ----------
    stock : str
        Ticker symbol passed to ``quandl_stocks`` and ``adjust_df``.
    start_date : tuple of int, optional
        (year, month, day) of the first day to request; forwarded to
        ``quandl_stocks``.

    Returns
    -------
    (adj_df, curr_price)
        The pair produced by ``adjust_df`` on success. If a ValueError is raised
        while fetching or adjusting, ``(empty DataFrame, None)`` is returned so
        that callers can always index the result as a 2-tuple.
    """
    try:
        # Forward the caller's start_date instead of a hard-coded value.
        stock_df = quandl_stocks(stock, start_date=start_date)
        adj_df, curr_price = adjust_df( stock, stock_df )

        return adj_df, curr_price

    except ValueError:
        # Same shape as the success path: (frame, price).
        return pd.DataFrame(), None
    
# reduce "stock_df" to the series of Adj Close prices
# plus its period-over-period percentage change
def adjust_df( ticker, stock_df ):
    """
    Build a two-column frame of adjusted close prices and their percent change.

    Parameters
    ----------
    ticker : str
        Symbol used to locate the 'WIKI/<ticker> - Adj. Close' column.
    stock_df : DataFrame
        Frame containing that column; it is not modified.

    Returns
    -------
    (adj_df, last_price)
        adj_df has the columns 'close_price' and 'pct_change' and the same index
        as ``stock_df``; last_price is the final row's 'close_price'.

    Raises
    ------
    KeyError
        If the adjusted-close column is missing.
    IndexError
        If ``stock_df`` has no rows.
    """
    adj_df = stock_df['WIKI/' + ticker + ' - Adj. Close'].to_frame(name='close_price')

    # Get pct change
    adj_df['pct_change'] = adj_df['close_price'].pct_change()

    # .iloc makes the "last row" lookup explicitly positional; plain [-1] on a
    # label-indexed Series relies on a deprecated integer fallback.
    return adj_df, adj_df['close_price'].iloc[-1]
    
    
# build master dictionary
def get_master_dict( universe ):
    """
    Map each ticker in ``universe`` to its price data and earnings estimates.

    Parameters
    ----------
    universe : iterable
        Items indexable as ``(ticker, EPS_Exp, no_Est)``.

    Returns
    -------
    dict
        ``{ticker: [stock_handler result, EPS_Exp, no_Est]}``; tickers for which
        ``stock_handler`` produced no price data are left out.
    """
    master = {}

    for elm in universe:
        # Fetch once per ticker; every call goes through stock_handler's data retrieval.
        fetched = stock_handler(elm[0], start_date=(2016, 7, 9))

        # A failed retrieval shows up as an empty frame, either returned bare
        # or as the first item of the result tuple.
        if isinstance(fetched, pd.DataFrame) or fetched[0].empty:
            continue

        master[elm[0]] = [fetched, elm[1], elm[2]]

    return master
    
# main function
def driver( date ):
    """
    Run the whole pipeline for one announcement date.

    Scrapes the calendar for ``date`` and passes the third item of the scrape
    result (the list of per-ticker tuples) to ``get_master_dict``.
    """
    scraped = scrape_build_df(date)
    final_universe = scraped[2]

    return get_master_dict(final_universe)

In [84]:
# Smoke-test the full pipeline defined above for a single announcement date
# (format yyyy-Mmm-dd, as expected by scrape_build_df).
date = "2017-Jan-09"
master_dict = driver(date)

In [88]:
master_dict['GPN'][0]

(            close_price  pct_change
 Date                               
 2016-07-11    74.751998         NaN
 2016-07-12    75.391587    0.008556
 2016-07-13    75.631433    0.003181
 2016-07-14    76.430920    0.010571
 2016-07-15    76.670766    0.003138
 2016-07-18    76.710740    0.000521
 2016-07-19    76.720734    0.000130
 2016-07-20    77.869996    0.014980
 2016-07-21    77.160451   -0.009112
 2016-07-22    77.760066    0.007771
 2016-07-25    78.269739    0.006554
 2016-07-26    79.179155    0.011619
 2016-07-27    78.379668   -0.010097
 2016-07-28    78.139822   -0.003060
 2016-07-29    74.612088   -0.045146
 2016-08-01    73.662697   -0.012724
 2016-08-02    71.454116   -0.029982
 2016-08-03    73.113050    0.023217
 2016-08-04    73.512794    0.005467
 2016-08-05    74.492165    0.013322
 2016-08-08    73.892550   -0.008049
 2016-08-09    74.841940    0.012848
 2016-08-10    74.672049   -0.002270
 2016-08-11    74.811978    0.001874
 2016-08-12    74.652059   -0.002138
 

In [89]:
def get_bs_params( cp_df ):
    """
    Summarise a ``(price frame, current price)`` pair as simulation parameters.

    Returns ``[mean of 'pct_change', std of 'pct_change', current price]``.
    """
    # mean() and std() skip NaN by default, so the leading NaN row is harmless
    returns = cp_df[0]['pct_change']
    mu = returns.mean()
    sigma = returns.std()

    return [mu, sigma, cp_df[1]]

In [90]:
def set_param_dict( data_dict ):
    """
    Build ``{key: get_bs_params(first item of the value)}`` for every entry
    of ``data_dict``.
    """
    params = {}
    for key, entry in data_dict.items():
        params[key] = get_bs_params(entry[0])

    return params
    
    

In [91]:
# Per-ticker [mean of pct_change, std of pct_change, latest close price] built from master_dict
p_dict = set_param_dict(master_dict)

In [92]:
p_dict

{'AYI': [-0.0010415016418895565, 0.021540181759649401, 163.15000000000001],
 'CMC': [0.001300509126936006, 0.024138689411994527, 24.82],
 'CUDA': [0.0011388444695931237, 0.022424280057456853, 27.550000000000001],
 'GPN': [0.0010457247866145014, 0.013203834352762329, 108.01000000000001]}

In [93]:
# Pull out the GPN parameters: mean return, return std, latest price
gpn_mu = p_dict['GPN'][0]
gpn_sig = p_dict['GPN'][1]
gpn_price = p_dict['GPN'][2]

In [106]:
def simulate_price( mu, sig, s_t, n_sims=1000, n_periods=10, n_scenarios=100, seed=None ):
    """
    Monte Carlo estimate of the average simulated price after ``n_periods`` steps.

    Each simulation draws ``n_scenarios`` price paths of the form
    ``s_t * exp(cumsum(mu - 0.5*sig**2 + sig*epsilon))`` with standard-normal
    ``epsilon``, and records the mean terminal price. The result is the mean of
    those per-simulation means.

    Parameters
    ----------
    mu : float
        Per-period drift.
    sig : float
        Per-period volatility.
    s_t : float
        Starting price.
    n_sims : int, optional
        Number of independent simulations to average (default 1000).
    n_periods : int, optional
        Number of periods per path (default 10).
    n_scenarios : int, optional
        Number of paths per simulation (default 100).
    seed : int or None, optional
        When given, draws come from a dedicated ``RandomState(seed)`` so the
        result is reproducible. When None, NumPy's global random state is used.

    Returns
    -------
    float
        Mean terminal price across all simulations.

    Raises
    ------
    ValueError
        If ``n_sims``, ``n_periods`` or ``n_scenarios`` is smaller than 1.
    """
    if min(n_sims, n_periods, n_scenarios) < 1:
        raise ValueError("n_sims, n_periods and n_scenarios must all be >= 1")

    rng = np.random if seed is None else np.random.RandomState(seed)

    # Loop-invariant drift term, hoisted out of the simulation loop
    drift = mu - 0.5 * (sig ** 2)

    avg_sT = []
    for _ in range(n_sims):
        epsilon = rng.normal(size=[n_periods, n_scenarios])
        paths = s_t * np.exp(np.cumsum(drift + sig * epsilon, axis=0))

        # Only the last row (terminal prices) feeds the estimate
        avg_sT.append(paths[-1].mean())

    return np.array(avg_sT).mean()

In [111]:
# Simulated price per ticker from its [mu, sigma, latest price] parameters
estimated_price_dict = {
    ticker: simulate_price(params[0], params[1], params[2])
    for ticker, params in p_dict.items()
}
    

In [117]:
# Tabulate the simulated prices: one row per ticker, a single column of estimates
df = pd.DataFrame.from_dict(estimated_price_dict, orient='index')
df.columns = ['Simulated Price']

In [118]:
df

Unnamed: 0,Simulated Price
GPN,109.166057
AYI,161.477506
CUDA,27.86736
CMC,25.156916
