In [139]:
from datetime import datetime as dt
from edgar import Company
import lxml.html as lh
import pandas as pd
import pandas_datareader.data as pdr

In [200]:
def get_ticker_to_cik():
    """Fetch the SEC ticker listing and return it as a two-column DataFrame.

    The file at https://www.sec.gov/include/ticker.txt is read as
    tab-separated text without a header row; its two fields are named
    ``ticker`` and ``cik``.

    Returns:
        pandas.DataFrame: columns ``ticker`` (upper-cased) and ``cik``
        (cast to ``str``).
    """
    # local copy: data/ticker_to_cik.txt
    source_url = 'https://www.sec.gov/include/ticker.txt'
    listing = pd.read_csv(source_url, sep='\t', header=None, names=['ticker', 'cik'])
    return listing.assign(
        ticker=lambda frame: frame['ticker'].str.upper(),
        cik=lambda frame: frame['cik'].astype(str),
    )

def get_cik_to_name():
    """Fetch the SEC company-tickers JSON and return one row per entry.

    The JSON at https://www.sec.gov/files/company_tickers.json is loaded and
    transposed so that its fields become columns.

    Returns:
        pandas.DataFrame: the transposed table with ``ticker`` upper-cased
        and an added ``cik`` column holding ``cik_str`` cast to ``str``.
    """
    # local copy: data/cik_to_name.json
    source_url = 'https://www.sec.gov/files/company_tickers.json'
    companies = pd.read_json(source_url).T
    return companies.assign(
        ticker=lambda frame: frame['ticker'].str.upper(),
        cik=lambda frame: frame['cik_str'].astype(str),
    )

def process_spac_lists(file_path_current, file_path_past, write=False):
    """Load the current and past SPAC lists and attach CIK and company title.

    Args:
        file_path_current: CSV source (anything ``pd.read_csv`` accepts) with
            a ``Ticker`` column; only the distinct tickers are kept.
        file_path_past: CSV source with ``Old Ticker`` and ``New Ticker``
            columns. Missing values in any column are replaced by the string
            ``'missing'``, then rows repeating the same Old/New ticker pair
            are dropped (first occurrence kept).
        write: when truthy, both result tables are also written to
            ``spac_list_current.csv`` and ``spac_list_past.csv`` in the
            working directory.

    Returns:
        tuple: ``(spac_list_current, spac_list_past)``. Each is left-merged
        with the ticker-to-CIK table (adds ``ticker`` and ``cik``) and then
        with the CIK-to-name table (adds ``title``); rows without a match
        keep NaN in the added columns.
    """
    # current spac list: one row per distinct ticker
    spac_list_current = (
        pd.read_csv(file_path_current)[['Ticker']]
        .drop_duplicates()
        .reset_index(drop=True)
    )

    # past spac list (completed business combination)
    # The previous isin(unique()) filter kept every row, so duplicates were
    # never removed; drop_duplicates performs the intended de-duplication.
    spac_list_past = (
        pd.read_csv(file_path_past)
        .fillna('missing')
        .drop_duplicates(subset=['Old Ticker', 'New Ticker'])
    )

    # get ticker to cik and cik to company name file, then merge
    ticker_to_cik = get_ticker_to_cik()
    cik_titles = get_cik_to_name()[['cik', 'ticker', 'title']]
    spac_list_past = (
        spac_list_past
        .merge(ticker_to_cik, how='left', left_on='New Ticker', right_on='ticker')
        .merge(cik_titles, how='left', on=['cik', 'ticker'])
    )
    spac_list_current = (
        spac_list_current
        .merge(ticker_to_cik, how='left', left_on='Ticker', right_on='ticker')
        .merge(cik_titles, how='left', on=['cik', 'ticker'])
    )

    # write to file
    if write:
        spac_list_current.to_csv('spac_list_current.csv', index=False)
        spac_list_past.to_csv('spac_list_past.csv', index=False)
    return spac_list_current, spac_list_past

def form_html_to_text(forms_html):
    """Convert parsed filing documents into normalised lower-case strings.

    Args:
        forms_html: iterable of objects exposing ``text_content()``.

    Returns:
        list[str]: one string per document, with newlines and non-breaking
        spaces (``\\xa0``) replaced by a plain space and everything
        lower-cased.
    """
    def _flatten(document):
        raw = document.text_content()
        return raw.replace('\n', ' ').replace('\xa0', ' ').lower()

    return [_flatten(document) for document in forms_html]

def create_date_text_df(forms_text, form_type):
    """Build a (date, form, text) table from lower-cased filing texts.

    The report date is taken from the text following the marker
    ``'date of report (date of earliest event reported): '``, expected in a
    "<month name> <day>, <year>" layout. Texts where the marker is absent or
    the date cannot be parsed are reported via ``print`` and skipped.

    Args:
        forms_text: iterable of filing texts (strings).
        form_type: value stored in the ``form`` column of every row.

    Returns:
        pandas.DataFrame: columns ``date``, ``form`` and ``text``, one row per
        successfully parsed text. The columns are present even when no text
        could be parsed.
    """
    marker = 'date of report (date of earliest event reported): '
    records = []
    for form_text in forms_text:
        try:
            split_text = form_text.split(marker)[1].split(', ')
            # Strip inner spaces so e.g. "june 22" becomes "june22", then keep
            # only the four year digits from the piece after the comma.
            date_string = split_text[0].replace(' ', '') + ', ' + split_text[1].replace(' ', '')[0:4]
            date_dt = dt.strptime(date_string, '%B%d, %Y')
        except (IndexError, ValueError):
            # IndexError: marker or comma missing; ValueError: unparseable date.
            print('Logic to find date broke. See text:\n', form_text)
            continue
        records.append({'date': date_dt, 'form': form_type, 'text': form_text})
        print(date_dt, 'form added')
    # Construct the frame once: DataFrame.append is gone in pandas 2.x, and
    # explicit columns keep the schema stable for an empty result.
    return pd.DataFrame(records, columns=['date', 'form', 'text'])

def get_forms_text(company_name, cik_id, form_type):
    """Download a company's filings of one type and tabulate them by date.

    Prints the company name and the EDGAR listing URL, requests up to 100
    filings of ``form_type`` (ownership filings included), converts the
    documents to text and hands them to ``create_date_text_df``.

    Args:
        company_name: name passed to ``Company``.
        cik_id: CIK passed to ``Company``.
        form_type: filing type to request; also forwarded to
            ``create_date_text_df``.

    Returns:
        The value returned by ``create_date_text_df`` for the fetched texts.
    """
    print(company_name)
    filer = Company(company_name, cik_id)
    listing_query = dict(filing_type=form_type, ownership='include', no_of_entries=100)
    print('url to forms:', filer.get_filings_url(**listing_query))
    filings_page = filer.get_all_filings(**listing_query)
    # NOTE(review): confirm get_documents returns a list even when only a
    # single filing matches; the text conversion iterates over its result.
    documents = filer.get_documents(filings_page, no_of_documents=100, debug=False)
    return create_date_text_df(form_html_to_text(documents), form_type)

def simple_text_match(df_form, substring):
    """Add a 0/1 column flagging rows whose ``text`` contains ``substring``.

    The new column is named after the substring with spaces replaced by
    underscores plus the suffix ``_found``. The match is a plain,
    case-sensitive ``in`` test.

    Args:
        df_form: DataFrame with a ``text`` column; modified in place.
        substring: text to look for.

    Returns:
        The same ``df_form`` object, with the flag column added.
    """
    flag_column = substring.replace(' ', '_') + '_found'

    def _contains(text):
        return int(substring in text)

    df_form[flag_column] = df_form['text'].apply(_contains)
    return df_form

def get_historical_prices(symbol, start_date, end_date):
    """Fetch historical prices for ``symbol`` between two dates.

    Args:
        symbol: ticker symbol passed to the data reader as ``name``.
        start_date: first date, as a ``'YYYY-MM-DD'`` string.
        end_date: last date, as a ``'YYYY-MM-DD'`` string.

    Returns:
        Whatever ``pdr.DataReader`` returns for the ``'yahoo'`` data source
        over the requested window.
    """
    def _to_datetime(date_string):
        # Split on '-' and use the first three fields as year, month, day.
        parts = date_string.split('-')
        return dt(int(parts[0]), int(parts[1]), int(parts[2]))

    # The notebook imports pandas_datareader.data as `pdr`; the former `web`
    # alias is never defined and raised NameError on a fresh kernel.
    return pdr.DataReader(name=symbol, data_source='yahoo',
                          start=_to_datetime(start_date), end=_to_datetime(end_date))

In [201]:
# Build both SPAC tables from the CSV snapshots under data/; with write=False
# no result files are written.
spac_list_current, spac_list_past = process_spac_lists(
    file_path_current='data/spac_list_current.csv',
    file_path_past='data/spac_list_past.csv',
    write=False,
)
# Preview the first rows of each table, current list first.
display(spac_list_current.head(), spac_list_past.head())

Unnamed: 0,Ticker,ticker,cik,title
0,ACAM,ACAM,1759008,Acamar Partners Acquisition Corp.
1,ACTT,ACTT,1753706,"Whole Earth Brands, Inc."
2,ALAC,ALAC,1748621,Alberton Acquisition Corp
3,ALUS,ALUS,1781115,Alussa Energy Acquisition Corp.
4,AMCI,AMCI,1744494,AMCI Acquisition Corp.


Unnamed: 0,Old Ticker,New Ticker,Closing Date,ticker,cik,title
0,GPAQ,HOFV,2020,HOFV,1791995,"GPAQ Acquisition Holdings, Inc."
1,ARYA,IMTX,2020,IMTX,1809196,Immatics B.V.
2,PAAC,LGHL,2020,LGHL,1806524,Lion Group Holding Ltd
3,NEBU,LPRO,2020,LPRO,1806201,Open Lending Corp
4,VTIQ,NKLA,2020,NKLA,1731289,Nikola Corp


In [202]:
# Show the row(s) of the current SPAC list whose merged `ticker` is FMCI.
spac_list_current.loc[spac_list_current['ticker'] == 'FMCI']

Unnamed: 0,Ticker,ticker,cik,title
27,FMCI,FMCI,1741231,Forum Merger II Corp


In [159]:
# Pull the 8-K filings for Forum Merger II Corp, then add one 0/1 column per
# phrase of interest.
# df_form_8K = get_forms_text(company_name='Nikola Corp', cik_id='0001731289', form_type='8-K')
df_form_8K = (
    get_forms_text(company_name='Forum Merger II Corp', cik_id='1741231', form_type='8-K')
    .pipe(simple_text_match, 'letter of intent')
    .pipe(simple_text_match, 'business combination agreement')
)
df_form_8K

Forum Merger II Corp
url to forms: https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK=1741231&type=8-K&dateb=&owner=include&count=100
2020-06-22 00:00:00 form added
2020-06-11 00:00:00 form added
2020-06-12 00:00:00 form added
2020-06-08 00:00:00 form added
2020-06-03 00:00:00 form added
2020-05-13 00:00:00 form added
2020-01-07 00:00:00 form added
2020-01-02 00:00:00 form added
2018-09-11 00:00:00 form added
2018-08-13 00:00:00 form added
2018-08-08 00:00:00 form added


Unnamed: 0,date,form,text,letter_of_intent_found,business_combination_agreement_found
0,2020-06-22,8-K,8-k 1 ea123288-8k_forummerger2.htm current re...,0,0
1,2020-06-11,8-K,8-k 1 ea122974-8k_forummerger2.htm current re...,0,0
2,2020-06-12,8-K,8-k 1 ea122985-8k_forummerger2.htm current re...,0,0
3,2020-06-08,8-K,8-k 1 ea122807-8k_forummerger2.htm current re...,0,0
4,2020-06-03,8-K,8-k 1 ea122609-8k_forummerger2.htm current re...,1,0
5,2020-05-13,8-K,8-k 1 ea121761-8k_forummergii.htm current rep...,1,0
6,2020-01-07,8-K,8-k 1 f8k010720_forummerger2.htm current repo...,0,0
7,2020-01-02,8-K,8-k 1 f8k010220_forummerger2.htm current repo...,0,0
8,2018-09-11,8-K,8-k 1 f8k091118_forummerger2.htm current repo...,0,0
9,2018-08-13,8-K,8-k 1 f8k080718_forummerger2.htm current repo...,0,0


In [129]:
# FMCI price history from 2020-01-01 through 2020-06-10.
df_prices = get_historical_prices(symbol='FMCI', start_date='2020-01-01', end_date='2020-06-10')
# Forward closes: shift(-k) places the close from k rows later on the current row.
df_prices = df_prices.assign(**{
    'Close_t+1': df_prices['Close'].shift(-1),
    'Close_t+3': df_prices['Close'].shift(-3),
})
# Fractional change from the current row's open to each forward close.
df_prices = df_prices.assign(**{
    'Open_Close_t+1_%chg': (df_prices['Close_t+1'] - df_prices['Open']) / df_prices['Open'],
    'Open_Close_t+3_%chg': (df_prices['Close_t+3'] - df_prices['Open']) / df_prices['Open'],
})
df_prices.loc[df_prices.index >= '2020-06-01'].head()

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close,Close_t+1,Close_t+3,Open_Close_t+1_%chg,Open_Close_t+3_%chg
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2020-06-01,13.4,12.08,12.9,12.17,3613300,12.17,11.33,12.34,-0.121705,-0.043411
2020-06-02,12.47,11.24,12.43,11.33,3348500,11.33,11.84,12.41,-0.047466,-0.001609
2020-06-03,11.95,11.28,11.69,11.84,2994300,11.84,12.34,14.25,0.055603,0.218991
2020-06-04,12.67,11.9,11.93,12.34,2734400,12.34,12.41,14.5,0.040235,0.215423
2020-06-05,12.78,12.1,12.5,12.41,1800000,12.41,14.25,14.72,0.14,0.1776
