# Dependencies

In [57]:
# import libraries
import json
import os
import time
from datetime import datetime

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from splinter import Browser
from splinter.exceptions import ElementDoesNotExist
from sqlalchemy import create_engine
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session

In [2]:
# local dependencies
import stock_functions as sf

# Scrape IPOs

__IPO Scoop Scrape__

In [34]:
# IPO Scoop Upcoming IPOs
# Fetch the IPO Scoop calendar page and parse the HTML tables found on it.
url = 'https://www.iposcoop.com/ipo-calendar/'
data = pd.read_html(url)

# The first parsed table is used as the upcoming-IPO frame.
ipo_scoop_upcoming_df = data[0]
ipo_scoop_upcoming_df.head()

Unnamed: 0,Company,Symbol proposed,Lead Managers,Shares (Millions),Price Low,Price High,Est. $ Volume,Expected to Trade,SCOOP Rating,Rating Change
0,No entries were found.,No entries were found.,No entries were found.,No entries were found.,No entries were found.,No entries were found.,No entries were found.,No entries were found.,No entries were found.,No entries were found.


In [35]:
# Clean up the upcoming-IPO table scraped from IPO Scoop.
# When nothing is scheduled, the table holds a single placeholder row whose
# cells all read "No entries were found."; that row is removed so the frame is
# empty but keeps its columns.
NO_ENTRIES_TEXT = "No entries were found."

# Check `.empty` first: once the placeholder row has been dropped (for example
# when this cell is run a second time) there is no first row to inspect, and a
# label lookup of row 0 would raise a KeyError.
upcoming_has_entries = (
    not ipo_scoop_upcoming_df.empty
    and ipo_scoop_upcoming_df["Company"].iloc[0] != NO_ENTRIES_TEXT
)

if not upcoming_has_entries:
    # drop the placeholder row (no-op when the frame is already empty)
    ipo_scoop_upcoming_df = ipo_scoop_upcoming_df.drop(index=ipo_scoop_upcoming_df.index[:1])

# if table has values, process updates to dataframe
else:
    # rename symbol proposed with symbol
    ipo_scoop_upcoming_df = ipo_scoop_upcoming_df.rename(columns={'Symbol proposed': 'Symbol'})

    # replace 'week of' text from expected to trade column if present
    ipo_scoop_upcoming_df['Expected to Trade'] = (
        ipo_scoop_upcoming_df['Expected to Trade'].str.replace(' Week of', '', regex=False)
    )

    # split expected trade date to date and day of week.
    # n=1 + reindex guarantees exactly two columns even when no row carries a
    # weekday (e.g. every entry was a "Week of" date); the weekday is then missing
    # instead of the assignment failing on a column-count mismatch.
    trade_parts = (
        ipo_scoop_upcoming_df['Expected to Trade']
        .str.split(' ', n=1, expand=True)
        .reindex(columns=[0, 1])
    )
    ipo_scoop_upcoming_df['Offer Date'] = trade_parts[0]
    ipo_scoop_upcoming_df['Expected Trade Weekday'] = trade_parts[1]

    # add date type column to differentiate confirmed vs expected
    ipo_scoop_upcoming_df['date_type'] = "Expected"
    ipo_scoop_upcoming_df['source'] = "IPO Scoop"

# NOTE(review): the recent-IPO frame is renamed to symbol/company/offer_date before
# the frames are concatenated, while this frame keeps 'Symbol'/'Company'/'Offer Date'
# - confirm whether the same rename is intended here.
ipo_scoop_upcoming_df.head(2)


Unnamed: 0,Company,Symbol proposed,Lead Managers,Shares (Millions),Price Low,Price High,Est. $ Volume,Expected to Trade,SCOOP Rating,Rating Change


In [23]:
# IPO Scoop Upcoming IPOs - reduce to primary info
#ipo_scoop_upcoming_df = ipo_scoop_upcoming_df[["Symbol", "Company", "Offer Date", "date_type"]]
#ipo_scoop_upcoming_df = ipo_scoop_upcoming_df.rename(columns={"Symbol": "symbol", "Company": "company", "Offer Date": "offer_date"})
                                
#ipo_scoop_upcoming_df.head()

In [36]:
# IPO Scoop Recent IPOs
# Parse the "last 100 IPOs" page, tag every row as a confirmed IPO Scoop record
# and switch the key columns to the lower-case names used for the combined frame.
url = 'https://www.iposcoop.com/last-100-ipos'
data = pd.read_html(url)

ipo_scoop_recent_df = (
    data[0]
    # date_type differentiates confirmed vs expected offer dates
    .assign(date_type="Confirmed", source="IPO Scoop")
    .rename(columns={"Symbol": "symbol", "Company": "company", "Offer Date": "offer_date"})
)

ipo_scoop_recent_df.head()

Unnamed: 0,company,symbol,Industry,offer_date,Shares (millions),Offer Price,1st Day Close,Current Price,Return,SCOOP Rating,date_type,source
0,Harbor Custom Development,HCDI,Financials,8/28/2020,1.8,$6.00,$7.50,$7.50,25.00%,S/O,Confirmed,IPO Scoop
1,XPeng,XPEV,Consumer Goods,8/27/2020,99.7,$15.00,$21.22,$22.79,51.93%,S/O,Confirmed,IPO Scoop
2,Kymera Therapeutics,KYMR,Health Care,8/21/2020,8.7,$20.00,$33.26,$33.28,66.40%,S/O,Confirmed,IPO Scoop
3,Nano-X Imaging Ltd.,NNOX,Health Care,8/21/2020,9.2,$18.00,$21.70,$25.98,44.33%,S/O,Confirmed,IPO Scoop
4,Harmony Biosciences Holdings,HRMY,Health Care,8/19/2020,5.4,$24.00,$37.01,$35.20,46.67%,S/O,Confirmed,IPO Scoop


In [26]:
# IPO Scoop Recent IPOs - reduce to primary info
#ipo_scoop_recent_df = ipo_scoop_recent_df[["Symbol", "Company", "Offer Date", "date_type"]]
          
#ipo_scoop_recent_df.head()

__Nasdaq Scrape__

In [37]:
# Year and month of today's date as "YYYY-MM"; used to build the Nasdaq calendar URL
current_year_month = f"{datetime.today():%Y-%m}"
current_year_month

'2020-08'

In [38]:
# scrape nasdaq https://api.nasdaq.com/api/ipo/calendar?date=2020-08
# note, had to create headers due to time out, solution found here: https://stackoverflow.com/questions/46862719/pythons-requests-library-timing-out-but-getting-the-response-from-the-browser
url = f'https://api.nasdaq.com/api/ipo/calendar?date={current_year_month}'
headers = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_5) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.1.1 Safari/605.1.15",
    "Accept-Language": "en-gb",
    "Accept-Encoding": "br, gzip, deflate",
    # "text/html" - the original value read "test/html", which is not a media type
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Referer": "http://www.google.com/",
}

# An explicit timeout keeps the cell from hanging indefinitely if the API stalls,
# and raise_for_status() surfaces HTTP errors here rather than as a confusing
# JSON/KeyError failure in a later cell.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()

# parsed JSON payload; later cells read data["data"]["priced"] and data["data"]["upcoming"]
data = response.json()
print(data)

{'data': {'priced': {'headers': {'proposedTickerSymbol': 'Symbol', 'companyName': 'Company Name', 'proposedExchange': 'Exchange/ Market', 'proposedSharePrice': 'Price', 'sharesOffered': 'Shares', 'pricedDate': 'Date', 'dollarValueOfSharesOffered': 'Offer Amount', 'dealStatus': 'Actions'}, 'rows': [{'dealID': '1124020-93258', 'proposedTickerSymbol': 'BTAQU', 'companyName': 'Burgundy Technology Acquisition Corp', 'proposedExchange': 'NASDAQ Capital', 'proposedSharePrice': '10.00', 'sharesOffered': '30,000,000', 'pricedDate': '08/27/2020', 'dollarValueOfSharesOffered': '$300,000,000', 'dealStatus': 'Priced'}, {'dealID': '1124662-93358', 'proposedTickerSymbol': 'CFIIU', 'companyName': 'CF Finance Acquisition Corp II', 'proposedExchange': 'NASDAQ Capital', 'proposedSharePrice': '10.00', 'sharesOffered': '50,000,000', 'pricedDate': '08/27/2020', 'dollarValueOfSharesOffered': '$500,000,000', 'dealStatus': 'Priced'}, {'dealID': '1124749-93369', 'proposedTickerSymbol': 'XPEV', 'companyName': 'X

In [39]:
# Records of IPOs that have already priced, taken from the Nasdaq calendar payload
priced_ipos = data["data"]["priced"]["rows"]

# one list per field of interest, in the same order as the records
symbol_list = [row["proposedTickerSymbol"] for row in priced_ipos]
company_list = [row["companyName"] for row in priced_ipos]
offer_date_list = [row["pricedDate"] for row in priced_ipos]
market_cap_list = [row["dollarValueOfSharesOffered"] for row in priced_ipos]

# dataframe with stock info
priced_columns = {
    "symbol": symbol_list,
    "company": company_list,
    "offer_date": offer_date_list,
    "market_cap_offered": market_cap_list,
}
nasdaq_priced_df = pd.DataFrame(priced_columns)

# priced rows carry a confirmed offer date
nasdaq_priced_df = nasdaq_priced_df.assign(date_type="Confirmed", source="Nasdaq")

nasdaq_priced_df.head()

Unnamed: 0,symbol,company,offer_date,market_cap_offered,date_type,source
0,BTAQU,Burgundy Technology Acquisition Corp,08/27/2020,"$300,000,000",Confirmed,Nasdaq
1,CFIIU,CF Finance Acquisition Corp II,08/27/2020,"$500,000,000",Confirmed,Nasdaq
2,XPEV,XPENG INC.,08/27/2020,"$1,496,000,010",Confirmed,Nasdaq
3,FTOCU,FTAC Olympus Acquisition Corp.,08/26/2020,"$750,000,000",Confirmed,Nasdaq
4,FST'U,FAST Acquisition Corp.,08/21/2020,"$200,000,000",Confirmed,Nasdaq


In [40]:
# Records of IPOs that have not priced yet, taken from the Nasdaq calendar payload
upcoming_ipos = data["data"]["upcoming"]["upcomingTable"]["rows"]

# one list per field of interest, in the same order as the records
symbol_list = [row["proposedTickerSymbol"] for row in upcoming_ipos]
company_list = [row["companyName"] for row in upcoming_ipos]
offer_date_list = [row["expectedPriceDate"] for row in upcoming_ipos]
market_cap_list = [row["dollarValueOfSharesOffered"] for row in upcoming_ipos]

# dataframe with stock info
upcoming_columns = {
    "symbol": symbol_list,
    "company": company_list,
    "offer_date": offer_date_list,
    "market_cap_offered": market_cap_list,
}
nasdaq_upcoming_df = pd.DataFrame(upcoming_columns)

# upcoming rows only carry an expected offer date
nasdaq_upcoming_df = nasdaq_upcoming_df.assign(date_type="Expected", source="Nasdaq")

nasdaq_upcoming_df.head()

Unnamed: 0,symbol,company,offer_date,market_cap_offered,date_type,source
0,AUVI,"Applied UV, Inc.",08/31/2020,"$5,750,000",Expected,Nasdaq
1,,"Sun BioPharma, Inc.",08/28/2020,"$10,500,000",Expected,Nasdaq
2,PAICU,Petra Acquisition Inc.,08/28/2020,"$86,250,000",Expected,Nasdaq
3,HCDI,"Harbor Custom Development, Inc.",08/28/2020,"$16,428,568",Expected,Nasdaq


In [None]:
# trim to most relevant columns
#nasdaq_priced_df = nasdaq_priced_df[["symbol", "company", "offer_date", "date_type"]]
#nasdaq_upcoming_df = nasdaq_upcoming_df[["symbol", "company", "offer_date", "date_type"]]

__Combine IPO Dataframes__

In [41]:
# combine IPO dataframes
# Stack the four source frames (IPO Scoop first, then Nasdaq) under a fresh index;
# columns missing from a source are left empty for its rows.
ipo_frames = [
    ipo_scoop_recent_df,
    ipo_scoop_upcoming_df,
    nasdaq_priced_df,
    nasdaq_upcoming_df,
]
ipo_df = pd.concat(ipo_frames, ignore_index=True, sort=False)
print(ipo_df.dtypes)
ipo_df

company                object
symbol                 object
Industry               object
offer_date             object
Shares (millions)     float64
Offer Price            object
1st Day Close          object
Current Price          object
Return                 object
SCOOP Rating           object
date_type              object
source                 object
Company                object
Symbol proposed        object
Lead Managers          object
Shares (Millions)      object
Price Low              object
Price High             object
Est. $ Volume          object
Expected to Trade      object
Rating Change          object
market_cap_offered     object
dtype: object


Unnamed: 0,company,symbol,Industry,offer_date,Shares (millions),Offer Price,1st Day Close,Current Price,Return,SCOOP Rating,...,Company,Symbol proposed,Lead Managers,Shares (Millions),Price Low,Price High,Est. $ Volume,Expected to Trade,Rating Change,market_cap_offered
0,Harbor Custom Development,HCDI,Financials,8/28/2020,1.8,$6.00,$7.50,$7.50,25.00%,S/O,...,,,,,,,,,,
1,XPeng,XPEV,Consumer Goods,8/27/2020,99.7,$15.00,$21.22,$22.79,51.93%,S/O,...,,,,,,,,,,
2,Kymera Therapeutics,KYMR,Health Care,8/21/2020,8.7,$20.00,$33.26,$33.28,66.40%,S/O,...,,,,,,,,,,
3,Nano-X Imaging Ltd.,NNOX,Health Care,8/21/2020,9.2,$18.00,$21.70,$25.98,44.33%,S/O,...,,,,,,,,,,
4,Harmony Biosciences Holdings,HRMY,Health Care,8/19/2020,5.4,$24.00,$37.01,$35.20,46.67%,S/O,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
143,Health Sciences Acquisitions Corp 2,HSAQ,,08/04/2020,,,,,,,...,,,,,,,,,,"$139,130,440"
144,"Applied UV, Inc.",AUVI,,08/31/2020,,,,,,,...,,,,,,,,,,"$5,750,000"
145,"Sun BioPharma, Inc.",,,08/28/2020,,,,,,,...,,,,,,,,,,"$10,500,000"
146,Petra Acquisition Inc.,PAICU,,08/28/2020,,,,,,,...,,,,,,,,,,"$86,250,000"


In [44]:
# drop unnecessary columns if they exist
# errors='ignore' lets this run whether or not a given column made it into the combined frame
unneeded_columns = ['Company', 'Symbol proposed', 'Expected to Trade', 'Rating Change', 'Lead Managers']
ipo_df = ipo_df.drop(columns=unneeded_columns, errors='ignore')
ipo_df.head()

Unnamed: 0,company,symbol,Industry,offer_date,Shares (millions),Offer Price,1st Day Close,Current Price,Return,SCOOP Rating,date_type,source,Shares (Millions),Price Low,Price High,Est. $ Volume,market_cap_offered
0,Harbor Custom Development,HCDI,Financials,2020-08-28,1.8,$6.00,$7.50,$7.50,25.00%,S/O,Confirmed,IPO Scoop,,,,,
3,Nano-X Imaging Ltd.,NNOX,Health Care,2020-08-21,9.2,$18.00,$21.70,$25.98,44.33%,S/O,Confirmed,IPO Scoop,,,,,
4,Harmony Biosciences Holdings,HRMY,Health Care,2020-08-19,5.4,$24.00,$37.01,$35.20,46.67%,S/O,Confirmed,IPO Scoop,,,,,
5,Inhibrx,INBX,Health Care,2020-08-19,7.0,$17.00,$20.63,$16.84,-0.94%,S/O,Confirmed,IPO Scoop,,,,,
6,CureVac B.V.,CVAC,Health Care,2020-08-14,13.3,$16.00,$55.90,$59.15,269.69%,S/O,Confirmed,IPO Scoop,,,,,


In [46]:
# convert offer date to datetime datatype
ipo_df['offer_date'] = pd.to_datetime(ipo_df['offer_date'], format="%m/%d/%Y")

# sort by date_type to keep "confirmed" values for duplicates if results differ
# ("Confirmed" sorts before "Expected"). A stable algorithm is requested explicitly:
# the default sort does not guarantee that rows with the same date_type keep their
# concat order, which would make the row retained by a later keep="first"
# de-duplication arbitrary.
ipo_df = ipo_df.sort_values(by='date_type', ascending=True, kind='mergesort')

In [None]:
# drop duplicate symbols, if there is a confirmed keep the first so that "expected" is dropped
# NOTE(review): rows whose symbol is blank or missing compare equal to one another,
# so only one of them survives this step - confirm that is intended.
is_repeat_symbol = ipo_df.duplicated(subset='symbol', keep="first")
ipo_df = ipo_df[~is_repeat_symbol]
ipo_df.head()

__Add New Stocks to Database__

In [4]:
# Bring in ipo table
# The following cells read `sql_ipo_df`, so this load has to run for the notebook
# to work from a fresh kernel. The connection URL can be overridden through the
# IPO_TRACKER_DB_URL environment variable; the default is the local database.
engine = create_engine(
    os.environ.get('IPO_TRACKER_DB_URL', 'postgresql://postgres:postgres@localhost:5432/IPO_tracker')
)

# the context manager returns the connection to the pool once the table is read
with engine.connect() as connection:
    sql_ipo_df = pd.read_sql("SELECT * FROM ipo", connection)

sql_ipo_df.head()

Unnamed: 0,id,symbol,company,offer_date,date_type,market_cap_offered,first_day_close
0,1,ARYA,ARYA Sciences Acquisition Corp III,2020-08-07,Confirmed,,
1,2,CMPI,Checkmate Pharmaceuticals,2020-08-07,Confirmed,,
2,3,FRLN,Freeline Therapeutics Holdings plc,2020-08-07,Confirmed,,
3,4,IBEX,IBEX Ltd.,2020-08-07,Confirmed,,
4,5,AFIB,Acutus Medical,2020-08-06,Confirmed,,


In [5]:
# find expected date_types - update to offer_date and date_type if confirmed
# rows of the stored ipo table whose offer date is still only "Expected"
is_expected = sql_ipo_df["date_type"].eq("Expected")
sql_ipo_df_expected = sql_ipo_df[is_expected]
sql_ipo_df_expected

Unnamed: 0,id,symbol,company,offer_date,date_type,market_cap_offered,first_day_close
100,101,KBNT,Kubient (stock),2020-08-11,Expected,,
101,102,KBNTW,Kubient (warrants),2020-08-11,Expected,,
102,103,FAII.U,Fortress Value Acquisition Corp. II,2020-08-12,Expected,,
103,104,FSDC,FS Development,2020-08-12,Expected,,
104,105,SQFT,Presidio Property Trust,2020-08-12,Expected,,
105,106,BEKE,KE Holdings,2020-08-13,Expected,,
106,107,NTST,NETSTREIT,2020-08-13,Expected,,
107,108,CVAC,CureVac N.V.,2020-08-14,Expected,,
108,109,DCT,Duck Creek Technologies,2020-08-14,Expected,,
126,127,DCT,"DUCK CREEK TECHNOLOGIES, INC.",2020-08-14,Expected,,


In [None]:
# scraped IPOs whose symbol is not yet present in the stored ipo table
already_stored = ipo_df["symbol"].isin(sql_ipo_df["symbol"])
new_ipos_df = ipo_df[~already_stored]
new_ipos_df

In [None]:
# load data
#engine = create_engine('postgresql://postgres:postgres@localhost:5432/IPO_tracker')
#new_ipos_df.to_sql('ipo', con=engine, if_exists='append', index=False)

# Stock Detail & Performance
At this point this is a proof of concept, showing how we will pull the data.

May include things like:
1. Stock price
2. Market cap (may choose to exclude smaller cap new stocks to limit to biggest and more interesting IPOs)
3. Launch date open and close price
4. Stock attribute information (tech vs consumer goods vs any summary statement available?)

In [None]:
# set up as a sample for now, will need to determine when to scrape stock info and what to keep
sample_symbols = [ipo_df.iloc[20]['symbol'],
                  ipo_df.iloc[25]['symbol']
                 ]
sample_symbols

In [None]:
# sample loop through symbols

# empty list to hold data
current_price_list = []
market_cap_list = []
json_stock_data = []   # full json of stock data, unsure if needed

# loop through symbols and get data for each
for symbol in sample_symbols:
    # NOTE(review): the crumb query parameter is hard-coded - confirm it is still accepted.
    # The timeout stops a stalled request from hanging the notebook, and
    # raise_for_status() reports an HTTP failure directly instead of letting it
    # surface as a lookup error on the parsed body below.
    r = requests.get(
        f'https://query2.finance.yahoo.com/v10/finance/quoteSummary/{symbol}?formatted=true&crumb=8ldhetOu7RJ&lang=en-US&region=US&modules=defaultKeyStatistics%2CfinancialData%2CcalendarEvents&corsDomain=finance.yahoo.com',
        timeout=30,
    )
    r.raise_for_status()
    data = r.json()

    # get stats from the dataset (first entry of the quoteSummary result list)
    summary = data['quoteSummary']['result'][0]
    price = summary['financialData']['currentPrice']['raw']
    # NOTE(review): the value stored as "market cap" is the enterpriseValue field -
    # confirm that is the intended figure.
    market_cap = summary['defaultKeyStatistics']['enterpriseValue']['raw']

    market_cap_list.append(market_cap)
    current_price_list.append(price)
    json_stock_data.append(data)

print(current_price_list)
print(market_cap_list)

In [None]:
# dataframe with new stock info
# one row per sampled symbol; the same run date (YYYY-MM-DD) is stamped on every row
run_date = datetime.today().strftime('%Y-%m-%d')
stock_columns = {
    "sample_symbols": sample_symbols,
    "current_price": current_price_list,
    "market_cap": market_cap_list,
    "date": run_date,
}
df = pd.DataFrame(stock_columns)
df

# Postgres

__Postgresql__

In [58]:
# create engine
# The connection URL is taken from the IPO_TRACKER_DB_URL environment variable when
# it is set, so real credentials do not have to be written into the notebook;
# otherwise the original local default is used.
engine = create_engine(
    os.environ.get('IPO_TRACKER_DB_URL', 'postgresql://postgres:postgres@localhost:5432/IPO_tracker')
)
#ipo_df.to_sql('ipo', con=engine, if_exists='append', index=False)

In [62]:
# Declare a Base using `automap_base()`
# The mapped classes are generated from the database schema in a later cell.
Base = automap_base()

In [63]:
# Use the Base class to reflect the database tables
# NOTE(review): newer SQLAlchemy releases deprecate `reflect=True` in favour of
# `Base.prepare(autoload_with=engine)` - confirm against the installed version.
Base.prepare(engine, reflect=True)

In [64]:
# Print all of the classes mapped to the Base
# (displayed as the cell's output; one name per mapped table)
Base.classes.keys()

['stock_performance', 'ipo']

In [65]:
# Assign the ipo class to a variable called `IPO`
# `IPO` is the mapped class used by the queries in the following cells.
IPO = Base.classes.ipo

In [66]:
# Create a session
# Bound to `engine`; it is committed and closed in a later cell.
session = Session(engine)

In [77]:
# Use the session to query ipo table and display the first 5 symbols
# each result is a (symbol, company) row
first_five_rows = session.query(IPO.symbol, IPO.company).limit(5).all()
for r in first_five_rows:
    print(r)

('HCDI', 'Harbor Custom Development')
('HUIZ', 'Huize Holding Ltd.')
('NREF', 'NexPoint Real Estate Finance')
('ONEW', 'OneWater Marine')
('BEAM', 'Beam Therapeutics')


In [99]:
# testing update
# Sets `company` to a placeholder string on the ipo rows whose symbol is 'HCDI';
# the value returned by `update()` is stored in `stock_to_update`.
# NOTE(review): once the session is committed this overwrites stored data with the
# placeholder text - confirm this test cell should remain in the notebook.
stock_to_update = session.query(IPO.symbol, IPO.company).filter(IPO.symbol == 'HCDI').\
    update({"company": "this is annoying"}, synchronize_session='evaluate')

In [101]:
# oh wow this committed the update
# commit() makes the pending changes of this session permanent; close() then releases the session
session.commit()
session.close()

# Analysis

Information that may be interesting to share.  Examples include:
1. Timing of when it launches, how long it's been, etc.
2. Price performance
    - Launch date open and close price (how they did on first day)
    - How did it do when it hit the 1 month, 3 month, 6 month, and 1 year milestones
3. Industry performance
    - Did it outperform the S&P 
    - Did it outperform its sector (Ex: tech, consumer goods)
4. Top performers
    - Which IPOs did best in last 1 month, 3 month, 6 month, 1 year milestone

In [None]:
# sample analysis for open to close change for one day
# select the AAPL rows dated "07012020", then express close relative to open as a fractional change
is_aapl = sample_df["Ticker"] == "AAPL"
is_target_day = sample_df["Date"] == "07012020"
appl = sample_df.loc[is_aapl & is_target_day]

close_over_open = appl["Close_Price"] / appl["Open_Price"]
appl_day_change = close_over_open - 1
print(appl_day_change)

In [None]:
# Calculate based on MongoDB record
# Collect every document of `collection` into `response`, with its `_id` converted
# to a string.
# NOTE(review): `collection` and `log` are not defined in the visible notebook -
# confirm where they are expected to come from.
documents = collection.find({})
response = []
for document in documents:
    try:
        document['_id'] = str(document['_id'])
    except Exception:
        # `Exception` instead of a bare `except` so KeyboardInterrupt/SystemExit still
        # propagate. The record is skipped rather than stored as None, because the
        # cells that follow subscript every entry of `response`.
        log.info(f'Could not find {document}')
        continue
    response.append(document)

# Example field reference for first record (None when no record was loaded)
price_high = response[0]["price_high"] if response else None
price_high

In [None]:
# Print text for each record
# One line per record: expected trade date, company, proposed symbol, price range
# and a hashtag built from the symbol.
for record in response:
    if record is None:
        # an entry that could not be loaded has no fields to format; skip it
        continue
    print(f"{record['expected_to_trade']}: {record['company']} [{record['symbol_proposed']}]. Price (Low-High): ${record['price_low']}-{record['price_high']}. #new_ipo_{record['symbol_proposed']}")

In [None]:
# Establish Twitter connection

import tweepy

# Credentials are read from the environment when the variables are set, so real
# keys never need to be typed into the notebook; the fallbacks are the original
# placeholder strings.
CONSUMER_KEY = os.environ.get("TWITTER_CONSUMER_KEY", "consumer_key")
CONSUMER_SECRET = os.environ.get("TWITTER_CONSUMER_SECRET", "consumer_secret")
ACCESS_KEY = os.environ.get("TWITTER_ACCESS_KEY", "access_key")
ACCESS_SECRET = os.environ.get("TWITTER_ACCESS_SECRET", "access_secret")

# OAuth handler carrying the consumer pair plus the access token pair
auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_KEY, ACCESS_SECRET)

# API client used by the tweet-posting cell
api = tweepy.API(auth)

In [None]:
# Post a tweet for each record
# The tweet text has the same layout as the printed preview: expected trade date,
# company, proposed symbol, price range and a hashtag built from the symbol.
for record in response:
    if record is None:
        # an entry that could not be loaded has no fields to format; skip it
        continue
    new_tweet = f"{record['expected_to_trade']}: {record['company']} [{record['symbol_proposed']}]. Price (Low-High): ${record['price_low']}-{record['price_high']}. #new_ipo_{record['symbol_proposed']}"
    api.update_status(new_tweet)