# Dependencies

In [2]:
# import libraries
import json
import os
import time
from datetime import datetime

import numpy as np
import pandas as pd
import requests
from sqlalchemy import create_engine
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session

In [3]:
# local dependencies
from scrape_ipo import scrape_for_ipos

# Scrape IPOs

In [4]:
# get new IPOs dataframe
# ipo_df = scrape_for_ipos()
# ipo_df = ipo_df.reset_index(drop=True)
# ipo_df.to_csv('ipo_df.csv')
# ipo_df

ProgrammingError: (psycopg2.errors.UndefinedTable) relation "ipo" does not exist
LINE 1: SELECT * FROM ipo
                      ^

[SQL: SELECT * FROM ipo]
(Background on this error at: http://sqlalche.me/e/f405)

__Nasdaq Scrape__

In [5]:
# today's date as a "YYYY-MM" string; used as the `date` query parameter of the Nasdaq request
current_year_month = f"{datetime.today():%Y-%m}"
current_year_month

'2020-12'

In [6]:
# Query the Nasdaq IPO calendar API for the current month,
# Ex: https://api.nasdaq.com/api/ipo/calendar?date=2020-08
# Browser-like headers are sent because the request timed out without them, solution found here:
# https://stackoverflow.com/questions/46862719/pythons-requests-library-timing-out-but-getting-the-response-from-the-browser
url = f'https://api.nasdaq.com/api/ipo/calendar?date={current_year_month}'
headers = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_5) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.1.1 Safari/605.1.15",
    "Accept-Language": "en-gb",
    "Accept-Encoding": "br, gzip, deflate",
    # previously "test/html", a typo for the text/html media type
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Referer": "http://www.google.com/",
}

# The timeout keeps the cell from hanging indefinitely if the API stops answering, and
# raise_for_status() reports an HTTP error here rather than as a KeyError in a later cell.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()

# parsed JSON body; later cells read the IPO rows out of this dict
data = response.json()
print(url)

https://api.nasdaq.com/api/ipo/calendar?date=2020-12


In [7]:
# create list of scraped dataframes to concatenate in a later cell
scraped_ipo_dfs = []

# rows of the "priced" section of the Nasdaq response
priced_ipos = data["data"]["priced"]["rows"]

# if there are priced IPOs create dataframe
if priced_ipos:
    # Build one record per IPO in a single pass over the rows. Thousands separators
    # and the dollar sign are stripped so the columns can be converted to numbers later.
    priced_records = [
        {
            "symbol": row["proposedTickerSymbol"],
            "company": row["companyName"],
            "exchange": row["proposedExchange"],
            "proposed_share_price": row["proposedSharePrice"],
            "shares_offered": row["sharesOffered"].replace(",", ''),
            "priced_date": row["pricedDate"],
            "dollar_val_shares": row["dollarValueOfSharesOffered"].replace(",", '').replace("$", ''),
            "deal_status": "priced",
        }
        for row in priced_ipos
    ]

    # dataframe with stock info
    nasdaq_priced_df = pd.DataFrame(priced_records)
    scraped_ipo_dfs.append(nasdaq_priced_df)

else:
    print("no priced IPOs")

In [None]:
# rows of the "upcoming" section of the Nasdaq response
upcoming_ipos = data["data"]["upcoming"]["upcomingTable"]["rows"]

if upcoming_ipos:
    # Build one record per IPO in a single pass over the rows. The expected pricing date
    # is stored in the same `priced_date` column that the priced IPOs use.
    upcoming_records = [
        {
            "symbol": row["proposedTickerSymbol"],
            "company": row["companyName"],
            "exchange": row["proposedExchange"],
            "proposed_share_price": row["proposedSharePrice"],
            "shares_offered": row["sharesOffered"].replace(",", ''),
            "priced_date": row["expectedPriceDate"],
            "dollar_val_shares": row["dollarValueOfSharesOffered"].replace(",", '').replace("$", ''),
            "deal_status": "expected",
        }
        for row in upcoming_ipos
    ]

    # dataframe with stock info
    nasdaq_upcoming_df = pd.DataFrame(upcoming_records)
    # NOTE: this appends to the existing list, so re-running only this cell adds the frame again
    scraped_ipo_dfs.append(nasdaq_upcoming_df)

else:
    print("no upcoming IPOs")

__Combine IPO Dataframes__

In [8]:
# combine IPO dataframes; pd.concat raises ValueError on an empty list, so fall back to an
# empty frame with the expected columns when no IPOs were scraped
ipo_columns = ['symbol', 'company', 'exchange', 'proposed_share_price',
               'shares_offered', 'priced_date', 'dollar_val_shares', 'deal_status']
if scraped_ipo_dfs:
    ipo_df = pd.concat(scraped_ipo_dfs, ignore_index=True, sort=False)
else:
    ipo_df = pd.DataFrame(columns=ipo_columns)

# change column datatypes (proposed_share_price is not converted here)
ipo_df[['shares_offered', 'dollar_val_shares']] = ipo_df[['shares_offered', 'dollar_val_shares']].apply(pd.to_numeric)
ipo_df['priced_date'] = pd.to_datetime(ipo_df['priced_date'], format="%m/%d/%Y")

# oldest pricing date first, with a fresh 0..n-1 index
ipo_df = ipo_df.sort_values(by='priced_date', ascending=True).reset_index(drop=True)
print(ipo_df.dtypes)

# drop rows that have a missing value in any column
ipo_df = ipo_df.dropna()
ipo_df.head()

symbol                          object
company                         object
exchange                        object
proposed_share_price            object
shares_offered                   int64
priced_date             datetime64[ns]
dollar_val_shares              float64
deal_status                     object
dtype: object


Unnamed: 0,symbol,company,exchange,proposed_share_price,shares_offered,priced_date,dollar_val_shares,deal_status
0,RSVAU,Rodgers Silicon Valley Acquisition Corp,NASDAQ Capital,10.00,20000000,2020-12-02,200000000.0,priced
1,TACAU,Trepont Acquistion Corp I,NYSE,10.00,20000000,2020-12-02,200000000.0,priced
2,CAP'U,Capitol Investment Corp. V,NYSE,10.00,30000000,2020-12-02,300000000.0,priced
3,HTPAU,Highland Transcend Partners I Corp.,NYSE,10.00,27500000,2020-12-03,275000000.0,priced
4,FPACU,Far Peak Acquisition Corp,NYSE,10.00,55000000,2020-12-03,550000000.0,priced
5,LOKBU,Live Oak Acquisition Corp II,NYSE,10.00,22000000,2020-12-03,220000000.0,priced
6,KNTE,Kinnate Biopharma Inc.,NASDAQ Global Select,20.00,12000000,2020-12-03,240000000.0,priced
7,SPFRU,JAWS Spitfire Acquisition Corp,NYSE,10.00,30000000,2020-12-03,300000000.0,priced
8,PTICU,PROPTECH INVESTMENT CORP. II,NASDAQ Capital,10.00,20000000,2020-12-04,200000000.0,priced
9,SBTX,"Silverback Therapeutics, Inc.",NASDAQ Global,21.00,11500000,2020-12-04,241500000.0,priced


__Add New Stocks to Database__

In [14]:
# Bring in the stocks table that is already stored in Postgres.
# The connection URL can be overridden with the IPO_TRACKER_DB_URL environment variable so
# real credentials do not have to be saved in the notebook; the default is the original
# localhost URL.
engine = create_engine(os.environ.get('IPO_TRACKER_DB_URL', 'postgresql://postgres:postgres@localhost:5432/IPO_tracker'))

# the context manager closes the connection once the query result has been read
with engine.connect() as connection:
    sql_ipo_df = pd.read_sql("SELECT * FROM stocks", connection)
sql_ipo_df.head()

Unnamed: 0,id,symbol,company,exchange,proposed_share_price,shares_offered,priced_date,dollar_val_shares,deal_status
0,1,RSVAU,Rodgers Silicon Valley Acquisition Corp,NASDAQ Capital,10.0,20000000,2020-12-02,200000000.0,priced
1,2,TACAU,Trepont Acquistion Corp I,NYSE,10.0,20000000,2020-12-02,200000000.0,priced
2,3,CAP'U,Capitol Investment Corp. V,NYSE,10.0,30000000,2020-12-02,300000000.0,priced
3,4,HTPAU,Highland Transcend Partners I Corp.,NYSE,10.0,27500000,2020-12-03,275000000.0,priced
4,5,FPACU,Far Peak Acquisition Corp,NYSE,10.0,55000000,2020-12-03,550000000.0,priced


In [50]:
# keep only the scraped IPOs whose symbol is not already present in the database table
already_stored = ipo_df["symbol"].isin(sql_ipo_df["symbol"])
new_ipos_df = ipo_df[~already_stored]
new_ipos_df

Unnamed: 0,symbol,company,exchange,proposed_share_price,shares_offered,priced_date,dollar_val_shares,deal_status


In [11]:
# load data: append the new IPO rows to the stocks table
# (the connection URL can be overridden with the IPO_TRACKER_DB_URL environment variable;
# the default is the original localhost URL)
engine = create_engine(os.environ.get('IPO_TRACKER_DB_URL', 'postgresql://postgres:postgres@localhost:5432/IPO_tracker'))
new_ipos_df.to_sql('stocks', con=engine, if_exists='append', index=False)

# Stock Detail & Performance
At this point this is a proof of concept, showing how we will pull the data.

May include things like:
1. Stock price
2. Market cap (may choose to exclude smaller cap new stocks to limit to biggest and more interesting IPOs)
3. Launch date open and close price
4. Stock attribute information (tech vs consumer goods vs any summary statement available?)

This worked best:  
https://query1.finance.yahoo.com/v8/finance/chart/AAPL?formatted=true&crumb=T18HKACbWPn&lang=en-US&region=US&events=div%7Csplit&includeAdjustedClose=true&interval=1d&range=2y&corsDomain=finance.yahoo.com

This is it with Unix Date selection (period1=xxxxxxx&period2=xxxxxxxxxx):
https://query2.finance.yahoo.com/v8/finance/chart/AAPL?formatted=true&crumb=T18HKACbWPn&lang=en-US&region=US&includeAdjustedClose=true&interval=1d&period1=1546300800&period2=1608076800&events=div%7Csplit&corsDomain=finance.yahoo.com

This will provide city, industry, etc. if we would like to add that:  
https://query1.finance.yahoo.com/v10/finance/quoteSummary/AAPL?modules=assetProfile%2CearningsHistory

May need this to get price for a specific date (likely won't need):  
https://stackoverflow.com/questions/44030983/yahoo-finance-url-not-working

This was what was initially used (likely won't need):  
https://query2.finance.yahoo.com/v10/finance/quoteSummary/AAPL?formatted=true&crumb=8ldhetOu7RJ&lang=en-US&region=US&modules=defaultKeyStatistics%2CfinancialData%2CcalendarEvents&corsDomain=finance.yahoo.com

In [15]:
# set up as a sample for now, will need to determine when to scrape stock info and what to keep
# Row offsets in ipo_df of the IPOs used as samples. Offsets past the end of the frame are
# skipped, so a month with fewer IPOs does not raise IndexError.
sample_positions = (0, 5)
sample_symbols = [ipo_df.iloc[pos]['symbol'] for pos in sample_positions if pos < len(ipo_df)]
sample_symbols

['RSVAU', 'LOKBU']

In [27]:
# Fetch daily price history (interval=1d, range=2y) for the first sample symbol from the
# Yahoo chart endpoint. The timeout stops the cell from hanging, and raise_for_status()
# reports an HTTP error here instead of as a KeyError while parsing.
r = requests.get(f'https://query1.finance.yahoo.com/v8/finance/chart/{sample_symbols[0]}?formatted=true&crumb=T18HKACbWPn&lang=en-US&region=US&events=div%7Csplit&includeAdjustedClose=true&interval=1d&range=2y&corsDomain=finance.yahoo.com', timeout=30)
r.raise_for_status()
# NOTE: this rebinds `data`, which earlier held the Nasdaq response
data = r.json()

# look up the shared parts of the response once instead of repeating the full path per field
chart_result = data["chart"]["result"][0]
quote = chart_result["indicators"]["quote"][0]

timestamp = chart_result["timestamp"]
stk_open = quote["open"]
stk_close = quote["close"]
stk_high = quote["high"]
stk_low = quote["low"]
stk_vol = quote["volume"]

In [59]:
# dataframe with the daily price history fetched for the first sample symbol
# - `symbol` uses sample_symbols[0], the symbol the request was made for, instead of a
#   hard-coded ticker, so the label stays correct when the sample changes
# - `date` is derived from the unix timestamp in UTC and truncated to midnight, so the
#   result does not depend on the timezone of the machine running the notebook
df = pd.DataFrame({"symbol" : sample_symbols[0],
                   "unix_time" : timestamp,
                   "date" : pd.to_datetime(timestamp, unit="s").normalize(),
                   "open" : stk_open,
                   "close" : stk_close,
                   "high" : stk_high,
                   "low" : stk_low,
                   "volume" : stk_vol
                  })
df

Unnamed: 0,symbol,unix_time,date,open,close,high,low,volume
0,RSVAU,1606919400,2020-12-02,10.45,11.22,11.61,10.4,3303100
1,RSVAU,1607005800,2020-12-03,10.85,10.99,11.0,10.7,1140500
2,RSVAU,1607092200,2020-12-04,11.0,10.8,11.4,10.725,1019700
3,RSVAU,1607351400,2020-12-07,10.97,11.06,11.212,10.86,233000
4,RSVAU,1607437800,2020-12-08,11.2,11.8,12.3,11.148,423700
5,RSVAU,1607524200,2020-12-09,11.94,11.32,11.94,11.2,183200
6,RSVAU,1607610600,2020-12-10,11.4,11.56,11.73,11.36,553500
7,RSVAU,1607697000,2020-12-11,12.0,12.2,13.4,11.77,594400
8,RSVAU,1607956200,2020-12-14,12.33,11.97,12.33,11.92,205700
9,RSVAU,1608042600,2020-12-15,12.1,11.45,12.1,11.378,224200


In [None]:
# sample loop through symbols

# accumulators filled by the loop (they were never initialised before, which raised NameError)
market_cap_list = []
current_price_list = []
json_stock_data = []

# loop through symbols and get data for each
for symbol in sample_symbols:
    # The quoteSummary endpoint is requested because the fields parsed below live under a
    # top-level 'quoteSummary' key; the chart endpoint's response is keyed by 'chart' instead.
    r = requests.get(f'https://query2.finance.yahoo.com/v10/finance/quoteSummary/{symbol}?formatted=true&crumb=8ldhetOu7RJ&lang=en-US&region=US&modules=defaultKeyStatistics%2CfinancialData%2CcalendarEvents&corsDomain=finance.yahoo.com', timeout=30)
    r.raise_for_status()
    data = r.json()

    # get stats from the dataset
    summary = data['quoteSummary']['result'][0]
    price = summary['financialData']['currentPrice']['raw']
    # NOTE(review): the value stored as market_cap is the enterpriseValue field; confirm that is intended
    market_cap = summary['defaultKeyStatistics']['enterpriseValue']['raw']

    market_cap_list.append(market_cap)
    current_price_list.append(price)
    json_stock_data.append(data)

print(current_price_list)
print(market_cap_list)

In [None]:
# summary frame: one row per sample symbol, stamped with today's date
today_str = datetime.today().strftime('%Y-%m-%d')
summary_columns = {
    "sample_symbols": sample_symbols,
    "current_price": current_price_list,
    "market_cap": market_cap_list,
    "date": today_str,
}
df = pd.DataFrame(summary_columns)
df

# Postgres

__Postgresql__

In [None]:
# create engine; the connection URL can be overridden with the IPO_TRACKER_DB_URL
# environment variable so real credentials do not have to be saved in the notebook
# (the default is the original localhost URL)
engine = create_engine(os.environ.get('IPO_TRACKER_DB_URL', 'postgresql://postgres:postgres@localhost:5432/IPO_tracker'))


In [None]:
# Declare a Base using `automap_base()`; later cells call `Base.prepare(...)` on it and
# read the generated classes from `Base.classes`
Base = automap_base()

In [None]:
# Use the Base class to reflect the database tables reachable through `engine`
# NOTE(review): `reflect=True` is the older calling convention; newer SQLAlchemy releases
# use `autoload_with=engine` instead. Confirm against the installed version before changing.
Base.prepare(engine, reflect=True)

In [None]:
# Show the names of all of the classes mapped to the Base
Base.classes.keys()

In [None]:
# Assign the class mapped for the `stocks` table to a variable called `IPO`.
# `stocks` is the table this notebook reads from and appends to; the recorded error output
# earlier in the notebook reports that relation "ipo" does not exist.
# NOTE(review): automap only generates classes for tables with a primary key; confirm
# `stocks.id` is declared as one.
IPO = Base.classes.stocks

In [None]:
# Create a session on the engine; the query, update and commit cells below use it
session = Session(engine)

In [None]:
# Query the mapped table through the session and print the symbol and company of the first 5 rows
first_five = session.query(IPO.symbol, IPO.company).limit(5).all()
for row in first_five:
    print(row)

In [None]:
# testing update: overwrite the company name of the row(s) whose symbol is 'HCDI'
# NOTE(review): the name holds whatever Query.update returns (presumably the matched row
# count), not the row itself
stock_to_update = (
    session.query(IPO.symbol, IPO.company)
    .filter(IPO.symbol == 'HCDI')
    .update({"company": "crab"}, synchronize_session='evaluate')
)

In [None]:
# commit the pending update to the database, then close the session
session.commit()
session.close()

# Analysis

Information that may be interesting to share.  Examples include:
1. Timing of when it launches, how long it's been, etc.
2. Price performance
    - Launch date open and close price (how they did on first day)
    - How did it do when it hit the 1 month, 3 month, 6 month, 1 year milestones
3. Industry performance
    - Did it outperform the S&P 
    - Did it outperform its sector (Ex: tech, consumer goods)
4. Top performers
    - Which IPOs did best in last 1 month, 3 month, 6 month, 1 year milestone

In [None]:
# sample analysis: open-to-close change for the AAPL row(s) dated "07012020"
is_aapl = sample_df["Ticker"] == "AAPL"
is_target_day = sample_df["Date"] == "07012020"
appl = sample_df.loc[is_aapl & is_target_day]

# relative change from open to close (close / open - 1)
appl_day_change = appl["Close_Price"] / appl["Open_Price"] - 1
print(appl_day_change)

In [None]:
# Calculate based on MongoDB record
documents = collection.find({})
response = []
for document in documents:
    try:
        # replace the document's _id with its string form
        document['_id'] = str(document['_id'])
    except Exception:
        # `except Exception` (not a bare except) so KeyboardInterrupt/SystemExit still propagate.
        # The bad document is logged and skipped rather than stored as None, because the
        # lookups below index every entry of `response` by key.
        log.info(f'Could not find {document}')
        continue
    response.append(document)

# Example field reference for first record
price_high = response[0]["price_high"]
price_high

In [None]:
# Print text for each record

for record in response:
    # skip None placeholder entries, if any are present, instead of failing on the key lookups
    if record is None:
        continue
    print(f"{record['expected_to_trade']}: {record['company']} [{record['symbol_proposed']}]. Price (Low-High): ${record['price_low']}-{record['price_high']}. #new_ipo_{record['symbol_proposed']}")

In [None]:
# Establish Twitter connection

import tweepy

# Credentials are read from environment variables so real keys never have to be saved in
# the notebook; the original placeholder strings remain as the defaults.
CONSUMER_KEY = os.environ.get("TWITTER_CONSUMER_KEY", "consumer_key")
CONSUMER_SECRET = os.environ.get("TWITTER_CONSUMER_SECRET", "consumer_secret")
ACCESS_KEY = os.environ.get("TWITTER_ACCESS_KEY", "access_key")
ACCESS_SECRET = os.environ.get("TWITTER_ACCESS_SECRET", "access_secret")

# OAuth handler built from the consumer pair, then given the access pair
auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_KEY, ACCESS_SECRET)

# API client used by the tweet-posting cell
api = tweepy.API(auth)

In [None]:
# Post a tweet for each record
for record in response:
    # skip None placeholder entries, if any are present, instead of failing on the key lookups
    if record is None:
        continue
    new_tweet = f"{record['expected_to_trade']}: {record['company']} [{record['symbol_proposed']}]. Price (Low-High): ${record['price_low']}-{record['price_high']}. #new_ipo_{record['symbol_proposed']}"
    api.update_status(new_tweet)