# Dependencies

In [1]:
# import libraries
from splinter import Browser
from splinter.exceptions import ElementDoesNotExist
from bs4 import BeautifulSoup
import time

In [2]:
# import pandas
import pandas as pd 

# Extract

## Scrape IPOs

In [3]:
# IPO Scoop Website
ipo_scoop_url = 'https://www.iposcoop.com/'
ipo_scoop_ipo = pd.read_html(ipo_scoop_url)
ipo_scoop_ipo[0]

Unnamed: 0,Company,Symbol proposed,Lead Managers,Shares (Millions),Price Low,Price High,Est. $ Volume,Expected to Trade,SCOOP Rating,Rating Change
0,1847 Goedeker,GOED,ThinkEquity (a division of Fordham Financial M...,1.0,9.0,11.0,$ 10.0 mil,7/23/2020 Thursday,S/O,S/O
1,East Resources Acquisition,ERESU,Wells Fargo Securities,30.0,10.0,10.0,$ 300.0 mil,7/23/2020 Priced,S/O,S/O
2,Harbor Custom Development,HCDI,ThinkEquity (a division of Fordham Financial M...,2.1,6.0,8.0,$ 15.0 mil,7/23/2020 Thursday,S/O,S/O


Need to investigate how to find IPO information and how to pull.  Initially had challenges using pandas to pull tables directly from Nasdaq and NYSE--can investigate other methods for scraping.  Here's some commentary how to find:  https://www.investopedia.com/articles/investing/050115/how-track-upcoming-ipos.asp

May also be able to get from yahoo or google finance APIs.

In [4]:
# Nasdaq Site (this is taking forever to run, maybe pandas won't work here)
#ipo_nasdaq_url = "https://www.nasdaq.com/market-activity/ipos?tab=upcoming"
#ipo_nasdaq_url = pd.read_html(ipo_nasdaq_url)
#ipo_nasdaq_url


## Additional Stock Info

Using APIs, get stock info on IPOs.  Then may include thinks like:
1. When an IPO is set to lauch
2. Market cap (may choose to exclude smaller cap new stocks to limit to biggest and more interesting IPOs)
3. Launch date open and close price
4. Stock attribute information (tech vs consumer goods vs any summary statement available?)

Two potential APIs to try:

__Google__  
https://pypi.org/project/googlefinance/
 
__Yahoo__  
https://github.com/ranaroussi/yfinance  
https://towardsdatascience.com/free-stock-data-for-python-using-yahoo-finance-api-9dafd96cad2e  
https://rapidapi.com/apidojo/api/yahoo-finance1  

__Other__  
https://medium.com/@patrick.collins_58673/stock-api-landscape-5c6e054ee631  

__Quandl__ (what was used in school)  
https://blog.quandl.com/api-for-stock-data

In [4]:
# yahoo finance example

import yfinance as yf

stock = yf.Ticker("GOOG")

# get stock info
print(stock.info)
print("------------------------")

# get financials (returns empty dataframe need to troublshoot) -- also didn't have recent stock like Lemonade (LMND) 
print(stock.financials)

{'zip': '94043', 'sector': 'Communication Services', 'fullTimeEmployees': 123048, 'longBusinessSummary': 'Alphabet Inc. provides online advertising services in the United States, Europe, the Middle East, Africa, the Asia-Pacific, Canada, and Latin America. It offers performance and brand advertising services. The company operates through Google and Other Bets segments. The Google segment offers products, such as Ads, Android, Chrome, Google Cloud, Google Maps, Google Play, Hardware, Search, and YouTube, as well as technical infrastructure. It also offers digital content, cloud services, hardware devices, and other miscellaneous products and services. The Other Bets segment includes businesses, including Access, Calico, CapitalG, GV, Verily, Waymo, and X, as well as Internet and television services. Alphabet Inc. was founded in 1998 and is headquartered in Mountain View, California.', 'city': 'Mountain View', 'phone': '650-253-0000', 'state': 'CA', 'country': 'United States', 'companyOf

In [7]:
# Yahoo test through Rapid API. Note limits is 500 per month and 10 requests per minute, 
# maybe not best route as pro is $10 per month

import requests

url = "https://yahoo-finance15.p.rapidapi.com/api/yahoo/qu/quote/LMND"
x-rapid-key = "XXXXXXXXXXXXXXXXXXXXXXX"

headers = {
    'x-rapidapi-host': "yahoo-finance15.p.rapidapi.com",
    'x-rapidapi-key': x-rapid-key
    }

#### un-comment out below to run (commented so don't get too many API calls) ####
#response = requests.request("GET", url, headers=headers)

print(response.text)

SyntaxError: can't assign to operator (<ipython-input-7-73a0190f4995>, line 7)

In [7]:
# Google test, gets "forbidden URL error message"
#from googlefinance import getQuotes
#import json
#json.dumps(getQuotes('AAPL'), indent=2)

# Load

Get data tranformed into a format we'd want to load to a database (noSQL or SQL).  

In [5]:
# this is a sample dataframe with fake numbers, may expand columns but this are primary ones needed
stock_data = {"Ticker": ["AAPL", "LMND"],
              "Date": ["07012020", "07012020"],
              "Open_Price": [88, 38],
              "Close_Price": [89, 55],
              "Market_Cap" : [145000, 30000]
             }
sample_df = pd.DataFrame(stock_data)
sample_df

Unnamed: 0,Ticker,Date,Open_Price,Close_Price,Market_Cap
0,AAPL,7012020,88,89,145000
1,LMND,7012020,38,55,30000


In [5]:
# Robin: here are values from IPO Scoop

ipo_df = ipo_scoop_ipo[0]
ipo_df.columns = ["company", "symbol_proposed","lead_managers","shares_mil","price_low","price_high","est_volume","expected_to_trade","scoop_rating","rating_change"]
ipo_df

Unnamed: 0,company,symbol_proposed,lead_managers,shares_mil,price_low,price_high,est_volume,expected_to_trade,scoop_rating,rating_change
0,1847 Goedeker,GOED,ThinkEquity (a division of Fordham Financial M...,1.0,9.0,11.0,$ 10.0 mil,7/23/2020 Thursday,S/O,S/O
1,East Resources Acquisition,ERESU,Wells Fargo Securities,30.0,10.0,10.0,$ 300.0 mil,7/23/2020 Priced,S/O,S/O
2,Harbor Custom Development,HCDI,ThinkEquity (a division of Fordham Financial M...,2.1,6.0,8.0,$ 15.0 mil,7/23/2020 Thursday,S/O,S/O


In [6]:
import pymongo

# Mongo DB configuration
mg_usr = 'username'
mg_pwd = 'password'

client = pymongo.MongoClient(f"mongodb+srv://{mg_usr}:{mg_pwd}@cluster0-xcn4s.mongodb.net/test?retryWrites=true&w=majority")
db = client['upcoming_ipos']
collection = db['ipos']

In [15]:
# Convert dataframe to dictionary records
data_dict = ipo_df.to_dict("records")

In [16]:
# Add records
collection.insert_many(data_dict)

<pymongo.results.InsertManyResult at 0x2a55e0fa448>

# Analysis

Information that may be interesting to share.  Examples include:
1. Timing of when it launches, how long its been, etc.
2. Price performance
    - Launch date open and close price (how they did on first day)
    - How did it do when its hit 1 month, 3 month, 6 month, 1 year milestone
3. Industry perormance
    - Did it outperform the S&P 
    - Did it outperform they sector (Ex: tech, consumer goods)
4. Top performers
    - Which IPOs did best in last 1 month, 3 month, 6 month, 1 year milestone

In [18]:
# sample analysis for open to close change for one day
appl = sample_df.loc[(sample_df["Ticker"] == "AAPL") & (sample_df["Date"] == "07012020")]
appl_day_change = appl["Close_Price"] / appl["Open_Price"] -1
print(appl_day_change)

0    0.011364
dtype: float64


In [47]:
# Calculate based on MongoDB record
documents = collection.find({})
response = []
for document in documents:
    try:
        document['_id'] = str(document['_id'])
        response.append(document)
    except:
        response.append(None)
        log.info(f'Could not find {document}')
        
# Example field reference for first record
price_high = response[0]["price_high"]
price_high

11.0

In [48]:
# Print text for each record

for i in range(0,len(response)):
  print(f"{response[i]['expected_to_trade']}: {response[i]['company']} [{response[i]['symbol_proposed']}]. Price (Low-High): ${response[i]['price_low']}-{response[i]['price_high']}. #new_ipo_{response[i]['symbol_proposed']}")  

7/23/2020 Thursday: 1847 Goedeker [GOED]. Price (Low-High): $9.0-11.0. #new_ipo_GOED
7/23/2020 Priced: East Resources Acquisition [ERESU]. Price (Low-High): $10.0-10.0. #new_ipo_ERESU
7/23/2020 Thursday: Harbor Custom Development [HCDI]. Price (Low-High): $6.0-8.0. #new_ipo_HCDI


In [35]:
# Establish Twitter connection

import tweepy

CONSUMER_KEY = "consumer_key"
CONSUMER_SECRET = "consumer_secret"   
ACCESS_KEY = "access_key"    
ACCESS_SECRET = "access_secret"

auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_KEY, ACCESS_SECRET)

api = tweepy.API(auth)

In [49]:
# Post a tweet for each record
for i in range(0,len(response)):
  new_tweet = f"{response[i]['expected_to_trade']}: {response[i]['company']} [{response[i]['symbol_proposed']}]. Price (Low-High): ${response[i]['price_low']}-{response[i]['price_high']}. #new_ipo_{response[i]['symbol_proposed']}" 
  api.update_status(new_tweet)      