In [2]:
import requests
from bs4 import BeautifulSoup
import yfinance as yf
from datetime import datetime
from datetime import timedelta
import pandas as pd
import json

def changes_from_press(stock_data, press_date, period):
    next_day = timedelta(days=1)
    time_after_release = timedelta(days=period)
    

    next_trading_day = press_date
    
    # try to obtain share price at open on press release date
    day_1_price = stock_data[stock_data['Date'] == press_date]['Open'].values


    # increase the date till the first available open price is found
    num_days = 1
    while not day_1_price:
        if num_days > 30:
            return None
        
        next_trading_day = next_trading_day + next_day

        day_1_price = stock_data[stock_data['Date'] == (next_trading_day)]['Open'].values
        num_days = num_days + 1

    # get next day if available
    next_trading_day = next_trading_day + time_after_release
    day_2_price = stock_data[stock_data['Date'] == (next_trading_day)]['Close'].values

    # increase the date till the second available close price is found
    num_days = 1
    while not day_2_price:
        if num_days > 30: 
            return None
        
        next_trading_day = next_trading_day + next_day

        day_2_price = stock_data[stock_data['Date'] == (next_trading_day + next_day)]['Close'].values
        num_days = num_days + 1
    
    # calcualte percent difference between share prices
    pct_change = ((day_2_price - day_1_price) / day_2_price)*100
    pct_change = pct_change[0]
    return pct_change

def get_df(ticker):
    
    stock_data = yf.Ticker(ticker)

    # get historical market data
    stock_hist = stock_data.history(period="max")

    stock_hist.reset_index(inplace=True)

    stock_hist['Date'] = pd.to_datetime(stock_hist['Date']).dt.date

    stock_hist['Pct_Close'] = stock_hist['Close'].pct_change()*100

    return stock_hist

In [3]:
data = []
page_num = 0

ticker = 'VRTX'

stock_hist = get_df(ticker)

while True:
    
    x = requests.get(f'https://investors.vrtx.com/press-releases?page={page_num}')
    soup = BeautifulSoup(x.text)
    
    press_table = soup.find('table', attrs={'nirtable views-table views-view-table cols-3 collapse-table'})
    if press_table == None:
        break
    
    table_body = press_table.find('tbody')
    rows = table_body.find_all('tr')
    
    for row in rows:
        
        cols = row.find_all('td')
            
        cols = [ele.text.strip().replace('\n', '') for ele in cols]
               
        del cols[2]
        
        cols[0] = cols[0][14:].replace(',', '')
        cols[0] = datetime.strptime(cols[0], '%b %d %Y').date()
        
        pct_change = changes_from_press(stock_hist, cols[0], 1)
        cols.append(pct_change)
        data.append(cols) # Get rid of empty values
    page_num = page_num + 1
    
    
data = pd.DataFrame(data, columns=['date', 'press title', '1d change'])

data = data.dropna()

dates_str = [date.strftime("%m/%d/%Y") for date in data['date'].tolist()]
result = data.to_json(orient="values", index=True)


dict_data = {
    'ticker': ticker,
    'data': json.loads(result),
    },


with open(f'./data/{ticker}.json', 'w', encoding='utf-8') as f:
    json.dump(dict_data, f, ensure_ascii=False, indent=4)

  while not day_2_price:
  while not day_1_price:
