In [2]:
import requests
from bs4 import BeautifulSoup
import yfinance as yf
from datetime import datetime
from datetime import timedelta
import pandas as pd
import json

def changes_from_press(stock_data, press_date, period):
    next_day = timedelta(days=1)
    time_after_release = timedelta(days=period)
    

    next_trading_day = press_date
    day_1_price = stock_data[stock_data['Date'] == press_date]['Open'].values

    while not day_1_price:
        next_trading_day = next_trading_day + next_day

        day_1_price = stock_data[stock_data['Date'] == (next_trading_day)]['Open'].values

    next_trading_day = next_trading_day + time_after_release
    day_2_price = stock_data[stock_data['Date'] == (next_trading_day)]['Close'].values

    while not day_2_price:
        next_trading_day = next_trading_day + next_day

        day_2_price = stock_data[stock_data['Date'] == (next_trading_day + next_day)]['Close'].values


    pct_change = ((day_2_price - day_1_price) / day_2_price)*100
    pct_change = pct_change[0]
    return pct_change

def get_df(ticker):
    
    stock_data = yf.Ticker(ticker)

    # get historical market data
    stock_hist = stock_data.history(period="max")

    stock_hist.reset_index(inplace=True)

    stock_hist['Date'] = pd.to_datetime(stock_hist['Date']).dt.date

    stock_hist['Pct_Close'] = stock_hist['Close'].pct_change()*100

    return stock_hist


In [10]:
data = []
page_num = 0

ticker = 'VRTX'

stock_hist = get_df(ticker)

while True:
    
    x = requests.get(f'https://investors.vrtx.com/press-releases?page={page_num}')
    soup = BeautifulSoup(x.text)
    
    press_table = soup.find('table', attrs={'nirtable views-table views-view-table cols-3 collapse-table'})
    if press_table == None:
        break
    
    table_body = press_table.find('tbody')
    rows = table_body.find_all('tr')
    
    for row in rows:
        print(row)
        
        cols = row.find_all('td')
            
        cols = [ele.text.strip().replace('\n', '') for ele in cols]
               
        del cols[2]
        
        cols[0] = cols[0][14:].replace(',', '')
        cols[0] = datetime.strptime(cols[0], '%b %d %Y').date()
        
        pct_change = changes_from_press(stock_hist, cols[0], 1)
        cols.append(pct_change)
        data.append(cols) # Get rid of empty values
    page_num = page_num + 1
    
    
data = pd.DataFrame(data, columns=['date', 'press title', '1d change'])

dates_str = [date.strftime("%m/%d/%Y") for date in data['date'].tolist()]

dict_data = {
    'ticker': ticker,
    'data': {
        'dates':dates_str,
        'title': data['press title'].tolist(),
        'priceChange': data['1d change'].tolist(),
    },
}

with open(f'./data/{ticker}.json', 'w', encoding='utf-8') as f:
    json.dump(dict_data, f, ensure_ascii=False, indent=4)

<tr>
<td class="views-field views-field-field-nir-news-date" headers="view-field-nir-news-date-table-column"><a class="summary-toggle nir-news-widget-summary-toggle-date" href="#"><span>Toggle Summary</span></a><time class="datetime" datetime="00Z" timezone="America/New_York">Apr 10, 2023</time> </td>
<td class="views-field views-field-field-nir-news-title" headers="view-field-nir-news-title-table-column">
<a href="/news-releases/news-release-details/vertex-announce-first-quarter-2023-financial-results-may-1" hreflang="en">Vertex to Announce First Quarter 2023 Financial Results on May 1</a>
<div class="nir-news-table-teaser summary-hidden">
<summary>
  BOSTON --(BUSINESS WIRE)--Apr. 10, 2023-- Vertex Pharmaceuticals Incorporated (Nasdaq: VRTX) will report its first quarter 2023 financial results on Monday, May 1, 2023 after the financial markets close. The company will host a conference call and webcast at 4:30 p.m. ET .
  </summary>
</div>
</td>
<td class="views-field views-field-fiel

  while not day_2_price:

KeyboardInterrupt

