In [2]:
import requests
from bs4 import BeautifulSoup
import yfinance as yf
from datetime import datetime
from datetime import timedelta
import pandas as pd
import json

def changes_from_press(stock_data, press_date, period):
    next_day = timedelta(days=1)
    time_after_release = timedelta(days=period)
    

    next_trading_day = press_date
    
    # try to obtain share price at open on press release date
    day_1_price = stock_data[stock_data['Date'] == press_date]['Open'].values


    # increase the date till the first available open price is found
    num_days = 1
    while not day_1_price:
        if num_days > 30:
            return None
        
        next_trading_day = next_trading_day + next_day

        day_1_price = stock_data[stock_data['Date'] == (next_trading_day)]['Open'].values
        num_days = num_days + 1

    # get next day if available
    next_trading_day = next_trading_day + time_after_release
    day_2_price = stock_data[stock_data['Date'] == (next_trading_day)]['Close'].values

    # increase the date till the second available close price is found
    num_days = 1
    while not day_2_price:
        if num_days > 30: 
            return None
        
        next_trading_day = next_trading_day + next_day

        day_2_price = stock_data[stock_data['Date'] == (next_trading_day + next_day)]['Close'].values
        num_days = num_days + 1
    
    # calcualte percent difference between share prices
    pct_change = ((day_2_price - day_1_price) / day_2_price)*100
    pct_change = pct_change[0]
    return pct_change

def get_df(ticker):
    
    stock_data = yf.Ticker(ticker)

    # get historical market data
    stock_hist = stock_data.history(period="max")

    stock_hist.reset_index(inplace=True)

    stock_hist['Date'] = pd.to_datetime(stock_hist['Date']).dt.date

    stock_hist['Pct_Close'] = stock_hist['Close'].pct_change()*100

    return stock_hist

In [None]:
import requests
from bs4 import BeautifulSoup
import yfinance as yf
from datetime import datetime
from datetime import timedelta
import pandas as pd
import re

data = []
offset = 0

ticker = 'lly'

stock_hist = get_df(ticker)

# create break
while True:
    
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36"}
    
    s = requests.Session()
    html = requests.get(f'https://lilly.mediaroom.com/index.php?s=9042&l=100&o={offset}/', headers=headers)
    

    content = BeautifulSoup(html.content) 

    articles = content.find_all('div',attrs={'class': 'wd_item_wrapper'})
    
        
    if not content:
        break

    
    for article in articles:
        date = article.find('div', attrs={'wd_date'}).text.lstrip()
        title = article.find('div', attrs={'wd_title'}).text
        
        
        date = date.lstrip().rstrip()    # remove starting and trailing whitespaces
        title = title.lstrip().rstrip() 
        
        
        
        date = datetime.strptime(date, '%B %d, %Y').date()
        pct_change = changes_from_press(stock_hist, date, 1)
        
        data.append([date, title, pct_change])
    print(data)
        

    
    offset = offset + 100


data = pd.DataFrame(data, columns=['date', 'press title', '1d change'])

data = data.dropna()

dates_str = [date.strftime("%m/%d/%Y") for date in data['date'].tolist()]
result = data.to_json(orient="values", index=True)


dict_data = {
    'ticker': ticker,
    'data': json.loads(result),
    },


with open(f'./data/{ticker}.json', 'w', encoding='utf-8') as f:
    json.dump(dict_data, f, ensure_ascii=False, indent=4)

  while not day_2_price:
  while not day_1_price:


[[datetime.date(2023, 4, 27), 'Lilly Reports First-Quarter 2023 Financial Results, Highlights Continued Core Business Growth and Pipeline Momentum', None], [datetime.date(2023, 4, 27), "Lilly's tirzepatide achieved up to 15.7% weight loss in adults with obesity or overweight and type 2 diabetes in SURMOUNT-2", None], [datetime.date(2023, 4, 24), 'Lilly to Divest BAQSIMI to Amphastar', -0.10393771445033984], [datetime.date(2023, 4, 17), 'Lilly to Make Record-Breaking Investment in Indiana Manufacturing Facilities', -1.3760093772808675], [datetime.date(2023, 4, 13), 'U.S. Food and Drug Administration Issues Complete Response Letter for Mirikizumab', 0.563076296874874], [datetime.date(2023, 4, 13), 'Lilly Confirms Date and Conference Call for First-Quarter 2023 Financial Results Announcement', 0.563076296874874], [datetime.date(2023, 3, 14), 'Lilly Announces Details of Presentations at 2023 American Association for Cancer Research (AACR) Annual Meeting', 1.645070388093893], [datetime.date

[[datetime.date(2023, 4, 27), 'Lilly Reports First-Quarter 2023 Financial Results, Highlights Continued Core Business Growth and Pipeline Momentum', None], [datetime.date(2023, 4, 27), "Lilly's tirzepatide achieved up to 15.7% weight loss in adults with obesity or overweight and type 2 diabetes in SURMOUNT-2", None], [datetime.date(2023, 4, 24), 'Lilly to Divest BAQSIMI to Amphastar', -0.10393771445033984], [datetime.date(2023, 4, 17), 'Lilly to Make Record-Breaking Investment in Indiana Manufacturing Facilities', -1.3760093772808675], [datetime.date(2023, 4, 13), 'U.S. Food and Drug Administration Issues Complete Response Letter for Mirikizumab', 0.563076296874874], [datetime.date(2023, 4, 13), 'Lilly Confirms Date and Conference Call for First-Quarter 2023 Financial Results Announcement', 0.563076296874874], [datetime.date(2023, 3, 14), 'Lilly Announces Details of Presentations at 2023 American Association for Cancer Research (AACR) Annual Meeting', 1.645070388093893], [datetime.date

[[datetime.date(2023, 4, 27), 'Lilly Reports First-Quarter 2023 Financial Results, Highlights Continued Core Business Growth and Pipeline Momentum', None], [datetime.date(2023, 4, 27), "Lilly's tirzepatide achieved up to 15.7% weight loss in adults with obesity or overweight and type 2 diabetes in SURMOUNT-2", None], [datetime.date(2023, 4, 24), 'Lilly to Divest BAQSIMI to Amphastar', -0.10393771445033984], [datetime.date(2023, 4, 17), 'Lilly to Make Record-Breaking Investment in Indiana Manufacturing Facilities', -1.3760093772808675], [datetime.date(2023, 4, 13), 'U.S. Food and Drug Administration Issues Complete Response Letter for Mirikizumab', 0.563076296874874], [datetime.date(2023, 4, 13), 'Lilly Confirms Date and Conference Call for First-Quarter 2023 Financial Results Announcement', 0.563076296874874], [datetime.date(2023, 3, 14), 'Lilly Announces Details of Presentations at 2023 American Association for Cancer Research (AACR) Annual Meeting', 1.645070388093893], [datetime.date

[[datetime.date(2023, 4, 27), 'Lilly Reports First-Quarter 2023 Financial Results, Highlights Continued Core Business Growth and Pipeline Momentum', None], [datetime.date(2023, 4, 27), "Lilly's tirzepatide achieved up to 15.7% weight loss in adults with obesity or overweight and type 2 diabetes in SURMOUNT-2", None], [datetime.date(2023, 4, 24), 'Lilly to Divest BAQSIMI to Amphastar', -0.10393771445033984], [datetime.date(2023, 4, 17), 'Lilly to Make Record-Breaking Investment in Indiana Manufacturing Facilities', -1.3760093772808675], [datetime.date(2023, 4, 13), 'U.S. Food and Drug Administration Issues Complete Response Letter for Mirikizumab', 0.563076296874874], [datetime.date(2023, 4, 13), 'Lilly Confirms Date and Conference Call for First-Quarter 2023 Financial Results Announcement', 0.563076296874874], [datetime.date(2023, 3, 14), 'Lilly Announces Details of Presentations at 2023 American Association for Cancer Research (AACR) Annual Meeting', 1.645070388093893], [datetime.date