In [1]:
import re
import requests
from bs4 import BeautifulSoup
import yfinance as yf
from datetime import datetime
from datetime import timedelta
import pandas as pd
import json

def changes_from_press(stock_data, press_date, period):
    next_day = timedelta(days=1)
    time_after_release = timedelta(days=period)
    

    next_trading_day = press_date
    
    # try to obtain share price at open on press release date
    day_1_price = stock_data[stock_data['Date'] == press_date]['Open'].values


    # increase the date till the first available open price is found
    num_days = 1
    while not day_1_price:
        if num_days > 30:
            return None
        
        next_trading_day = next_trading_day + next_day

        day_1_price = stock_data[stock_data['Date'] == (next_trading_day)]['Open'].values
        num_days = num_days + 1

    # get next day if available
    next_trading_day = next_trading_day + time_after_release
    day_2_price = stock_data[stock_data['Date'] == (next_trading_day)]['Close'].values

    # increase the date till the second available close price is found
    num_days = 1
    while not day_2_price:
        if num_days > 30: 
            return None
        
        next_trading_day = next_trading_day + next_day

        day_2_price = stock_data[stock_data['Date'] == (next_trading_day + next_day)]['Close'].values
        num_days = num_days + 1
    
    # calcualte percent difference between share prices
    pct_change = ((day_2_price - day_1_price) / day_2_price)*100
    pct_change = pct_change[0]
    return pct_change

def get_df(ticker):
    
    stock_data = yf.Ticker(ticker)

    # get historical market data
    stock_hist = stock_data.history(period="max")

    stock_hist.reset_index(inplace=True)

    stock_hist['Date'] = pd.to_datetime(stock_hist['Date']).dt.date

    stock_hist['Pct_Close'] = stock_hist['Close'].pct_change()*100

    return stock_hist


In [2]:
data = []
page_num = 1

ticker = 'AAPL'

stock_hist = get_df(ticker)

previous_items = None

while True:

    
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36"}
    s = requests.Session()
    html = requests.get(f'https://www.apple.com/newsroom/archive/?page={page_num}', headers=headers)


    soup = BeautifulSoup(html.content) 
    press_items = soup.find_all('a', attrs={'class':'result__item row-anchor'})
    
    if press_items == previous_items:
        break
    
    for press_item in press_items:
        
        type_release =  press_item.find('p', attrs={'class':'item__category category_release'})
        if type_release != None:
            release = press_item.find('h3', attrs={'class':'item__headline'}).text.replace('\n', '')
            release = re.sub(' +', ' ', release)
            release = release[1:-1]
            
            date = press_item.find('p', attrs={'class':'item__date'}).text.replace(',','')
            date_parts = date.split(' ')
            date = f'{date[0:3]} {date_parts[1]} {date_parts[2]}'
            date = datetime.strptime(date, '%b %d %Y').date()
            
            pct_change = changes_from_press(stock_hist, date, 1) 
            
            data.append([date, release, pct_change])
            print([date, release, pct_change])
        
    page_num = page_num + 1
    previous_items = press_items

data = pd.DataFrame(data, columns=['date', 'press title', '1d change'])

dates_str = [date.strftime("%m/%d/%Y") for date in data['date'].tolist()]

dict_data = {
    'ticker': ticker,
    
    'data': {
        'dates':dates_str,
        'title': data['press title'].tolist(),
        'priceChange': data['1d change'].tolist(),
    },
}

with open(f'./data/{ticker}.json', 'w', encoding='utf-8') as f:
    json.dump(dict_data, f, ensure_ascii=False, indent=4)
  

[datetime.date(2023, 3, 29), 'Apple’s Worldwide Developers Conference returns June 5', 1.8415899740846817]
[datetime.date(2023, 3, 28), 'Apple Gangnam will welcome first customers this Friday, March 31 in South Korea', 1.7416203130856285]
[datetime.date(2023, 3, 28), 'Apple introduces Apple\xa0Pay\xa0Later', 1.7416203130856285]
[datetime.date(2023, 3, 28), 'Apple Music Classical is here', 1.7416203130856285]
[datetime.date(2023, 3, 14), 'Apple introduces Shop with a Specialist over Video', 1.1177244607286418]
[datetime.date(2023, 3, 12), 'Apple TV+ wins Academy Award for The Boy, the Mole, the Fox and the Hors', 3.1325767704405676]
[datetime.date(2023, 3, 7), 'Hello, yellow! Apple introduces new iPhone\xa014 and iPhone\xa014\xa0Plus', -0.5429462010634738]
[datetime.date(2023, 2, 2), 'Apple reports first quarter results', 3.6245961562089146]


  while not day_1_price:


[datetime.date(2023, 1, 30), 'Apple Music launches Rihanna’s Road\xa0to\xa0Halftime ahead of Super\xa0Bowl\xa0LVII', -0.4643532259358702]
[datetime.date(2023, 1, 24), 'Apple builds on privacy commitment by unveiling new efforts on Data Privacy Day', 1.0926293328132557]
[datetime.date(2023, 1, 18), 'Apple introduces the new HomePod with breakthrough sound and intelligence', -1.1458587187748817]
[datetime.date(2023, 1, 17), 'Apple unveils M2\xa0Pro and M2\xa0Max: next-generation chips for next-level workflows', 0.2810486298951928]
[datetime.date(2023, 1, 17), 'Apple introduces new Mac\xa0mini with M2 and M2\xa0Pro — more powerful, capable, and versatile than ever', 0.2810486298951928]
[datetime.date(2023, 1, 17), 'Apple unveils MacBook\xa0Pro featuring M2\xa0Pro and M2\xa0Max', 0.2810486298951928]
[datetime.date(2022, 12, 6), 'Apple announces biggest upgrade to App Store pricing, adding 700 new price points', -4.3493698884438]
[datetime.date(2022, 12, 6), 'Apple introduces Apple\xa0Music


KeyboardInterrupt



In [None]:
https://www.verizon.com/about/api/news_center/press_release?offset=80&tags=&year=2021&keywords=