In [None]:
import yfinance as yf
import pandas as pd
from datetime import datetime, timedelta
import requests
import time
import os

In [None]:
# Load the table of companies whose data will be extracted. Later cells read
# its 'Company' and 'Ticker' columns.
filepath = "data/companies.csv"
company_df = pd.read_csv(filepath)
# Last expression of the cell: show the first rows as a quick sanity check.
company_df.head()

In [None]:
# Download the price history of every company listed in `company_df` and write
# one CSV per company to data/stocks/<company name, lower-case, '_' for ' '>.csv.
#
# The date window is fixed so that re-running the notebook reproduces the same
# files. A rolling "last 250 days" window used to be computed here, but it was
# always overwritten by the dates below, so that dead code has been removed.
# The number of rows per file is smaller than the number of calendar days in
# the window, since there is no data for days on which the market is closed.
start = datetime.strptime('2024-03-11', '%Y-%m-%d')
end = datetime.strptime('2024-11-15', '%Y-%m-%d')

folder = "data/stocks"
# Create the output folder once, before the loop. makedirs also creates a
# missing parent folder and does not fail when the folder already exists.
os.makedirs(folder, exist_ok=True)

for company, ticker in zip(company_df['Company'], company_df['Ticker']):
    filename = company.replace(' ', '_').lower() + '.csv'

    stock_df = yf.download(ticker, start=start, end=end, progress=False)

    # Do not write an empty file when nothing came back for this ticker:
    # an empty CSV would only fail later, and less clearly, in save_news.
    if stock_df is None or stock_df.empty:
        print(f"Warning: no price data returned for {company} ({ticker}); skipping.")
        continue

    # Some yfinance releases return two-level columns (price field, ticker)
    # even for a single ticker. Written as-is, that produces a multi-row CSV
    # header and the 'Date' column can no longer be read back by name.
    # Keep only the price-field level so the CSV has a single header row.
    if isinstance(stock_df.columns, pd.MultiIndex):
        stock_df.columns = stock_df.columns.get_level_values(0)

    savepath = os.path.join(folder, filename)
    stock_df.to_csv(savepath)

In [None]:
def _get_json(url, params, timeout):
    """Return the decoded JSON object of a GET request, or {} on any failure."""
    try:
        payload = requests.get(url, params=params, timeout=timeout).json()
    except (requests.RequestException, ValueError) as err:
        # Only the exception type is printed: the message of a requests error
        # can contain the full URL, which includes the API key.
        print(f"Warning: news request failed ({type(err).__name__}).")
        return {}
    return payload if isinstance(payload, dict) else {}


def _combine_articles(articles):
    """Concatenate the text fields and average the sentiment scores of `articles`.

    `articles` must be non-empty. Missing or null text fields count as empty
    strings. An article without sentiment contributes 0 to every score but is
    still counted in the divisor.
    """
    totals = {'positive': 0, 'negative': 0, 'neutral': 0}
    texts = {'title': "", 'description': "", 'summary': ""}

    for article in articles:
        for field in texts:
            texts[field] += " " + (article.get(field) or "")
        sentiment = article.get('sentiment') or {}
        for label in totals:
            totals[label] += sentiment.get(label) or 0

    count = len(articles)
    averages = {label: total / count for label, total in totals.items()}
    return {**texts, **averages}


def save_news(filename, company, api, max_articles=10, delay=5, n_days=150, timeout=30):
    """Collect per-trading-day news for one company and save it as a CSV.

    Reads data/stocks/<filename>, keeps its last `n_days` rows, and issues one
    request to the Perigon `/v1/all` endpoint for each remaining date, asking
    for articles whose title matches `"<company>"`. The result is written to
    data/news/<company.lower()>_news.csv with the columns
    date, title, description, summary, positive, negative, neutral.

    Parameters
    ----------
    filename : str
        Name of the stock CSV inside data/stocks/; it must have a 'Date' column.
    company : str
        Company name used for the title search and for the output file name.
    api : str
        Perigon API key.
    max_articles : int, default 10
        Maximum number of articles combined into one row.
    delay : float, default 5
        Seconds to sleep after each request.
    n_days : int, default 150
        Number of most recent rows of the stock file to cover.
    timeout : float, default 30
        Per-request timeout in seconds.

    A date whose request fails, or that has no results, is stored as a row
    holding only the date, with None in every other column.
    """
    stock_df = pd.read_csv(os.path.join("data/stocks/", filename))
    stock_df['Date'] = pd.to_datetime(stock_df['Date'])
    stock_df = stock_df.set_index('Date').tail(n_days)

    url = 'https://api.goperigon.com/v1/all'
    columns = ['date', 'title', 'description', 'summary', 'positive', 'negative', 'neutral']
    empty_fields = {name: None for name in columns[1:]}

    news_array = []
    prev_date = None

    for d in stock_df.index:
        # The window runs from the day after the previous trading date through
        # the current one, so days missing from the stock file (e.g. weekends)
        # are queried together with the next trading date. The first date is
        # queried on its own.
        date1 = d if prev_date is None else prev_date
        date2 = d
        prev_date = d + timedelta(1)

        # Passing the values through `params` lets requests URL-encode them,
        # so spaces, quotes or '&' in the company name cannot corrupt the query.
        params = {
            'apiKey': api,
            'from': date1.strftime('%Y-%m-%d'),
            'to': date2.strftime('%Y-%m-%d'),
            'showNumResults': 'true',
            'sortBy': 'relevance',
            'title': f'"{company}"',
        }
        response = _get_json(url, params, timeout)

        if (response.get('numResults') or 0) > 0:
            # Slice the list that was actually returned rather than trusting
            # numResults, which may exceed the number of articles in this page.
            articles = (response.get('articles') or [])[:max_articles]
        else:
            print(response)
            print(f"Warning: 'numResults' key missing or zero for date range {date1} to {date2}.")
            articles = []

        if articles:
            new_row = {'date': date2, **_combine_articles(articles)}
        else:
            new_row = {'date': date2, **empty_fields}

        news_array.append(new_row)
        time.sleep(delay)

    # Explicit columns keep the header row even when no dates were processed.
    df = pd.DataFrame(news_array, columns=columns)

    news_folder = "data/news"
    os.makedirs(news_folder, exist_ok=True)

    filename = company.lower() + "_news.csv"
    df.to_csv(os.path.join(news_folder, filename), index=False)

In [None]:
# Perigon API key. It is read from the PERIGON_API_KEY environment variable
# when that is set, so the real key never has to be pasted into the notebook.
# Otherwise the placeholder is used and must be replaced before running.
api_key = os.environ.get('PERIGON_API_KEY', 'API-KEY-HERE')

# Pairs of (stock CSV inside data/stocks, company name used for the news search).
# The search name is not always derivable from the file name, so both are listed.
news_targets = [
    ('tsmc.csv', 'TSMC'),
    ('nvidia.csv', 'Nvidia'),
    ('intel.csv', 'Intel'),
    ('amd.csv', 'AMD'),
    ('broadcom.csv', 'Broadcom'),
    ('asml.csv', 'ASML'),
    ('micron_technology.csv', 'Micron'),
    ('qualcomm.csv', 'Qualcomm'),
    ('lam_research.csv', 'Lam Research'),
    ('samsung_electronics.csv', 'Samsung Electronics'),
    ('texas_instruments.csv', 'Texas Instruments'),
    ('nxp_semiconductors.csv', 'NXP'),
]

for stock_file, search_name in news_targets:
    save_news(stock_file, search_name, api_key)