## Retrieve news articles from NYT API
- To get API key: https://developer.nytimes.com/get-started

In [3]:
import os
import datetime
from dotenv import load_dotenv

import pandas as pd
import numpy as np

from pynytimes import NYTAPI

In [None]:
def get_news(nyt: NYTAPI,start_date: datetime.datetime, end_date: datetime.datetime, *, k=100, query=""):
    """
    get top k most relevent finance news headings on each day from NY times
    """
    res = []
    try:
        articles = nyt.article_search(
            query = query, 
            results = k,
            dates = {
                "begin": start_date,
                "end": end_date
            },
            options = {
                "sort": "relevance", 
                "sources": [
                    "New York Times",
                    "AP",
                    "Reuters",
                    "International Herald Tribune"
                ],
                # Only get information from these news desk
                "news_desk": [
                    "Business",
                    "Financial",
                    "Technology"
                ],
                # "body": [
                # ],
            }
        )
        
        for i in range(len(articles)):
            timestamp = articles[i]['pub_date']
            article_url = articles[i]['web_url']
            lead_paragraph = articles[i]['lead_paragraph']
            abstract = articles[i]['abstract']
            #print(articles[i])
            res.append({'timestamp': timestamp,
                        'article_url': article_url,
                        'lead_paragraph': lead_paragraph,
                        'abstract': abstract,
                        })
        
    except Exception as e:
        print(e)
        res.append({})
    
    df = pd.DataFrame(data=res)

    return df

#### Function to check if the news falls before or after market closes on each day. If it's after close, then the news should be categorised as a next day news.

In [14]:
def adjust_dates(df: pd.DataFrame) -> pd.DataFrame:
    threshold_time = pd.Timestamp('16:00:00').time()
    df['adjusted_date'] = df['timestamp'].apply(lambda x: x.date() if x.time() < threshold_time else (x + pd.Timedelta(days=1)).date())
    return df

#### Set NYTAPI

In [None]:
load_dotenv(dotenv_path='../.env')
NYT_API_KEY = os.getenv("NYT_API_KEY")

nyt = NYTAPI(NYT_API_KEY, parse_dates=True)

## 1. Get articles tesla 

In [None]:
start_date = datetime.datetime(2019, 9, 30)
end_date = datetime.datetime(2024, 9, 27)

In [None]:
k_results = int((end_date - start_date).days * 3)  # How many results to pull for. (x3 as a (conservative) estimate for the max articles that exist.)

article_df = get_news(nyt, start_date, end_date, k=k_results, query='Tesla') 
article_df = adjust_dates(article_df)
article_df = article_df.sort_values(by='timestamp')

In [None]:
article_df

Unnamed: 0,timestamp,article_url,lead_paragraph,abstract,adjusted_date
394,2019-10-02 10:56:22+00:00,https://www.nytimes.com/2019/10/02/business/de...,Good Wednesday morning. (Was this email forwar...,"In leaked audio conversations with employees, ...",2/10/19
50,2019-10-02 20:52:59+00:00,https://www.nytimes.com/2019/10/02/business/te...,Tesla reported record quarterly deliveries on ...,The pace may allow the electric-car maker to f...,3/10/19
207,2019-10-09 10:12:51+00:00,https://www.nytimes.com/2019/10/09/business/de...,Good Wednesdayg. Breaking: The O.E.C.D. propos...,It once was clear what American businesses nee...,9/10/19
372,2019-10-10 10:29:53+00:00,https://www.nytimes.com/2019/10/10/business/de...,Good morning. ( Sign up here to get this in yo...,The tech giant removed an app after facing cri...,10/10/19
189,2019-10-10 18:22:29+00:00,https://www.nytimes.com/2019/10/10/business/pg...,Pacific Gas & Electric’s unusual decision to c...,The California utility turned off power to hun...,11/10/19
...,...,...,...,...,...
1080,2024-09-13 11:58:07+00:00,https://www.nytimes.com/2024/09/13/business/de...,Months after the Miami Dolphins owner Stephen ...,Owner Stephen Ross is talking to private equit...,13/9/24
919,2024-09-18 12:00:08+00:00,https://www.nytimes.com/2024/09/18/business/en...,Owners of electric cars and pickup trucks made...,"After a delay of several months, General Motor...",18/9/24
1316,2024-09-23 12:03:19+00:00,https://www.nytimes.com/2024/09/23/business/de...,First came news that Qualcomm had made an info...,Rivals and at least one big investor are circl...,23/9/24
1333,2024-09-23 12:03:19+00:00,https://www.nytimes.com/2024/09/23/business/de...,First came news that Qualcomm had made an info...,Rivals and at least one big investor are circl...,23/9/24


In [None]:
filename = '../data/Tesla/tesla_headlines.csv'
article_df.to_csv(filename, index=False)

'test'

## 1. Get articles S&P (Market) 

In [29]:
start_date = datetime.datetime(2019, 9, 30)
end_date = datetime.datetime(2024, 9, 27)

In [None]:
k_results = int((end_date - start_date).days * 3)  # How many results to pull for. (x3 as a (conservative) estimate for the max articles that exist.)

article_df = get_news(nyt, start_date, end_date, k=k_results) 
article_df = adjust_dates(article_df)
article_df = article_df.sort_values(by='timestamp')

'test'

In [None]:
article_df

Unnamed: 0,timestamp,article_url,lead_paragraph,abstract,adjusted_date
8,2019-09-30 00:00:15+00:00,https://www.nytimes.com/2019/09/29/business/ho...,"In a part of the world familiar with conflict,...",The territory’s traditional role as a gateway ...,30/9/19
7,2019-09-30 01:04:31+00:00,https://www.nytimes.com/2019/09/29/business/fo...,"Forever 21, the California retailer that helpe...",The retailer fell victim to the eroding power ...,30/9/19
6,2019-09-30 09:00:07+00:00,https://www.nytimes.com/2019/09/30/business/ch...,“Fight the Landlord” is one of the most popula...,"State media puts pressure on Li Ka-shing, a po...",30/9/19
5,2019-09-30 10:30:01+00:00,https://www.nytimes.com/2019/09/30/business/de...,Good Monday morning. (Was this email forwarded...,The venture capitalist Fred Wilson has come up...,30/9/19
4,2019-09-30 14:34:31+00:00,https://www.nytimes.com/2019/09/30/business/gr...,"Seamless, the food delivery service started tw...",Restaurant owners say Grubhub’s business model...,30/9/19
...,...,...,...,...,...
12044,2024-09-27 15:11:30+00:00,https://www.nytimes.com/2024/09/27/technology/...,"Over the past week, Google has called more tha...","The tech giant, which wrapped up its arguments...",27/9/24
12043,2024-09-27 17:45:52+00:00,https://www.nytimes.com/2024/09/27/business/me...,The brief era of muted microphones appears to ...,CBS News has announced the rules and format fo...,28/9/24
12042,2024-09-27 19:38:45+00:00,https://www.nytimes.com/2024/09/27/business/en...,A firm backed by Elliott Investment Management...,"The sale, which requires a judge’s approval, w...",28/9/24
12041,2024-09-27 20:53:49+00:00,https://www.nytimes.com/2024/09/27/technology/...,"OpenAI, the San Francisco start-up behind Chat...",As the company looks for more outside investor...,28/9/24


In [None]:
filename = '../data/S&P_market/s&p_headlines.csv'
article_df.to_csv(filename, index=False)