In [37]:
import requests
import time
import json
import os
import sys
import re
from dotenv import load_dotenv
from collections import defaultdict


load_dotenv()

True

In [16]:
api_key = os.getenv('NYT_API_KEY')

In [31]:
def fetch_articles(query, start_date, end_date, page=0):
    params = {
        'q': query,  # Substitua 'Google' com sua consulta de pesquisa
        'api-key': api_key,
        'begin_date': start_date,
        'end_date': end_date,
        'page': page
    }
    
    url = "https://api.nytimes.com/svc/search/v2/articlesearch.json"
    response = requests.get(url, params=params)
    
    print("Request URL:", response.url)
    print("Response Headers:", response.headers)
    
    if response.status_code != 200:
        print('Failed to fetch data:', response.status_code)
        sys.exit(1)
    
    return response.json()

In [41]:
url = 'https://api.nytimes.com/svc/search/v2/articlesearch.json'

# Dicionário para armazenar os artigos por ano e mês
articles_by_year_month_after2015 = defaultdict(list)

# Loop sobre cada ano e mês de 2015 a 2021
for year in range(2016, 2022):  # De 2015 a 2021
    for month in range(1, 13):  # Cada mês de janeiro (1) a dezembro (12)
        begin_date = f'{year}{month:02d}01'  # Primeiro dia do mês
        end_date = f'{year}{month:02d}28'  # Último dia do mês (para simplificar)

        # Parâmetros da API
        params = {
            'q': 'Google',  # Alterar para a palavra-chave desejada
            'api-key': api_key,
            'begin_date': begin_date,
            'end_date': end_date
        }

        # Fazer a requisição
        response = requests.get(url, params=params)
        print(f"Fetching articles from {begin_date} to {end_date}...")

        if response.status_code != 200:
            print('Failed to fetch data:', response.status_code)
            continue

        data = response.json()

        # Pegar os artigos do mês
        articles = data.get('response', {}).get('docs', [])

        # Armazenar os artigos no dicionário por ano e mês
        for article in articles:
            headline = article['headline']['main'] if 'headline' in article and 'main' in article['headline'] else 'No headline'
            web_url = article['web_url'] if 'web_url' in article else 'No URL'

            # Armazena no formato 'YYYYMM'
            articles_by_year_month_after2015[begin_date[:6]].append({
                'title': headline,
                'url': web_url
            })

        # Aguardar para evitar limite de taxa da API
        time.sleep(12)

# Exibir os artigos agrupados por ano e mês
for year_month, articles in articles_by_year_month_after2015.items():
    print(f"Articles from {year_month}:")
    for article in articles:
        print(f" - Title: {article['title']}")
        print(f"   URL: {article['url']}")
    print('---')

    


Fetching articles from 20160101 to 20160128...
Fetching articles from 20160201 to 20160228...
Fetching articles from 20160301 to 20160328...
Fetching articles from 20160401 to 20160428...
Fetching articles from 20160501 to 20160528...
Fetching articles from 20160601 to 20160628...
Fetching articles from 20160701 to 20160728...
Fetching articles from 20160801 to 20160828...
Fetching articles from 20160901 to 20160928...
Fetching articles from 20161001 to 20161028...
Fetching articles from 20161101 to 20161128...
Fetching articles from 20161201 to 20161228...
Fetching articles from 20170101 to 20170128...
Fetching articles from 20170201 to 20170228...
Fetching articles from 20170301 to 20170328...
Fetching articles from 20170401 to 20170428...
Fetching articles from 20170501 to 20170528...
Fetching articles from 20170601 to 20170628...
Fetching articles from 20170701 to 20170728...
Fetching articles from 20170801 to 20170828...
Fetching articles from 20170901 to 20170928...
Fetching arti

In [40]:
articles_by_year_month



defaultdict(list,
            {'201501': [{'title': 'WikiLeaks Assails Google and the U.S.',
               'url': 'https://www.nytimes.com/2015/01/27/world/europe/wikileaks-assails-google-and-the-us.html'},
              {'title': 'Google and Fidelity Put $1 Billion Into SpaceX',
               'url': 'https://www.nytimes.com/2015/01/21/technology/google-makes-1-billion-investment-in-spacex.html'},
              {'title': 'Video Feature: Conjuring the Most From Google Android',
               'url': 'https://www.nytimes.com/2015/01/22/technology/personaltech/video-feature-android-lollipop-tips-and-tricks.html'},
              {'title': 'What Happens if Apple Drops Google From Its Browser?',
               'url': 'https://bits.blogs.nytimes.com/2015/01/28/what-happens-if-apple-drops-google-from-its-browser/'},
              {'title': 'A Retreat for Google Glass and a Case Study in the Perils of Making Hardware',
               'url': 'https://bits.blogs.nytimes.com/2015/01/18/a-retreat

In [39]:
articles

[{'title': 'Daily Report: Google’s Lousy Time at the D.M.V.',
  'url': 'https://bits.blogs.nytimes.com/2015/12/17/daily-report-googles-lousy-time-at-the-d-m-v/'},
 {'title': 'Gossip and Scandal Lead Google’s Top Searches in 2015',
  'url': 'https://www.nytimes.com/2015/12/17/technology/google-top-search-terms-2015.html'},
 {'title': 'Google’s Latest Steps to Increase Its Use of Renewable Energy',
  'url': 'https://www.nytimes.com/2015/12/04/business/googles-latest-steps-to-increase-its-use-of-renewable-energy.html'},
 {'title': 'Google Cultural Institute Puts Us All Onstage',
  'url': 'https://www.nytimes.com/2015/12/02/arts/music/google-cultural-institute-puts-us-all-onstage.html'},
 {'title': 'How to Use Smart Reply in Gmail Inbox',
  'url': 'https://www.nytimes.com/2015/12/25/technology/personaltech/how-to-use-smart-reply-in-gmail-inbox.html'},
 {'title': 'Missteps in Europe’s Online Privacy Bill',
  'url': 'https://www.nytimes.com/2015/12/21/opinion/missteps-in-europes-online-priva