In [23]:
import requests
import pandas as pd



In [87]:
def fetch_news(api_key, query, from_date=None, to_date=None, language='en', sort_by='publishedAt', page_size=100, timeout=30):
    """
    Fetch news articles from the News API and return a pandas DataFrame.

    Sends a single GET request to the ``/v2/everything`` endpoint and builds
    a DataFrame from the ``articles`` list in the JSON response.

    Parameters:
        api_key (str): Your News API key.
        query (str): Keywords or phrases to search for in the news.
        from_date (str): Start date for the news in YYYY-MM-DD format.
        to_date (str): End date for the news in YYYY-MM-DD format.
        language (str): Language of the news articles (default is 'en').
        sort_by (str): The order to sort articles. Options: 'relevancy', 'popularity', 'publishedAt'.
        page_size (int): Number of articles per page (max is 100).
        timeout (float or tuple): Seconds to wait for the server before
            giving up; passed straight to ``requests.get`` (default is 30).

    Returns:
        pd.DataFrame: One row per returned article, or an empty DataFrame
        when the response contains no articles.

    Raises:
        RuntimeError: If the API answers with a status code other than 200.
        requests.exceptions.RequestException: On network failures,
            including timeouts.
    """
    url = "https://newsapi.org/v2/everything"
    # requests omits parameters whose value is None, so unset dates are
    # simply left out of the query string.
    params = {
        'q': query,
        'from': from_date,
        'to': to_date,
        'language': language,
        'sortBy': sort_by,
        'pageSize': page_size,
        'apiKey': api_key,
    }

    # Without a timeout a stalled connection would block forever.
    response = requests.get(url, params=params, timeout=timeout)

    if response.status_code != 200:
        # Error bodies are not guaranteed to be JSON objects (e.g. an HTML
        # page from a proxy); fall back to raw text so the status code is
        # still reported instead of being masked by a decode error.
        try:
            message = response.json().get('message')
        except (ValueError, AttributeError):
            message = response.text[:200] or response.reason
        raise RuntimeError(f"Failed to fetch data: {response.status_code} - {message}")

    # Parse the body once and reuse it.
    payload = response.json()
    articles = payload.get('articles', [])
    if not articles:
        print("No articles found.")
        return pd.DataFrame()

    # Create a DataFrame from the articles
    return pd.DataFrame(articles)

# Example usage
if __name__ == "__main__":
    import os

    # Never commit a real API key to source. Export it before running, e.g.
    #   export NEWSAPI_KEY="your_api_key_here"
    API_KEY = os.environ.get("NEWSAPI_KEY")
    QUERY = "(real estate OR property) AND (prices OR transactions)"
    FROM_DATE = "2024-10-19"
    TO_DATE = "2024-11-19"
    SORT_BY = "relevancy"

    if not API_KEY:
        print("Error: set the NEWSAPI_KEY environment variable to your News API key.")
    else:
        # Top-level boundary of the example script: report any failure
        # (HTTP error, network problem, file write error) instead of a traceback.
        try:
            news_df = fetch_news(API_KEY, QUERY, FROM_DATE, TO_DATE, language="en", sort_by=SORT_BY)
            print(news_df.head())  # Display the first few rows of the DataFrame
            # DataFrame.info() prints its report itself and returns None, so
            # it must not be wrapped in print().
            news_df.info()
            if news_df.empty:
                # Avoid overwriting an earlier export with an empty file.
                print("Nothing to save.")
            else:
                news_df.to_csv('newsapi_news.csv', index=False)
                print("DataFrame saved to 'newsapi_news.csv'.")
        except Exception as e:
            print(f"Error: {e}")

                                              source  \
0  {'id': 'business-insider', 'name': 'Business I...   
1                       {'id': None, 'name': 'CNET'}   
2        {'id': None, 'name': 'Yahoo Entertainment'}   
3  {'id': 'business-insider', 'name': 'Business I...   
4               {'id': None, 'name': 'Slashdot.org'}   

                                          author  \
0                                   Jennifer Sor   
1                                 Katherine Watt   
2                                    Jami Farkas   
3  kelkins@businessinsider.com (Kathleen Elkins)   
4                                    EditorDavid   

                                               title  \
0  Commercial real estate foreclosures jumped alm...   
1  Zillow Doomscrolling: Why I Can't Stop Looking...   
2  3 Best States To Buy Property in the Next 5 Ye...   
3  A real-estate investor says he's cracked the c...   
4  Washington Post Employees Ordered Back To the ...   

             