Defaulting to user installation because normal site-packages is not writeable
Collecting zipfile36
  Using cached zipfile36-0.1.3-py3-none-any.whl.metadata (736 bytes)
Note: you may need to restart the kernel to use updated packages.


ERROR: Could not find a version that satisfies the requirement io (from versions: none)
ERROR: No matching distribution found for io


In [8]:
import requests
import zipfile
import io
import pandas as pd
from datetime import date

# Set the date for the file you want to download
# Using a specific date that is known to be available
# Change this date as needed, for example: date_to_download = date(2025, 9, 21)
date_to_download = date(2025, 9, 21)
date_str = date_to_download.strftime("%Y%m%d")

# Construct the URL
url = f"http://data.gdeltproject.org/events/{date_str}.export.CSV.zip"
print(f"Attempting to download GDELT file for {date_str}...")

try:
    # Download the compressed file
    response = requests.get(url)
    
    # Check if the download was successful
    if response.status_code == 200:
        print("File downloaded successfully.")
        
        # Read the zip content into memory
        zip_content = io.BytesIO(response.content)
        
        # Decompress the CSV file and load into a pandas DataFrame
        with zipfile.ZipFile(zip_content, 'r') as zip_ref:
            csv_file_name = zip_ref.namelist()[0]
            df = pd.read_csv(zip_ref.open(csv_file_name), sep='\t', header=None, encoding='ISO-8859-1')
        
        print("Data loaded into DataFrame.")
        
        # Define the Excel file name
        excel_filename = f"gdelt_events_{date_str}.xlsx"
        
        # Export the DataFrame to an Excel file
        df.to_excel(excel_filename, index=False)
        
        print(f"\nData successfully exported to '{excel_filename}'.")
    elif response.status_code == 404:
        print(f"Error 404: The file for {date_str} was not found on the GDELT server.")
        print("Possible reasons: The data has not been published yet or the date is incorrect.")
        print("Please check the GDELT data download page to confirm availability.")
    else:
        print(f"An unexpected error occurred with status code: {response.status_code}")

except requests.exceptions.RequestException as e:
    print(f"Network error: {e}")
except zipfile.BadZipFile:
    print("The downloaded file is not a valid ZIP file.")
except Exception as e:
    print(f"An unexpected error occurred: {e}")

Attempting to download GDELT file for 20250921...
File downloaded successfully.


  df = pd.read_csv(zip_ref.open(csv_file_name), sep='\t', header=None, encoding='ISO-8859-1')


Data loaded into DataFrame.

Data successfully exported to 'gdelt_events_20250921.xlsx'.


In [None]:
import requests
import zipfile
import io
import pandas as pd
from datetime import date, timedelta

# List to store data from each day
all_data = []

# Get yesterday's date
yesterday = date.today() - timedelta(days=1)
# Calculate the date 7 days ago, relative to yesterday
start_date = yesterday - timedelta(days=6)

print(f"Attempting to download GDELT data from {start_date} to {yesterday}...")

# Loop through the last 7 complete days (from 7 days ago until yesterday)
for i in range(7):
    # Calculate the date for the current iteration
    current_date = start_date + timedelta(days=i)
    date_str = current_date.strftime("%Y%m%d")

    # Construct the URL for the daily file
    url = f"http://data.gdeltproject.org/events/{date_str}.export.CSV.zip"
    
    try:
        # Download the compressed file
        response = requests.get(url)

        # Check if the download was successful
        if response.status_code == 200:
            print(f"File for {date_str} downloaded successfully.")
            
            # Read the zip content into memory
            zip_content = io.BytesIO(response.content)
            
            # Decompress the CSV file and load into a pandas DataFrame
            with zipfile.ZipFile(zip_content, 'r') as zip_ref:
                csv_file_name = zip_ref.namelist()[0]
                df = pd.read_csv(zip_ref.open(csv_file_name), sep='\t', header=None, encoding='ISO-8859-1')
            
            # Add a column for the date to the DataFrame
            df['Date'] = current_date
            
            # Append the DataFrame to the list
            all_data.append(df)
            
        elif response.status_code == 404:
            print(f"Skipping {date_str}: The file was not found (Error 404). This is expected for today's data.")
        else:
            print(f"An unexpected error occurred for {date_str} with status code: {response.status_code}")

    except requests.exceptions.RequestException as e:
        print(f"Network error while downloading {date_str}: {e}")
    except zipfile.BadZipFile:
        print(f"The downloaded file for {date_str} is not a valid ZIP file.")
    except Exception as e:
        print(f"An unexpected error occurred while processing {date_str}: {e}")

# Check if any data was downloaded
if all_data:
    print("\nConsolidating all downloaded data...")
    # Concatenate all DataFrames into a single one
    final_df = pd.concat(all_data, ignore_index=True)
    
    # Define the Excel file name
    excel_filename = "gdelt_events_last_7_complete_days.xlsx"
    
    try:
        # Export the final DataFrame to an Excel file
        final_df.to_excel(excel_filename, index=False)
        print(f"\nData successfully exported to '{excel_filename}'.")
    except Exception as e:
        print(f"An error occurred while exporting to Excel: {e}")
else:
    print("\nNo data was downloaded. The Excel file was not created.")

Attempting to download GDELT data from 2025-09-16 to 2025-09-22...
File for 20250916 downloaded successfully.


  df = pd.read_csv(zip_ref.open(csv_file_name), sep='\t', header=None, encoding='ISO-8859-1')


File for 20250917 downloaded successfully.
File for 20250918 downloaded successfully.


  df = pd.read_csv(zip_ref.open(csv_file_name), sep='\t', header=None, encoding='ISO-8859-1')


File for 20250919 downloaded successfully.


  df = pd.read_csv(zip_ref.open(csv_file_name), sep='\t', header=None, encoding='ISO-8859-1')


File for 20250920 downloaded successfully.
File for 20250921 downloaded successfully.


  df = pd.read_csv(zip_ref.open(csv_file_name), sep='\t', header=None, encoding='ISO-8859-1')


File for 20250922 downloaded successfully.

Consolidating all downloaded data...
