<a href="https://colab.research.google.com/github/gerritgr/Spatio-Temporal-Correlation/blob/main/Download_Weather_Data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from datetime import datetime, timedelta
import random
import time
import pandas as pd
from meteostat import Point, Daily

# Date range to sample from. Both endpoints are eligible because
# random.randint() below is inclusive on both ends.
start_date = datetime(2015, 1, 1)
end_date = datetime(2016, 12, 31)

# NYC latitude and longitude ranges
lat_min = 40.4774
lat_max = 40.9176
lon_min = -74.2591
lon_max = -73.7004

# Sampling and checkpoint settings, named here instead of being buried
# as magic numbers in the loop.
num_samples = 100000   # number of random (date, location) requests
save_every = 100       # rewrite the CSV after this many requests
request_delay = 0.1    # seconds to pause between requests
output_csv = 'temperature_nyc_2015_2016_data.csv'

# Loop-invariant: number of days between the two endpoints.
date_span_days = (end_date - start_date).days

# Accumulates one record (dict) per request, including failed ones.
data_list = []

# Fetch the daily average temperature for `num_samples` random dates and
# random locations inside the bounding box above.
for i in range(num_samples):
    # Draw a random date, then a random latitude/longitude.
    random_date = start_date + timedelta(days=random.randint(0, date_span_days))
    random_lat = random.uniform(lat_min, lat_max)
    random_lon = random.uniform(lon_min, lon_max)

    # Create a Point for the random location
    location = Point(random_lat, random_lon)

    # Default to "missing"; only overwritten when a value is actually read.
    tavg = None
    try:
        # Request exactly one day of data (start == end).
        data = Daily(location, random_date, random_date)
        data = data.fetch()

        if not data.empty:
            tavg = data.iloc[0]['tavg']
    except Exception as e:
        # Deliberate best-effort: one failed request must not abort the
        # whole run, so report it and record the sample as missing.
        print(f"Error fetching data for date {random_date.date()} and location ({random_lat}, {random_lon}): {e}")

    # Exactly one record per request, whether it succeeded, returned no
    # rows, or raised.
    data_list.append({
        'date': random_date.strftime('%Y-%m-%d'),
        'latitude': random_lat,
        'longitude': random_lon,
        'tavg': tavg,
    })

    # Checkpoint: rewrite the full CSV every `save_every` requests so an
    # interrupted run loses at most `save_every - 1` records.
    if (i + 1) % save_every == 0:
        df = pd.DataFrame(data_list)
        df.to_csv(output_csv, index=False)
        print(f"Saved {i + 1} records to '{output_csv}'")

    # Sleep to avoid hitting rate limits
    time.sleep(request_delay)

# If num_samples is not a multiple of save_every, the trailing records
# were never written by the in-loop checkpoint; flush them now.
if num_samples % save_every != 0:
    df = pd.DataFrame(data_list)
    df.to_csv(output_csv, index=False)
    print(f"Saved {len(data_list)} records to '{output_csv}'")
