In [None]:
import pandas as pd
import requests
from datetime import datetime, timedelta

# OpenWeatherMap API key; replace the placeholder before running.
API_KEY = 'YOUR_API_KEY'

# Geographic bounding box, in degrees of longitude / latitude.
lon_min = -125
lon_max = -70
lat_min = 25
lat_max = 50

# Time window: starts at 2020-05-01 00:00 and spans 30 days.
start_time = datetime(2020, 5, 1, 0, 0)
end_time = start_time + timedelta(days=30)  # Example: 30 days of data

# Hourly timestamps across the window; both endpoints are included.
# The lowercase 'h' alias is used because the uppercase 'H' alias is
# deprecated in recent pandas releases.
timestamps = pd.date_range(start=start_time, end=end_time, freq='h')

# Function to fetch historical climate observations
def fetch_observations(lat, lon):
    """Fetch one historical observation per entry in the global ``timestamps``.

    Every timestamp is sent as the ``dt`` parameter of its own request to the
    One Call "timemachine" endpoint, authenticated with the module-level
    ``API_KEY``.  A failed request or an unexpected payload is reported on
    stdout and skipped, so one bad hour does not abort the whole run.

    NOTE(review): OpenWeatherMap has announced the retirement of One Call 2.5
    in favour of 3.0 -- confirm this endpoint is still served for your key.

    Args:
        lat: Latitude forwarded to the API as ``lat``.
        lon: Longitude forwarded to the API as ``lon``.

    Returns:
        list[dict]: One dict per successful request with the keys
        ``timestamp``, ``temperature``, ``humidity`` and ``weather``.
    """
    # HTTPS keeps the API key (sent as a query parameter) out of clear text.
    url = "https://api.openweathermap.org/data/2.5/onecall/timemachine"
    observations = []

    # A single session reuses the connection across the many hourly calls.
    with requests.Session() as session:
        for timestamp in timestamps:
            params = {
                'lat': lat,
                'lon': lon,
                'dt': int(timestamp.timestamp()),
                'appid': API_KEY,
                'units': 'metric'  # Use 'imperial' for Fahrenheit
            }
            try:
                # Without a timeout a stalled connection would block forever.
                response = session.get(url, params=params, timeout=30)
            except requests.RequestException as exc:
                print(f"Error fetching data for {timestamp}: {exc}")
                continue

            if response.status_code != 200:
                print(f"Error fetching data for {timestamp}: {response.status_code}")
                continue

            try:
                current = response.json()['current']
                observations.append({
                    'timestamp': timestamp,
                    'temperature': current['temp'],
                    'humidity': current['humidity'],
                    'weather': current['weather'][0]['description']
                })
            except (ValueError, KeyError, IndexError, TypeError) as exc:
                # Body was not JSON or lacked the expected fields.
                print(f"Error fetching data for {timestamp}: unexpected payload ({exc!r})")

    return observations

# Function to fetch forecast data
def fetch_forecasts(lat, lon):
    """Fetch the hourly section of the One Call response for one location.

    The request excludes the ``current``, ``minutely`` and ``daily`` sections
    and is authenticated with the module-level ``API_KEY``.

    Args:
        lat: Latitude forwarded to the API as ``lat``.
        lon: Longitude forwarded to the API as ``lon``.

    Returns:
        The value stored under the ``'hourly'`` key of the JSON response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or timeout.
        KeyError: If the response has no ``'hourly'`` section.
    """
    # HTTPS keeps the API key (sent as a query parameter) out of clear text.
    url = "https://api.openweathermap.org/data/2.5/onecall"
    params = {
        'lat': lat,
        'lon': lon,
        'exclude': 'current,minutely,daily',
        'appid': API_KEY,
        'units': 'metric'  # Use 'imperial' for Fahrenheit
    }
    # Without a timeout a stalled connection would block forever.
    response = requests.get(url, params=params, timeout=30)
    response.raise_for_status()
    return response.json()['hourly']

# Coordinates for the center of the specified bounding box
lat_center = (lat_min + lat_max) / 2
lon_center = (lon_min + lon_max) / 2

# Fetch data
observations = fetch_observations(lat_center, lon_center)
forecasts = fetch_forecasts(lat_center, lon_center)

# Process and combine the data into a DataFrame
observations_df = pd.DataFrame(observations)
if observations_df.empty:
    # Without this guard the sort below fails with an opaque
    # KeyError: 'timestamp' when every observation request failed.
    raise RuntimeError("No observations were fetched; nothing to merge.")

# Create a DataFrame for forecast data.
# The 'dt' values are Unix epoch seconds; convert them to naive UTC so they
# share a time base with the observation timestamps (pandas treats the naive
# observation timestamps as UTC when deriving their epoch values).
# datetime.fromtimestamp() would shift them by the machine's local offset.
forecasts_df = pd.DataFrame({
    'timestamp': pd.to_datetime([f['dt'] for f in forecasts], unit='s'),
    'temperature': [f['temp'] for f in forecasts],
    'humidity': [f['humidity'] for f in forecasts],
    'weather': [f['weather'][0]['description'] for f in forecasts]
})

# Merge on timestamps: each observation row is paired with the forecast row
# whose timestamp is closest. Overlapping column names receive the default
# '_x' (observation) / '_y' (forecast) suffixes.
# NOTE(review): the forecast request carries no date, so its hours presumably
# lie far from the 2020 observation window -- confirm 'nearest' is intended.
combined_df = pd.merge_asof(observations_df.sort_values('timestamp'),
                             forecasts_df.sort_values('timestamp'),
                             on='timestamp', direction='nearest')

# Output or save the combined data
combined_df.to_csv('climate_data_combined.csv', index=False)
print("Data ingestion complete. Combined data saved to 'climate_data_combined.csv'.")


In [None]:
import requests
import pandas as pd
from datetime import datetime, timedelta

# Function to get historical weather data
def get_historical_weather(api_key, lat, lon, date):
    """Request historical weather for one location and point in time.

    Args:
        api_key: OpenWeatherMap API key, sent as ``appid``.
        lat: Latitude sent as ``lat``.
        lon: Longitude sent as ``lon``.
        date: Object exposing ``timestamp()``; its value, truncated to whole
            seconds, is sent as ``dt``.

    Returns:
        The decoded JSON response on HTTP 200, otherwise ``None`` (after
        printing the failure reason).
    """
    # HTTPS keeps the API key (sent as a query parameter) out of clear text.
    url = 'https://api.openweathermap.org/data/2.5/onecall/timemachine'
    # Let requests build and escape the query string instead of an f-string.
    params = {
        'lat': lat,
        'lon': lon,
        'dt': int(date.timestamp()),
        'appid': api_key,
    }
    try:
        # Without a timeout a stalled connection would block forever.
        response = requests.get(url, params=params, timeout=30)
    except requests.RequestException as exc:
        print(f"Error fetching data: {exc}")
        return None

    if response.status_code != 200:
        print(f"Error fetching data: {response.status_code}")
        return None
    return response.json()

# Define parameters
api_key = 'YOUR_API_KEY'  # Replace with your OpenWeatherMap key
lat = 37.0  # Latitude of your area
lon = -95.0  # Longitude of your area
start_date = '2020-05-01'
end_date = '2020-05-07'

# Collect historical weather data: one request per day in the range.
weather_data = []
for single_date in pd.date_range(start=start_date, end=end_date):
    data = get_historical_weather(api_key, lat, lon, single_date)
    if not data:
        # Request failed (already reported by the helper); skip this day.
        continue
    # Extract relevant data; a response without an 'hourly' section simply
    # contributes no rows instead of raising KeyError.
    for hourly in data.get('hourly', []):
        weather_data.append({
            # Epoch seconds are interpreted as UTC, matching how the request
            # timestamps were derived from the naive dates above; the local
            # timezone of the machine must not shift the calendar date.
            'date': pd.to_datetime(hourly['dt'], unit='s').date(),
            'temperature': hourly['temp'],
            'condition': hourly['weather'][0]['description'],
            'generated_at': data['current']['dt']  # Check this field
        })

# Convert to DataFrame for easier analysis
df = pd.DataFrame(weather_data)

# Display results
print(df)

# Check if data was based on information available before May 1, 2020.
# The cutoff is parsed once and compared in the same naive-UTC time base as
# the 'generated_at' epochs.
cutoff = pd.Timestamp(start_date)
for row in df.itertuples(index=False):
    generated_time = pd.to_datetime(row.generated_at, unit='s')
    if generated_time < cutoff:
        print(f"Weather data for {row.date} is based on data before May 1, 2020.")
    else:
        print(f"Weather data for {row.date} may not be based on data before May 1, 2020.")
