In [None]:
# OpenWeatherMap History API - how to make an API call: https://openweathermap.org/history

In [None]:
import os
import time
from datetime import datetime, timedelta

import pandas as pd
import requests

In [None]:
# Fetch hourly historical weather for a date range from the OpenWeatherMap History API
OPENWEATHERMAP_HISTORY_URL = 'https://history.openweathermap.org/data/2.5/history/city'


def _celsius_to_fahrenheit(celsius):
    """Convert a Celsius reading to Fahrenheit, rounded to 2 decimals (None passes through)."""
    if celsius is None:
        return None
    return round(celsius * 9 / 5 + 32, 2)


def _parse_history_record(history):
    """Flatten one entry of the API response's ``list`` array into a row dict.

    Only ``dt`` is required (a missing ``dt`` raises ``KeyError``). Every other
    field is optional: absent measurements become ``None``, and absent rainfall
    becomes ``0`` as in the original code.
    """
    main = history.get('main') or {}
    wind = history.get('wind') or {}
    clouds = history.get('clouds') or {}
    # 'weather' is a list of condition objects; only the first one is used.
    condition = (history.get('weather') or [{}])[0]

    return {
        # fromtimestamp() without tz renders the reading in the machine's local time zone.
        'Date': datetime.fromtimestamp(history['dt']).strftime('%m/%d/%Y %H:%M:%S'),
        # The request asks for metric units, so temperatures arrive in Celsius.
        'Max Temperature (F)': _celsius_to_fahrenheit(main.get('temp_max')),
        'Min Temperature (F)': _celsius_to_fahrenheit(main.get('temp_min')),
        'Humidity (%)': main.get('humidity'),
        'Weather': condition.get('main'),
        'Description': condition.get('description'),
        'Rainfall (mm)': (history.get('rain') or {}).get('1h', 0),
        'Pressure (hPa)': main.get('pressure'),
        'Wind Speed (m/s)': wind.get('speed'),
        'Wind Direction (°)': wind.get('deg'),
        'Clouds (%)': clouds.get('all'),
    }


def fetch_weather_data(start_date, end_date, api_key=None, city_id=1850147, timeout=30):
    """Download hourly weather history between two datetimes (both inclusive).

    One HTTP request is issued per hour in the range, so long ranges are slow.
    A failed request (network error, non-200 status, unparsable body) is
    reported with ``print`` and skipped; it never aborts the whole run.

    Parameters
    ----------
    start_date, end_date : datetime
        First and last hour to request. Nothing is fetched if
        ``start_date > end_date``.
    api_key : str, optional
        OpenWeatherMap API key. When omitted, the ``OPENWEATHERMAP_API_KEY``
        environment variable is used, falling back to the original
        ``'your_api_key'`` placeholder.
    city_id : int, optional
        OpenWeatherMap city id; defaults to the id the original URL used.
    timeout : float, optional
        Seconds to wait for each HTTP response.

    Returns
    -------
    list of dict
        One row per record returned by the API, ready for ``pd.DataFrame``.
    """
    if api_key is None:
        api_key = os.environ.get('OPENWEATHERMAP_API_KEY', 'your_api_key')

    weather_data = []
    current_date = start_date
    while current_date <= end_date:
        timestamp = int(current_date.timestamp())
        # start == end: each request targets a single hourly timestamp.
        params = {
            'id': city_id,
            'type': 'hour',
            'start': timestamp,
            'end': timestamp,
            'appid': api_key,
            'units': 'metric',
        }

        records = None
        failure = None
        try:
            response = requests.get(OPENWEATHERMAP_HISTORY_URL, params=params, timeout=timeout)
            if response.status_code == 200:
                records = response.json().get('list') or []
            else:
                failure = f"HTTP {response.status_code}"
        except (requests.RequestException, ValueError) as exc:
            # ValueError covers a 200 response whose body is not valid JSON.
            failure = str(exc)

        if records is None:
            print(f"Error fetching weather data for {current_date}: {failure}")
        else:
            for history in records:
                row = _parse_history_record(history)
                print(f"{row['Date']}: Successful")
                weather_data.append(row)

        # Advance by one hour of absolute time (3600 s), not by one calendar day.
        current_date = datetime.fromtimestamp(current_date.timestamp() + 3600)

    return weather_data

In [None]:
# First fetch window: starts 2023-07-29 and spans 180 days (roughly six months)
start_date1 = datetime(year=2023, month=7, day=29)
end_date1 = start_date1 + timedelta(days=180)

In [None]:
# Second fetch window: continues from the end of the first window up to 2024-07-29.
# fetch_weather_data advances hour by hour and includes both endpoints, so this
# window must begin one hour (not one day) after end_date1; a one-day offset
# would skip the 01:00-23:00 readings of that day.
start_date2 = end_date1 + timedelta(hours=1)
end_date2 = datetime(2024, 7, 29)

In [None]:
# Download the raw rows for both windows (one list of row dicts per window)
weather_data_1 = fetch_weather_data(start_date=start_date1, end_date=end_date1)
weather_data_2 = fetch_weather_data(start_date=start_date2, end_date=end_date2)

In [None]:
# Turn each list of row dicts into a DataFrame (one column per dict key)
df1 = pd.DataFrame(data=weather_data_1)
df2 = pd.DataFrame(data=weather_data_2)

In [None]:
# Save each window to its own CSV file, leaving out the row index
df1.to_csv(path_or_buf='df1.csv', index=False)
df2.to_csv(path_or_buf='df2.csv', index=False)

In [None]:
# Fetch 2024-01-25 through 2024-01-26 on its own; any rows that overlap the
# other windows are removed later when duplicates are dropped by 'Date'.
start_date3 = datetime(year=2024, month=1, day=25)
end_date3 = datetime(year=2024, month=1, day=26)
weather_data_3 = fetch_weather_data(start_date=start_date3, end_date=end_date3)
df3 = pd.DataFrame(data=weather_data_3)

In [None]:
# Stack the three DataFrames row-wise in the order df1, df3, df2 and renumber the index
combined_data_df = pd.concat(objs=[df1, df3, df2], axis=0, ignore_index=True)
combined_data_df

In [None]:
# Flag every row whose 'Date' already appeared earlier, then report those dates and their count
duplicates_by_date = combined_data_df.duplicated(subset=['Date'])
duplicate_dates = combined_data_df.loc[duplicates_by_date, 'Date']
print("Duplicate rows based on 'Date' column:\n", duplicate_dates, len(duplicate_dates))

In [None]:
# Keep only the first row seen for each 'Date' value
df_cleaned = combined_data_df.drop_duplicates(subset='Date', keep='first')

In [None]:
# Write the de-duplicated data to the final CSV file, leaving out the row index
df_cleaned.to_csv(path_or_buf='weather-yearly-historical-data-tokyo_072824.csv', index=False)