In [6]:
# Optional: scraper used to collect the original data. Not required to run the analysis.
import requests
import pandas as pd
from datetime import datetime, timedelta

def scrape_hunza_weather_6months(days=9476, filename='hunza_weather_2years.csv', timeout=120):
    """
    Download daily weather history for the Karimabad area (Hunza Valley)
    from the Open-Meteo archive API, save it as a CSV file and print a summary.

    NOTE: the function name ("6months") and the default CSV name ("2years")
    are historical and kept only so existing callers and files keep working;
    the actual window is `days` days ending today (9476 days by default).

    Parameters
    ----------
    days : int
        Number of days before today at which the requested range starts.
    filename : str
        Path of the CSV file the DataFrame is written to.
    timeout : float or tuple
        Timeout in seconds handed to `requests.get`, so a stalled
        connection fails instead of blocking forever.

    Returns
    -------
    pandas.DataFrame or None
        One row per day with the renamed weather columns plus
        `sunshine_hours` and `daylight_hours`; None if the request or the
        processing failed (the error is printed, not raised).
    """
    # Karimabad area
    latitude = 36.3167
    longitude = 74.6500

    # Requested window: from `days` days ago up to today
    end_date = datetime.now()
    start_date = end_date - timedelta(days=days)

    # Format dates for API
    start = start_date.strftime('%Y-%m-%d')
    end = end_date.strftime('%Y-%m-%d')

    print("Fetching weather data for given cordinates")
    print(f" from: {start} to {end}\n")

    # Open-Meteo endpoint
    url = "https://archive-api.open-meteo.com/v1/archive"

    # API daily variable -> CSV column name. This single mapping drives both
    # the request and the DataFrame, so the two can never get out of sync.
    # Insertion order is the column order of the output file.
    daily_columns = {
        'temperature_2m_max': 'max_temperature_celsius',
        'temperature_2m_min': 'min_temperature_celsius',
        'temperature_2m_mean': 'average_temperature_celsius',
        'apparent_temperature_max': 'feels_like_max_celsius',
        'apparent_temperature_min': 'feels_like_min_celsius',
        'precipitation_sum': 'total_precipitation_mm',
        'rain_sum': 'rainfall_mm',
        'snowfall_sum': 'snowfall_cm',
        'precipitation_hours': 'hours_of_precipitation',
        'weathercode': 'weather_condition_code',
        'sunrise': 'sunrise_time',
        'sunset': 'sunset_time',
        'sunshine_duration': 'sunshine_duration_seconds',
        'daylight_duration': 'daylight_duration_seconds',
        'windspeed_10m_max': 'max_wind_speed_kmh',
        'windgusts_10m_max': 'max_wind_gust_kmh',
        'winddirection_10m_dominant': 'wind_direction_degrees',
        'shortwave_radiation_sum': 'solar_radiation_MJ_per_m2',
        'et0_fao_evapotranspiration': 'water_evaporation_mm',
    }

    params = {
        'latitude': latitude,
        'longitude': longitude,
        'start_date': start,
        'end_date': end,
        'daily': list(daily_columns),
        'timezone': 'Asia/Karachi',
    }

    try:
        # Without a timeout a hung connection would block the caller forever.
        response = requests.get(url, params=params, timeout=timeout)
        response.raise_for_status()

        daily = response.json()['daily']

        # DataFrame: date first, then every requested variable under its
        # readable column name.
        columns = {'date': daily['time']}
        columns.update(
            (column, daily[variable])
            for variable, column in daily_columns.items()
        )
        df = pd.DataFrame(columns)

        # sunshine/daylight duration to hours
        df['sunshine_hours'] = df['sunshine_duration_seconds'] / 3600
        df['daylight_hours'] = df['daylight_duration_seconds'] / 3600

        # CSV
        df.to_csv(filename, index=False)

        print(f"Success! Data saved to '{filename}'")
        print(f"Total records: {len(df)} days")
        print(f"Temperature range: {df['min_temperature_celsius'].min():.1f}°C to {df['max_temperature_celsius'].max():.1f}°C")
        print(f"Coldest feels-like: {df['feels_like_min_celsius'].min():.1f}°C")
        print(f" Hottest feels-like: {df['feels_like_max_celsius'].max():.1f}°C")
        print(f"Total precipitation: {df['total_precipitation_mm'].sum():.1f} mm")
        print(f"Total snowfall: {df['snowfall_cm'].sum():.1f} cm")
        print(f"Average sunshine: {df['sunshine_hours'].mean():.1f} hours/day")
        print(f"Max wind gust: {df['max_wind_gust_kmh'].max():.1f} km/h")

        print("\n First 5 rows of data:")
        print(df.head().to_string(index=False))

        print("\n Last 5 rows of data:")
        print(df.tail().to_string(index=False))

        # Basic statistics
        print("\n Summary Statistics:")
        print(df.describe().round(2))

        return df

    except requests.exceptions.RequestException as e:
        # Network failure, timeout or non-2xx HTTP status.
        print(f" Error getting data: {e}")
        return None
    except Exception as e:
        # Anything else, e.g. an unexpected response layout or a CSV write
        # error; this is a script-level boundary, so report and return None.
        print(f" Error processing data: {e}")
        return None


if __name__ == "__main__":
    # Run the scraper; it returns None (after printing the error) on failure.
    weather_data = scrape_hunza_weather_6months()

    if weather_data is not None:
        print("\n  saved csv file sucessfully ")
    else:
        # Keep the failure message professional and actionable.
        print("\n Failed to fetch weather data. "
              "Check your network connection and the request parameters.")

Fetching weather data for given cordinates
 from: 2000-01-01 to 2025-12-11

Success! Data saved to 'hunza_weather_2years.csv'
Total records: 9477 days
Temperature range: -28.9°C to 34.3°C
Coldest feels-like: -33.6°C
 Hottest feels-like: 36.6°C
Total precipitation: 22265.1 mm
Total snowfall: 11971.0 cm
Average sunshine: 9.8 hours/day
Max wind gust: 110.9 km/h

 First 5 rows of data:
      date  max_temperature_celsius  min_temperature_celsius  average_temperature_celsius  feels_like_max_celsius  feels_like_min_celsius  total_precipitation_mm  rainfall_mm  snowfall_cm  hours_of_precipitation  weather_condition_code     sunrise_time      sunset_time  sunshine_duration_seconds  daylight_duration_seconds  max_wind_speed_kmh  max_wind_gust_kmh  wind_direction_degrees  solar_radiation_MJ_per_m2  water_evaporation_mm  sunshine_hours  daylight_hours
2000-01-01                     -2.7                    -13.5                         -9.2                    -6.1                   -17.8          