In [None]:
# -*- coding: utf-8 -*-
"""
AI-Powered Climate Change Monitoring Demo

Based on the project description: AI-Powered Climate Change Monitoring and Mitigation System.
This Colab notebook demonstrates core concepts using live data:
1. Data Acquisition (Weather API)
2. Data Visualization
3. Short-Term Forecasting (Temperature Prediction)
4. Anomaly Detection
"""

# @title 1. Project Setup and Dependencies
# Install necessary libraries (if not already included in Colab)
!pip install requests pandas numpy matplotlib seaborn scikit-learn tensorflow plotly kaleido folium statsmodels --quiet

import requests
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
import plotly.express as px
from sklearn.ensemble import IsolationForest
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
import folium
import json
from datetime import datetime, timedelta
import time # For potential API rate limiting

print("Libraries installed and imported.")

# @title 2. Configuration
# --- Parameters for Data Acquisition ---

# Data source: Open-Meteo API (free, no key required for basic use)
# Reference: https://open-meteo.com/

# Coordinates of the location to analyse (here: London, UK). They are passed to
# fetch_climate_data() and reused to centre the folium map.
LATITUDE = 51.5074
LONGITUDE = -0.1278
LOCATION_NAME = "London, UK"  # Label shown in plot titles and on the map marker

# Hourly weather variables to request. The names are joined with commas into
# the API's "hourly" query parameter and are also the resulting DataFrame columns.
# Options: temperature_2m, relativehumidity_2m, apparent_temperature, precipitation, rain, snowfall,
#          weathercode, surface_pressure, cloudcover, et0_fao_evapotranspiration, vapor_pressure_deficit,
#          windspeed_10m, winddirection_10m, windgusts_10m, soil_temperature_0_to_7cm, soil_moisture_0_to_7cm etc.
# Also possible: air quality variables like pm2_5, carbon_monoxide, nitrogen_dioxide, sulphur_dioxide, ozone (needs air_quality model in API call)
# Check the API docs for availability. We start with a small set.
HOURLY_VARIABLES = ["temperature_2m", "precipitation", "relativehumidity_2m"]

# Length of the requested history: the date range starts this many days before today
PAST_DAYS = 90 # ~3 months of recent hourly data for analysis and training

# --- Parameters for Models ---
N_STEPS_LSTM = 5  # Look-back window: number of consecutive hourly readings fed to the LSTM per sample
N_FEATURES_LSTM = 1 # Univariate model: temperature is predicted from past temperature only

ANOMALY_CONTAMINATION = 0.02 # Passed as `contamination` to IsolationForest: expected share of outliers


# @title 3. Data Acquisition (Live API Call)

def fetch_climate_data(latitude, longitude, past_days, hourly_vars, timeout=30):
    """Fetch hourly historical weather data from the Open-Meteo archive API.

    Args:
        latitude: Value sent as the ``latitude`` query parameter.
        longitude: Value sent as the ``longitude`` query parameter.
        past_days: Number of days before today at which the requested date
            range starts; the range ends today.
        hourly_vars: Iterable of hourly variable names, joined with commas
            into the ``hourly`` query parameter.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The decoded JSON payload, or ``None`` when the request fails
        (network error, timeout, HTTP 4XX/5XX) or the body is not valid JSON.
    """
    print(f"Fetching data for coordinates: ({latitude}, {longitude})")
    BASE_URL = "https://archive-api.open-meteo.com/v1/archive" # Use archive API for recent past

    # Read the clock once so both ends of the range derive from the same
    # instant (two separate calls could straddle midnight).
    now = datetime.now()
    end_date = now.strftime('%Y-%m-%d')
    start_date = (now - timedelta(days=past_days)).strftime('%Y-%m-%d')

    params = {
        "latitude": latitude,
        "longitude": longitude,
        "start_date": start_date,
        "end_date": end_date,
        "hourly": ",".join(hourly_vars),
        "timezone": "auto", # Use local timezone of the coordinates
    }

    try:
        response = requests.get(BASE_URL, params=params, timeout=timeout)
        response.raise_for_status() # Raise HTTPError for bad responses (4XX or 5XX)
    except requests.exceptions.RequestException as e:
        print(f"Error fetching data: {e}")
        return None

    # Decode in its own try block: recent versions of requests raise a JSON
    # error that is *also* a RequestException, so a shared try block would
    # route it to the generic handler above and never print the response text.
    # ValueError covers both that exception and the stdlib json.JSONDecodeError.
    try:
        data = response.json()
    except ValueError:
        print("Error decoding JSON response.")
        print("Response text:", response.text) # Print response if it's not valid JSON
        return None

    print("Data fetched successfully!")
    return data

# Download the raw API payload for the configured location
raw_data = fetch_climate_data(LATITUDE, LONGITUDE, PAST_DAYS, HOURLY_VARIABLES)

# Build a time-indexed DataFrame from the 'hourly' section of the payload.
# On any failure `df` ends up as an empty DataFrame so later cells can test
# `df.empty` and skip themselves.
if not raw_data:
    print("Could not proceed without data.")
    df = pd.DataFrame()
else:
    hourly_data = raw_data.get('hourly', {})
    df = pd.DataFrame(hourly_data)

    if 'time' in df.columns and not df.empty:
        # Parse the timestamps and promote them to the index
        df = df.assign(time=pd.to_datetime(df['time'])).set_index('time')
        print(f"\nDataframe created with shape: {df.shape}")
        print("\nFirst 5 rows:", df.head(), sep="\n")
        print("\nLast 5 rows:", df.tail(), sep="\n")
        print("\nData Info:")
        df.info()
    else:
        print("Error: 'time' column not found in hourly data or data is empty.")
        print("Raw Data Structure:", raw_data) # Helps debug API response issues
        df = pd.DataFrame() # Reset to empty to avoid downstream errors

# @title 4. Data Preprocessing and Cleaning

# Cleans `df` in place: coerce the weather columns to numeric, impute gaps by
# forward/backward fill, then drop any row that still has a NaN.
if not df.empty:
    # Coerce to numeric FIRST (the API might return strings if errors occur).
    # Unparseable entries become NaN here, so they are counted in the report
    # below and imputed by the fill step. Filling before coercion would leave
    # these NaNs un-imputed and the final dropna() would delete the whole
    # hourly row, punching holes in the time series.
    for col in HOURLY_VARIABLES:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors='coerce')

    # Check for missing values
    print("\nMissing values per column:")
    print(df.isnull().sum())

    # Simple imputation: forward fill for time series data.
    # More sophisticated methods could be used (interpolation, model-based imputation).
    # NOTE(review): if the archive endpoint returns nulls for the most recent
    # days, forward fill extends the last reading across them as a flat line —
    # confirm whether trailing empty rows should be trimmed instead.
    df.ffill(inplace=True) # Forward fill carries the last valid reading over gaps
    df.bfill(inplace=True) # Back fill handles any remaining NaNs at the beginning

    print("\nMissing values after forward/backward fill:")
    print(df.isnull().sum())

    # Only a column with no valid value at all can still contain NaNs here;
    # drop such rows so the models never see NaN.
    df.dropna(inplace=True)

    print(f"\nDataframe shape after cleaning: {df.shape}")
    print("\nBasic Statistics:")
    print(df.describe())
else:
    print("Skipping preprocessing as DataFrame is empty.")


# @title 5. Exploratory Data Analysis (EDA) & Visualization

# Produces three kinds of output: an interactive multi-variable time series
# (`fig_ts`), one histogram per variable, and a folium map (`m`) of the site.
if df.empty:
    print("Skipping Visualization as DataFrame is empty.")
else:
    print("\n--- Generating Visualizations ---")

    # --- 5.1 Time series: one line trace per requested variable (Plotly) ---
    print("\nPlotting Time Series...")
    fig_ts = go.Figure()
    for col in HOURLY_VARIABLES:
        series_trace = go.Scatter(x=df.index, y=df[col], mode='lines', name=col)
        fig_ts.add_trace(series_trace)

    ts_layout = dict(
        title=f'Hourly Climate Variables for {LOCATION_NAME} (Last {PAST_DAYS} Days)',
        xaxis_title='Time',
        yaxis_title='Value',
        legend_title='Variable',
        hovermode="x unified", # One shared hover box for all traces
    )
    fig_ts.update_layout(**ts_layout)
    fig_ts.show()

    # --- 5.2 Distributions: one histogram per variable (Plotly Express) ---
    print("\nPlotting Distributions...")
    for col in HOURLY_VARIABLES:
        fig_hist = px.histogram(df, x=col, title=f'Distribution of {col}')
        fig_hist.show()

    # --- 5.3 Location map with a single marker (Folium) ---
    print("\nGenerating Location Map...")
    map_center = [LATITUDE, LONGITUDE]
    m = folium.Map(location=map_center, zoom_start=10)
    location_marker = folium.Marker(
        map_center,
        popup=f"{LOCATION_NAME}\nLat: {LATITUDE}\nLon: {LONGITUDE}",
        tooltip=LOCATION_NAME,
    )
    location_marker.add_to(m)
    display(m) # Render the map in the notebook output


# @title 6. Climate Prediction Model (Short-Term Temperature Forecast - LSTM)

# Trains a small univariate LSTM that predicts the next hourly temperature from
# the previous N_STEPS_LSTM readings, then evaluates it on the last 20% of the
# series (chronological split, no shuffling).
if not df.empty and 'temperature_2m' in df.columns and len(df) > N_STEPS_LSTM * 2: # Ensure enough data for train/test split
    print("\n--- Building Short-Term Temperature Forecast Model (LSTM) ---")

    # Temperature as a single-feature column vector, shape (n_rows, 1)
    temp_data = df['temperature_2m'].values.reshape(-1, 1)

    # Work out the chronological split BEFORE scaling. Each sample i uses rows
    # i .. i+N_STEPS_LSTM-1 as input and row i+N_STEPS_LSTM as target, so the
    # training samples (i < split_idx) touch rows 0 .. train_end-1 only.
    n_samples = len(temp_data) - N_STEPS_LSTM
    split_idx = int(n_samples * 0.8)
    train_end = split_idx + N_STEPS_LSTM

    # Fit the scaler on the training rows only. Fitting on the full series
    # would leak the test period's min/max into training (data leakage) and
    # make the test error look better than it is. Test values may therefore
    # fall slightly outside [0, 1]; that is expected.
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaler.fit(temp_data[:train_end])
    scaled_data = scaler.transform(temp_data)

    def create_sequences(data, n_steps):
        """Slice a series into sliding windows and their next-step targets.

        Args:
            data: Array of shape (n_rows, n_features).
            n_steps: Window length.

        Returns:
            Tuple ``(X, y)`` of arrays with ``len(data) - n_steps`` samples:
            ``X[i]`` is ``data[i:i + n_steps]`` and ``y[i]`` is
            ``data[i + n_steps]``.
        """
        n_windows = len(data) - n_steps
        X = [data[i:i + n_steps] for i in range(n_windows)]
        y = [data[i + n_steps] for i in range(n_windows)]
        return np.array(X), np.array(y)

    X, y = create_sequences(scaled_data, N_STEPS_LSTM)

    # Reshape for LSTM [samples, timesteps, features]
    X = X.reshape((X.shape[0], X.shape[1], N_FEATURES_LSTM))

    # Split data (first 80% of samples for training, rest for testing)
    X_train, X_test = X[:split_idx], X[split_idx:]
    y_train, y_test = y[:split_idx], y[split_idx:]
    # Target of sample i is row i + N_STEPS_LSTM, hence the offset before slicing
    time_index_test = df.index[N_STEPS_LSTM:][split_idx:] # Timestamps of the test targets, for plotting

    print(f"Training data shape: X={X_train.shape}, y={y_train.shape}")
    print(f"Testing data shape: X={X_test.shape}, y={y_test.shape}")

    # Build the LSTM Model
    model_lstm = Sequential()
    model_lstm.add(LSTM(50, activation='relu', input_shape=(N_STEPS_LSTM, N_FEATURES_LSTM)))
    model_lstm.add(Dense(1))
    model_lstm.compile(optimizer='adam', loss='mse') # Mean Squared Error loss

    # Train the Model
    print("\nTraining LSTM model...")
    # Increase epochs for better performance, but keep low for demo speed
    history = model_lstm.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.1, verbose=0) # verbose=0 to keep output clean
    print("Training complete.")

    # Plot training loss
    plt.figure(figsize=(10, 5))
    plt.plot(history.history['loss'], label='Training Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.title('LSTM Model Training Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss (MSE)')
    plt.legend()
    plt.grid(True)
    plt.show()

    # Make Predictions
    y_pred_scaled = model_lstm.predict(X_test)
    y_pred = scaler.inverse_transform(y_pred_scaled) # Inverse scale to get actual temperature values
    y_test_actual = scaler.inverse_transform(y_test) # Inverse scale the true values for comparison

    # Visualize Predictions vs Actual
    print("\nPlotting Forecast vs Actual...")
    fig_pred = go.Figure()
    fig_pred.add_trace(go.Scatter(x=time_index_test, y=y_test_actual.flatten(), mode='lines', name='Actual Temperature'))
    fig_pred.add_trace(go.Scatter(x=time_index_test, y=y_pred.flatten(), mode='lines', name='Predicted Temperature (LSTM)'))

    fig_pred.update_layout(
        title=f'Temperature Forecast vs Actual for {LOCATION_NAME} (Test Set)',
        xaxis_title='Time',
        yaxis_title='Temperature (°C)',
        legend_title='Data',
        hovermode="x unified",
    )
    fig_pred.show()

    # Simple error metric: Mean Absolute Error in original units
    mae = np.mean(np.abs(y_pred - y_test_actual))
    print(f"\nMean Absolute Error (MAE) on Test Set: {mae:.2f} °C")
    print("\nNote: This is a simple short-term forecast based only on recent temperature history.")
    print("Real climate prediction requires much more data and complex models.")

else:
     print("Skipping Prediction: DataFrame is empty, 'temperature_2m' column is missing, or not enough data points.")

# @title 7. Anomaly Detection (using Isolation Forest)

# Fits an Isolation Forest on the temperature column, stores the per-row label
# in df['anomaly_temp'] (-1 = anomaly, 1 = inlier) and plots the flagged points.
if df.empty or 'temperature_2m' not in df.columns:
    print("Skipping Anomaly Detection: DataFrame is empty or 'temperature_2m' column is missing.")
else:
    print("\n--- Performing Anomaly Detection on Temperature Data ---")

    # Single-feature input; double brackets keep it a DataFrame
    temp_data_if = df[['temperature_2m']].copy()

    # Fixed random_state keeps the flagged set stable between runs
    model_iforest = IsolationForest(
        n_estimators=100,
        contamination=ANOMALY_CONTAMINATION,
        random_state=42,
    )

    # Fit, then label every row: -1 for anomalies, 1 for inliers
    df['anomaly_temp'] = model_iforest.fit(temp_data_if).predict(temp_data_if)

    # Keep only the rows flagged as anomalous
    is_anomaly = df['anomaly_temp'] == -1
    anomalies = df[is_anomaly]
    print(f"\nDetected {len(anomalies)} potential anomalies in temperature data.")

    # Full temperature line with red 'x' markers on the flagged points
    print("\nPlotting Temperature with Detected Anomalies...")
    temperature_line = go.Scatter(
        x=df.index,
        y=df['temperature_2m'],
        mode='lines',
        name='Temperature (°C)',
    )
    anomaly_markers = go.Scatter(
        x=anomalies.index,
        y=anomalies['temperature_2m'],
        mode='markers',
        marker=dict(color='red', size=8, symbol='x'),
        name='Detected Anomaly',
    )

    fig_anomaly = go.Figure()
    fig_anomaly.add_trace(temperature_line)
    fig_anomaly.add_trace(anomaly_markers)
    fig_anomaly.update_layout(
        title=f'Temperature Time Series with Anomaly Detection for {LOCATION_NAME}',
        xaxis_title='Time',
        yaxis_title='Temperature (°C)',
        legend_title='Data',
        hovermode="x unified",
    )
    fig_anomaly.show()

    if len(anomalies) > 0:
        print("\nDetails of detected anomalies:")
        print(anomalies[['temperature_2m', 'anomaly_temp']])


# @title 8. Simple Dashboard / Summary Visualization

# Re-displays the key outputs of the earlier cells as a "dashboard-like"
# overview. A real application would use Dash/Streamlit, Power BI or Tableau.

if df.empty:
    print("Cannot generate summary dashboard components as DataFrame is empty.")
else:
    print("\n--- Summary Dashboard Components ---")

    # (heading, variable holding the output, message when that cell was skipped)
    dashboard_items = (
        ("\n1. Time Series Overview:", 'fig_ts', "Time series plot not available."),
        ("\n2. Temperature Forecast (Test Set):", 'fig_pred', "Forecast plot not available."),
        ("\n3. Temperature Anomaly Detection:", 'fig_anomaly', "Anomaly plot not available."),
        ("\n4. Location Context:", 'm', "Map not available."),
    )

    for heading, var_name, fallback in dashboard_items:
        print(heading)
        if var_name not in locals(): # Output was never generated
            print(fallback)
        elif var_name == 'm':
            display(m) # The folium map is rendered via display(), not .show()
        else:
            locals()[var_name].show()


# @title 9. Conclusion & Future Work

# Closing summary, emitted as one block of text. Entries starting with "\n"
# produce a blank line before that paragraph.
closing_lines = (
    "\n--- Conclusion & Future Work ---",
    "This Colab notebook demonstrated a basic workflow for AI-powered climate monitoring:",
    "1. Fetched near real-time hourly weather data using the Open-Meteo API.",
    "2. Cleaned and visualized the time series data (temperature, precipitation, humidity).",
    "3. Implemented a short-term temperature forecast using an LSTM model.",
    "4. Applied Isolation Forest to detect anomalies in the temperature data.",
    "\nThis is a simplified demonstration based on the comprehensive system described in 'project 2.txt'.",
    "\nPotential Future Enhancements (Closer to the Full Vision):",
    "- **Integrate More Data Sources:** Add satellite imagery (e.g., NDVI, land surface temperature via Google Earth Engine API), CO2 concentration data (e.g., MethaneSAT, NOAA), sea level data, etc.",
    "- **Advanced Prediction Models:** Utilize more complex models like CNNs, RNNs (Transformers) on larger, multi-variate datasets for more accurate and longer-term climate event predictions (requires significant historical data).",
    "- **Sophisticated Anomaly Detection:** Explore multi-variate anomaly detection and algorithms robust to seasonality (e.g., Autoencoders, Prophet).",
    "- **Impact Visualization:** Develop dynamic, interactive dashboards using tools like Plotly Dash, Streamlit, Tableau, or Power BI for policymakers and public access.",
    "- **Geospatial Analysis:** Perform deeper analysis using GIS tools, correlating climate data with geographic features (deforestation, urban sprawl).",
    "- **Mitigation Simulation (Advanced):** Carefully design simulation environments to test policy impacts using Reinforcement Learning or Agent-Based Modeling (complex research area).",
    "- **Scalability & Deployment:** Move from Colab to a scalable cloud platform (AWS, GCP, Azure) using Big Data tools (Spark, Dask) and MLOps practices for continuous integration and model updates.",
    "- **Regional Customization:** Adapt models and dashboards for specific regions and climate challenges.",
)
print("\n".join(closing_lines))