# Part A: Data Collection & Processing
## Weather Emergency Prediction - Rostov Region

This notebook handles:
- Data loading from Excel/CSV files
- Data preprocessing and cleaning
- Feature engineering
- Data visualization

In [None]:
# Install required packages
# https://l4o2un2sj0p.sg.larksuite.com/wiki/VdF6wcDKAiV4Omkh2XVlOtzvgEh?from=from_copylink

!pip install pandas numpy scikit-learn matplotlib seaborn plotly openpyxl

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from datetime import datetime, timedelta
from scipy.interpolate import interp1d
from sklearn.preprocessing import StandardScaler

# Set random seed for reproducibility.
# This seeds NumPy's legacy global RNG (the `np.random.*` functions), so the cell has
# to run before any cell that draws random numbers through `np.random`.
np.random.seed(42)

print("✅ Libraries imported successfully")

## 1. Data Generation/Loading

### Option 1: Upload your own Excel/CSV files
### Option 2: Generate synthetic data

In [None]:
# Configuration: location and time span used by the data-generation cells
ROSTOV_COORDS = dict(lat=47.2357, lon=39.7015)  # latitude / longitude written to every row
START_YEAR = 2015      # first year of the generated series
YEARS_OF_DATA = 30     # number of years requested from the generator

def generate_weather_data(start_year=2015, num_years=30, coords=None):
    """Generate synthetic daily weather data for the Rostov region.

    The series covers whole calendar years: 1 January of ``start_year`` through
    31 December of ``start_year + num_years - 1``, leap days included.

    Args:
        start_year: First calendar year of the series (int).
        num_years: Number of consecutive calendar years to generate (int).
        coords: Optional mapping with ``'lat'`` and ``'lon'`` keys written to the
            ``latitude`` / ``longitude`` columns. Defaults to ``ROSTOV_COORDS``.

    Returns:
        DataFrame with one row per day and the columns ``date``, ``latitude``,
        ``longitude``, ``temperature``, ``precipitation``, ``humidity``,
        ``wind_speed`` and ``pressure``.

    Note:
        All values are drawn from NumPy's global RNG (``np.random``); call
        ``np.random.seed`` beforehand for repeatable output.
    """
    print(f"Generating {num_years} years of weather data...")

    # Only fall back to the notebook-level constant when no location is given.
    if coords is None:
        coords = ROSTOV_COORDS

    # Build the range from calendar boundaries instead of 365 * num_years days:
    # the day-count version ignores leap days and, because the end is inclusive,
    # never lands on a year boundary.
    first_day = pd.Timestamp(year=start_year, month=1, day=1)
    last_day = (
        pd.Timestamp(year=start_year + num_years, month=1, day=1)
        - pd.Timedelta(days=1)
    )
    dates = pd.date_range(start=first_day, end=last_day, freq='D')

    n = len(dates)
    day_of_year = dates.dayofyear.to_numpy()

    # Temperature with seasonal pattern. The 105-day shift moves the sine maximum
    # to mid-July (day ~196) and the minimum to mid-January; without the shift the
    # warmest day of the year would fall in early April.
    temp_base = 10 + 15 * np.sin(2 * np.pi * (day_of_year - 105) / 365)
    temperature = temp_base + np.random.normal(0, 5, n)

    # Precipitation: the chance of a wet day varies over the year (highest around
    # the turn of the year); wet-day amounts are gamma distributed, dry days are 0.
    precip_prob = 0.3 + 0.2 * np.sin(2 * np.pi * day_of_year / 365 + np.pi/2)
    precipitation = np.random.gamma(2, 5, n) * (np.random.random(n) < precip_prob)

    # Humidity follows the same seasonal phase as the wet-day probability and is
    # clipped to the valid percentage range.
    humidity = np.clip(
        50 + 20 * np.sin(2 * np.pi * day_of_year / 365 + np.pi/2) + np.random.normal(0, 10, n),
        0, 100
    )

    # Wind speed (gamma draws are already non-negative)
    wind_speed = np.random.gamma(3, 2, n)

    # Pressure: noise around 1013
    pressure = 1013 + np.random.normal(0, 10, n)

    df = pd.DataFrame({
        'date': dates,
        'latitude': coords['lat'],
        'longitude': coords['lon'],
        'temperature': temperature,
        'precipitation': precipitation,
        'humidity': humidity,
        'wind_speed': wind_speed,
        'pressure': pressure
    })

    print(f"✅ Generated {len(df)} days of weather data")
    return df

# Build the synthetic weather table from the configured start year and span
weather_df = generate_weather_data(start_year=START_YEAR, num_years=YEARS_OF_DATA)
weather_df.head(n=5)

In [None]:
def generate_emergency_data(weather_df):
    """Generate emergency events based on weather conditions.

    Every weather row is checked against four rules. A rule that matches still
    has to pass a random gate before an event is recorded:

    * heatwave - temperature > 35, probability 0.3, severity (temperature - 35) / 2
    * drought  - precipitation < 1 and humidity < 30, probability 0.1,
                 severity drawn uniformly from [3, 7)
    * flood    - precipitation > 50, probability 0.4, severity precipitation / 10
    * frost    - temperature < -20, probability 0.3, severity |temperature + 20| / 2

    Computed severities are capped at 10. One row can produce several events.

    Args:
        weather_df: DataFrame with the columns ``date``, ``latitude``,
            ``longitude``, ``temperature``, ``precipitation`` and ``humidity``.

    Returns:
        DataFrame with the columns ``date``, ``type``, ``severity``, ``latitude``
        and ``longitude`` - also when no event was generated, so that downstream
        column access such as ``emergency_df['type']`` keeps working.

    Note:
        Random numbers come from NumPy's global RNG (``np.random``).
    """
    print("Generating emergency events...")

    columns = ['date', 'type', 'severity', 'latitude', 'longitude']
    emergencies = []

    def _event(row, kind, severity):
        # One output record; date and location are copied from the weather row.
        return {
            'date': row['date'],
            'type': kind,
            'severity': severity,
            'latitude': row['latitude'],
            'longitude': row['longitude']
        }

    # Plain dict records are much cheaper to iterate than iterrows().
    # The `and` short-circuit is kept on purpose: a random number is only consumed
    # when the weather condition holds, so the RNG stream stays unchanged.
    for row in weather_df.to_dict('records'):
        # Heatwave: temp > 35°C
        if row['temperature'] > 35 and np.random.random() < 0.3:
            emergencies.append(
                _event(row, 'heatwave', min(10, (row['temperature'] - 35) / 2))
            )

        # Drought: low precipitation + low humidity
        if row['precipitation'] < 1 and row['humidity'] < 30 and np.random.random() < 0.1:
            emergencies.append(_event(row, 'drought', np.random.uniform(3, 7)))

        # Flood: heavy precipitation
        if row['precipitation'] > 50 and np.random.random() < 0.4:
            emergencies.append(
                _event(row, 'flood', min(10, row['precipitation'] / 10))
            )

        # Frost: very low temperature
        if row['temperature'] < -20 and np.random.random() < 0.3:
            emergencies.append(
                _event(row, 'frost', min(10, abs(row['temperature'] + 20) / 2))
            )

    # Passing `columns` guarantees the schema even for an empty event list.
    df = pd.DataFrame(emergencies, columns=columns)
    print(f"✅ Generated {len(df)} emergency events")
    return df

# Derive the emergency events from the weather table generated above
emergency_df = generate_emergency_data(weather_df=weather_df)
emergency_df.head(n=5)

## 2. Upload Your Own Data (Alternative)

In [None]:
# Uncomment to upload your own files
# from google.colab import files
# uploaded = files.upload()

# # Load weather data
# weather_file = list(uploaded.keys())[0]
# if weather_file.endswith('.csv'):
#     weather_df = pd.read_csv(weather_file)
# else:
#     weather_df = pd.read_excel(weather_file)
# weather_df['date'] = pd.to_datetime(weather_df['date'])

# # Load emergency data
# emergency_file = list(uploaded.keys())[1]
# if emergency_file.endswith('.csv'):
#     emergency_df = pd.read_csv(emergency_file)
# else:
#     emergency_df = pd.read_excel(emergency_file)
# emergency_df['date'] = pd.to_datetime(emergency_df['date'])

## 3. Data Exploration & Visualization

In [None]:
# Basic statistics: numeric summary of the weather table, then event counts per type
rule = "=" * 50

print(rule, "WEATHER DATA STATISTICS", rule, sep="\n")
print(weather_df.describe())

print("\n" + rule, "EMERGENCY DATA STATISTICS", rule, sep="\n")
print(emergency_df['type'].value_counts())

In [None]:
# Visualize temperature trends

# Monthly mean / min / max of the daily temperature, keyed by calendar month
monthly_temp = (
    weather_df
    .groupby(weather_df['date'].dt.to_period('M'))['temperature']
    .agg(['mean', 'min', 'max'])
    .reset_index()
)
# Plot against real timestamps (start of each month) rather than Period objects
monthly_temp['date'] = monthly_temp['date'].dt.to_timestamp()

fig = go.Figure()

# (column, legend label, line style) - one trace per aggregate, in legend order
for stat, label, line_style in (
    ('mean', 'Avg Temperature', dict(color='orange')),
    ('max', 'Max Temperature', dict(color='red', dash='dot')),
    ('min', 'Min Temperature', dict(color='blue', dash='dot')),
):
    fig.add_trace(
        go.Scatter(
            x=monthly_temp['date'],
            y=monthly_temp[stat],
            name=label,
            line=line_style,
        )
    )

fig.update_layout(
    title='Temperature Trends - Rostov-on-Don',
    xaxis_title='Date',
    yaxis_title='Temperature (°C)',
    height=500,
)
fig.show()

In [None]:
# Share of each emergency type, drawn as a donut chart
fig = px.pie(
    data_frame=emergency_df,
    names='type',
    hole=0.3,
    title='Emergency Types Distribution',
)
fig.show()

## 4. Data Preprocessing

In [None]:
def remove_outliers(df, columns, n_std=3.0):
    """Remove outliers using the z-score method.

    A row is dropped when its value in any of ``columns`` lies more than
    ``n_std`` standard deviations from that column's mean. Mean and standard
    deviation are always taken from the unfiltered input, so the result does not
    depend on the order of ``columns``.

    Args:
        df: Input DataFrame; it is not modified.
        columns: Column names to screen. Names missing from ``df`` are ignored.
        n_std: Allowed distance from the mean, in standard deviations.

    Returns:
        A filtered copy of ``df`` with the original index labels.

    Notes:
        * Missing values are not outliers: rows with NaN in a screened column
          are kept.
        * A column whose standard deviation is undefined (fewer than two values)
          or zero (constant column) is skipped instead of wiping out every row.
    """
    keep = np.ones(len(df), dtype=bool)

    for col in columns:
        if col not in df.columns:
            continue

        values = df[col]
        mean = values.mean()
        std = values.std()

        # No spread to measure against: comparing with NaN or 0 would reject
        # rows for reasons unrelated to the data.
        if pd.isna(std) or std == 0:
            continue

        within = ((values - mean).abs() <= n_std * std).to_numpy()
        keep &= within | values.isna().to_numpy()

    df_clean = df[keep].copy()

    print(f"Removed {len(df) - len(df_clean)} outlier rows")
    return df_clean

# Remove outliers.
# Only temperature is screened. Precipitation is zero on most days and heavily
# right-skewed, so a 3-sigma cut on it would throw away the heavy-rain days -
# the very days the flood rule (precipitation > 50) is built on - and would leave
# gaps in the daily series that the rolling and lag features are computed over.
weather_clean = remove_outliers(weather_df, ['temperature'])

In [None]:
def create_time_features(df):
    """Return a copy of ``df`` extended with calendar and cyclical time columns.

    The ``date`` column is parsed with ``pd.to_datetime`` first. Added columns:
    ``year``, ``month``, ``day``, ``day_of_year``, ``week_of_year`` (ISO week),
    ``season`` (1 for Dec-Feb, 2 for Mar-May, 3 for Jun-Aug, 4 for Sep-Nov) and
    the sine/cosine pairs ``month_sin``/``month_cos`` (period 12) and
    ``day_sin``/``day_cos`` (day of year, period 365).
    """
    out = df.copy()
    out['date'] = pd.to_datetime(out['date'])
    when = out['date'].dt

    # Plain calendar components
    out['year'] = when.year
    out['month'] = when.month
    out['day'] = when.day
    out['day_of_year'] = when.dayofyear
    out['week_of_year'] = when.isocalendar().week.astype(int)
    out['season'] = out['month'] % 12 // 3 + 1

    # Cyclical encoding: (output prefix, source column, period length)
    for prefix, source, period in (('month', 'month', 12), ('day', 'day_of_year', 365)):
        angle = 2 * np.pi * out[source] / period
        out[f'{prefix}_sin'] = np.sin(angle)
        out[f'{prefix}_cos'] = np.cos(angle)

    print("✅ Time features created")
    return out

# Add calendar and cyclical columns to the cleaned weather table
weather_clean = create_time_features(df=weather_clean)
weather_clean.head(n=5)

In [None]:
def create_rolling_features(df, windows=(7, 14, 30)):
    """Create rolling window statistics over calendar-day windows.

    For each of ``temperature``, ``precipitation``, ``humidity``, ``wind_speed``
    and ``pressure`` that is present in ``df`` and each window length ``w``, the
    columns ``<feature>_rolling_{mean,std,min,max}_<w>d`` are added. A window
    holds the rows whose date falls in the ``w`` days ending at (and including)
    the row's own date.

    Windows are measured in days, not rows, so a ``7d`` column never reaches
    further back than a week even when days are missing from the series. For a
    gap-free daily series this matches a row-count window of the same length.

    Args:
        df: DataFrame with a ``date`` column (parsed with ``pd.to_datetime``;
            dates are expected to be valid, i.e. no NaT).
        windows: Iterable of window lengths in days.

    Returns:
        A copy of ``df`` sorted by ``date`` with the rolling columns appended.
        The std columns are NaN wherever the window holds a single observation.
    """
    df = df.copy()
    df['date'] = pd.to_datetime(df['date'])
    df = df.sort_values('date')

    features = ['temperature', 'precipitation', 'humidity', 'wind_speed', 'pressure']

    # Offset-based windows need a sorted datetime index to measure elapsed time.
    time_index = pd.DatetimeIndex(df['date'])

    for feature in features:
        if feature not in df.columns:
            continue

        series = pd.Series(df[feature].to_numpy(), index=time_index)

        for window in windows:
            rolling = series.rolling(f'{window}D', min_periods=1)
            # .to_numpy() assigns by position, so the frame keeps its own index.
            df[f'{feature}_rolling_mean_{window}d'] = rolling.mean().to_numpy()
            df[f'{feature}_rolling_std_{window}d'] = rolling.std().to_numpy()
            df[f'{feature}_rolling_min_{window}d'] = rolling.min().to_numpy()
            df[f'{feature}_rolling_max_{window}d'] = rolling.max().to_numpy()

    print(f"✅ Rolling features created for windows: {list(windows)}")
    return df

# Add 7- and 14-day rolling statistics, then report the resulting column count
weather_clean = create_rolling_features(df=weather_clean, windows=[7, 14])
print(f"Total features: {weather_clean.shape[1]}")

In [None]:
def create_lag_features(df, lags=(1, 3, 7)):
    """Create lagged features by calendar day.

    For each of ``temperature``, ``precipitation`` and ``humidity`` that is
    present in ``df`` and each lag ``k``, the column ``<feature>_lag_<k>d`` holds
    the feature value observed exactly ``k`` days before the row's date, or NaN
    when no row exists for that date.

    The lookup goes by date rather than by row position, so missing days do not
    shift older values into the wrong lag column. For a gap-free daily series
    the result equals ``shift(k)``.

    Args:
        df: DataFrame with a ``date`` column (parsed with ``pd.to_datetime``).
        lags: Iterable of lag lengths in days.

    Returns:
        A copy of ``df`` sorted by ``date`` with the lag columns appended. If a
        date occurs more than once, the last row for that date (after sorting)
        supplies the lagged value.
    """
    df = df.copy()
    df['date'] = pd.to_datetime(df['date'])
    df = df.sort_values('date')

    features = ['temperature', 'precipitation', 'humidity']

    # One row per date, indexed by date, to look earlier observations up in.
    by_date = df.drop_duplicates(subset='date', keep='last').set_index('date')

    for feature in features:
        if feature not in df.columns:
            continue

        lookup = by_date[feature]

        for lag in lags:
            wanted = pd.DatetimeIndex(df['date'] - pd.Timedelta(days=lag))
            # reindex yields NaN for dates without an observation;
            # .to_numpy() assigns by position so the frame keeps its own index.
            df[f'{feature}_lag_{lag}d'] = lookup.reindex(wanted).to_numpy()

    print(f"✅ Lag features created for lags: {list(lags)}")
    return df

# Add 1-, 3- and 7-day lag columns, then report the resulting column count
weather_clean = create_lag_features(df=weather_clean, lags=[1, 3, 7])
print(f"Total features: {weather_clean.shape[1]}")

## 5. Merge Weather and Emergency Data

In [None]:
def merge_weather_emergency(weather_df, emergency_df, window_days=3):
    """Merge weather and emergency data into one labelled table.

    Every weather row dated within ``window_days`` days before an emergency, or
    on the day of the emergency itself, is labelled with that emergency.

    Args:
        weather_df: DataFrame with a ``date`` column (parsed with
            ``pd.to_datetime``).
        emergency_df: DataFrame with ``date``, ``type`` and ``severity`` columns;
            may be empty.
        window_days: Number of days before each emergency that are labelled in
            addition to the emergency day.

    Returns:
        A copy of ``weather_df`` with three extra columns: ``has_emergency``
        (0/1), ``emergency_type`` (``'none'`` when unlabelled) and
        ``emergency_severity`` (0.0 when unlabelled).

    Note:
        When the windows of several emergencies overlap, a day keeps the type and
        severity of the most severe one; on equal severity the emergency that
        comes later in ``emergency_df`` wins.
    """
    df = weather_df.copy()
    # Parse here so that a date column loaded as text compares cleanly below.
    df['date'] = pd.to_datetime(df['date'])
    df['has_emergency'] = 0
    df['emergency_type'] = 'none'
    df['emergency_severity'] = 0.0

    lead = pd.Timedelta(days=window_days)

    if len(emergency_df) > 0:
        for _, emg in emergency_df.iterrows():
            emg_date = pd.to_datetime(emg['date'])
            in_window = df['date'].between(emg_date - lead, emg_date)
            df.loc[in_window, 'has_emergency'] = 1

            # Overwrite the label unless the stored event is strictly more severe.
            # Written as a negated `>` so that a NaN severity on either side still
            # results in an update.
            update = in_window & ~(df['emergency_severity'] > emg['severity'])
            df.loc[update, 'emergency_type'] = emg['type']
            df.loc[update, 'emergency_severity'] = emg['severity']

    print(f"✅ Merged data: {df['has_emergency'].sum()} emergency days out of {len(df)} total days")
    return df

# Label each weather day with the emergencies of the following 3 days
merged_df = merge_weather_emergency(
    weather_df=weather_clean,
    emergency_df=emergency_df,
    window_days=3,
)
merged_df.head(n=10)

## 6. Save Processed Data

In [None]:
# Save to CSV for use in Part B
# Output file name -> table written to it (written in this order)
outputs = {
    'weather_rostov_processed.csv': weather_clean,
    'emergencies_rostov.csv': emergency_df,
    'merged_data_rostov.csv': merged_df,
}
for filename, frame in outputs.items():
    frame.to_csv(filename, index=False)

print("✅ Data saved successfully!")
print("\nFiles created:")
for filename in outputs:
    print(f"- {filename}")

# Download files (uncomment if needed)
# from google.colab import files
# files.download('weather_rostov_processed.csv')
# files.download('emergencies_rostov.csv')
# files.download('merged_data_rostov.csv')

## Summary

✅ **Completed:**
- Generated/Loaded 30 years of weather data for Rostov
- Generated emergency events based on climatic criteria
- Created time-based features (month, season, cyclical encodings)
- Created rolling statistics (7-day, 14-day windows)
- Created lag features (1, 3, 7 days)
- Merged weather and emergency data
- Saved processed data

**Next:** Continue to Part B - Model Development


# NOTE(review): this is a PowerShell snippet, not notebook content - it looks like
# it was pasted in by accident. Confirm, then move it out of this file.
#
# Purpose: write a settings.json into the .claude folder of the current user's profile.

# Full path of the settings file to (over)write.
$path = "$env:USERPROFILE\.claude\settings.json"
# Create the .claude folder; -Force avoids an error when it already exists.
mkdir "$env:USERPROFILE\.claude" -Force
# The single-quoted here-string below is written verbatim (no variable expansion)
# to $path with the utf8 encoding option.
# NOTE(review): the "sk-" values look like placeholders for an API key - never
# commit a real key in this file.
# NOTE(review): ANTHROPIC_BASE_URL is set to https://code.ppchat.vip - confirm that
# this endpoint is trusted before sending credentials to it.
@'
{
  "env": {
    "ANTHROPIC_API_KEY": "sk-",
    "ANTHROPIC_BASE_URL": "https://code.ppchat.vip"
  },
  "permissions": {
    "allow": [],
    "deny": []
  },
  "apiKeyHelper": "echo 'sk-'"
}
'@ | Out-File -Encoding utf8 $path