# Step 2: Feature Engineering

## Objective
Create comprehensive temporal, spatial, and neighbor-based features for predictive modeling.

### Tasks:
1. Generate temporal features (month, season, year index, day of week)
2. Create lag features (t-1, t-7, t-14, t-30), rolling means and standard deviations, exponentially weighted means, and rates of change
3. Add spatial features (centroids, distances)
4. Build adjacency matrix and neighbor features
5. Engineer cross-pollutant interactions
6. Scale and normalize features

In [None]:
# Import required libraries
import pandas as pd
import numpy as np
import os
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings('ignore')

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Geospatial
from scipy.spatial import distance_matrix
from sklearn.preprocessing import StandardScaler, MinMaxScaler

# Notebook-wide display defaults: print every column, cap printed rows at 100,
# and use seaborn's white grid theme for all plots.
for option_name, option_value in (
    ('display.max_columns', None),
    ('display.max_rows', 100),
):
    pd.set_option(option_name, option_value)
sns.set_style('whitegrid')

print("Libraries loaded successfully!")

## 2.1 Load Master Dataset

In [None]:
# Define paths
# Input and output currently point at the same directory, so the files written
# by this notebook land next to the master dataset it reads.
DATA_PATH = './processed_data/'
OUTPUT_PATH = './processed_data/'

# Load master dataset
# NOTE(review): unpickling can execute arbitrary code; only load a file that
# was produced by a trusted upstream step.
print("Loading master pollution dataset...")
master_data = pd.read_pickle(os.path.join(DATA_PATH, 'master_pollution_data.pkl'))

# Ensure date is datetime (every later step relies on the .dt accessor)
master_data['date'] = pd.to_datetime(master_data['date'])

# Quick sanity report: shape, covered period, number of countries, sample rows
print(f"✓ Dataset loaded: {master_data.shape}")
print(f"Date range: {master_data['date'].min()} to {master_data['date'].max()}")
print(f"Countries: {master_data['country'].nunique()}")
print(f"\nFirst few rows:")
print(master_data.head())

## 2.2 Temporal Feature Engineering

In [None]:
def add_temporal_features(df):
    """
    Return a copy of ``df`` with calendar-derived columns appended.

    The datetime column ``date`` is the only input that is read. Added columns,
    in order: year, month, day, day_of_week, day_of_year, week_of_year (ISO),
    quarter, season, month_sin, month_cos, day_of_week_sin, day_of_week_cos,
    year_index, days_since_start, is_weekend, is_month_start, is_month_end.

    ``df`` itself is left untouched.
    """
    out = df.copy()

    print("Creating temporal features...")

    stamp = out['date'].dt

    # Plain calendar components
    out['year'] = stamp.year
    out['month'] = stamp.month
    out['day'] = stamp.day
    out['day_of_week'] = stamp.dayofweek  # Monday=0, Sunday=6
    out['day_of_year'] = stamp.dayofyear
    out['week_of_year'] = stamp.isocalendar().week
    out['quarter'] = stamp.quarter

    # Meteorological seasons; any month not listed here counts as autumn
    season_by_month = {
        12: 'winter', 1: 'winter', 2: 'winter',
        3: 'spring', 4: 'spring', 5: 'spring',
        6: 'summer', 7: 'summer', 8: 'summer',
    }
    out['season'] = out['month'].apply(
        lambda month: season_by_month.get(month, 'autumn')
    )

    # Sine/cosine pairs so that December sits next to January and Sunday
    # next to Monday
    full_turn = 2 * np.pi
    out['month_sin'] = np.sin(full_turn * out['month'] / 12)
    out['month_cos'] = np.cos(full_turn * out['month'] / 12)
    out['day_of_week_sin'] = np.sin(full_turn * out['day_of_week'] / 7)
    out['day_of_week_cos'] = np.cos(full_turn * out['day_of_week'] / 7)

    # Offsets relative to the earliest year / earliest date in the data
    out['year_index'] = out['year'] - out['year'].min()
    first_date = out['date'].min()
    out['days_since_start'] = (out['date'] - first_date).dt.days

    # Binary flags: Saturday/Sunday, first 7 days of a month, day 24 onwards
    out['is_weekend'] = out['day_of_week'].ge(5).astype(int)
    out['is_month_start'] = out['day'].le(7).astype(int)
    out['is_month_end'] = out['day'].ge(24).astype(int)

    added = sum(1 for name in out.columns if name not in df.columns)
    print(f"✓ Created {added} temporal features")

    return out

# Apply temporal features (master_data is rebound to the enriched copy)
master_data = add_temporal_features(master_data)

# List everything except the identifier columns and the three raw pollutants
print("\nNew columns:")
print([col for col in master_data.columns if col not in ['country', 'date', 'CO', 'NO2', 'PM10']])

## 2.3 Lag Features (Self-Pollution History)

In [None]:
def create_lag_features(df, pollutants=('CO', 'NO2', 'PM10'), lags=(1, 7, 14, 30)):
    """
    Create per-country history features for each pollutant.

    The frame is sorted by country and date and every statistic is computed
    inside a single country. Lags and windows count rows, not calendar days,
    so the "day" wording in the column names assumes one row per country per
    day.

    Columns added for each pollutant ``p`` that exists in ``df``:
      - ``p_lag_k`` for every ``k`` in ``lags``
      - ``p_rolling_mean_w`` for w in 7, 14, 30, 90 (window includes the current row)
      - ``p_rolling_std_w`` for w in 7, 30
      - ``p_ewm_s`` for s in 7, 30
      - ``p_change_1d``, ``p_change_7d``, ``p_pct_change_1d``

    Missing values in these columns are then filled per country: infinite
    values (percentage change from a zero baseline) are treated as missing,
    gaps are back-filled from the next available row of the same country,
    any remaining gaps are forward-filled within the same country, and
    whatever is still missing becomes 0. Note that the back-fill places a
    later observation into the first rows of each country.

    Args:
        df: Frame with ``country`` and ``date`` columns plus pollutant columns.
        pollutants: Pollutant column names; names absent from ``df`` are skipped.
        lags: Row offsets for the plain lag columns.

    Returns:
        A new frame sorted by (country, date) with a fresh 0..n-1 index.
    """
    df_lag = df.sort_values(['country', 'date']).reset_index(drop=True)

    print(f"Creating lag features for {len(pollutants)} pollutants...")
    feature_count = 0

    for pollutant in pollutants:
        if pollutant not in df_lag.columns:
            continue

        print(f"  Processing {pollutant}...")
        by_country = df_lag.groupby('country')[pollutant]

        # Lag features (value k rows earlier within the same country)
        for lag in lags:
            df_lag[f'{pollutant}_lag_{lag}'] = by_country.shift(lag)
            feature_count += 1

        # Rolling mean features
        for window in (7, 14, 30, 90):
            df_lag[f'{pollutant}_rolling_mean_{window}'] = by_country.transform(
                lambda s: s.rolling(window=window, min_periods=1).mean()
            )
            feature_count += 1

        # Rolling standard deviation (NaN for the first row of each country)
        for window in (7, 30):
            df_lag[f'{pollutant}_rolling_std_{window}'] = by_country.transform(
                lambda s: s.rolling(window=window, min_periods=1).std()
            )
            feature_count += 1

        # Exponentially weighted moving average
        for span in (7, 30):
            df_lag[f'{pollutant}_ewm_{span}'] = by_country.transform(
                lambda s: s.ewm(span=span, adjust=False).mean()
            )
            feature_count += 1

        # Rate of change
        df_lag[f'{pollutant}_change_1d'] = by_country.diff(1)
        df_lag[f'{pollutant}_change_7d'] = by_country.diff(7)
        df_lag[f'{pollutant}_pct_change_1d'] = by_country.pct_change(1)
        feature_count += 3

    print(f"✓ Created {feature_count} lag and rolling features")

    # 'change_' also covers the 'pct_change_' columns.
    lag_cols = [
        col for col in df_lag.columns
        if any(tag in col for tag in ('lag_', 'rolling_', 'ewm_', 'change_'))
    ]
    if lag_cols:
        # A percentage change from 0 is +/-inf, which downstream scalers reject.
        values = df_lag[lag_cols].replace([np.inf, -np.inf], np.nan)
        # Both fill passes are grouped so no value crosses a country boundary.
        countries = df_lag['country']
        values = values.groupby(countries).bfill()
        values = values.groupby(countries).ffill()
        df_lag[lag_cols] = values.fillna(0)

    return df_lag

# Create lag features with the default pollutants and lags; the returned frame
# is re-sorted by (country, date) with a fresh index.
master_data = create_lag_features(master_data)

print(f"\nDataset shape after lag features: {master_data.shape}")

## 2.4 Spatial Features - Country Centroids

We'll define approximate centroids for each country based on geographical knowledge.

In [None]:
# Approximate geographic centre of each country as (latitude, longitude) in
# decimal degrees; negative values are south / west.
# Keys are looked up with the values of the dataset's `country` column, so they
# must be spelled exactly like those values for the direct lookup to succeed.
# NOTE(review): 'Uk' and 'Usa' look like title-cased names produced by an
# upstream step — confirm against the actual `country` values.
COUNTRY_CENTROIDS = {
    'Bangladesh': (23.685, 90.3563),
    'Germany': (51.1657, 10.4515),
    'India': (20.5937, 78.9629),
    'Japan': (36.2048, 138.2529),
    'Malaysia': (4.2105, 101.9758),
    'Nepal': (28.3949, 84.1240),
    'Norway': (60.4720, 8.4689),
    'Pakistan': (30.3753, 69.3451),
    'Singapore': (1.3521, 103.8198),
    'South Africa': (-30.5595, 22.9375),
    'Sweden': (60.1282, 18.6435),
    'Uk': (55.3781, -3.4360),
    'Usa': (37.0902, -95.7129),
    'Vietnam': (14.0583, 108.2772),
    'United Kingdom': (55.3781, -3.4360),  # Alternative name, same coordinates as 'Uk'
    'United States': (37.0902, -95.7129),  # Alternative name, same coordinates as 'Usa'
}

def _match_centroid_name(country, centroids_dict):
    """Return the key of ``centroids_dict`` that names ``country``, or None."""
    import re  # only needed for this fallback matching

    if not isinstance(country, str) or not country.strip():
        return None

    wanted = country.strip().casefold()
    folded = {
        key: key.strip().casefold()
        for key in centroids_dict
        if isinstance(key, str) and key.strip()
    }

    # 1) Same name, ignoring case and surrounding whitespace.
    for key, name in folded.items():
        if name == wanted:
            return key

    # 2) One name contained in the other as whole words, so that
    #    'United States of America' matches 'United States' while
    #    'Ukraine' does not match 'Uk'.
    def contains_words(haystack, needle):
        pattern = r'(?<!\w)' + re.escape(needle) + r'(?!\w)'
        return re.search(pattern, haystack) is not None

    for key, name in folded.items():
        if contains_words(wanted, name) or contains_words(name, wanted):
            return key

    return None


def add_spatial_features(df, centroids_dict):
    """
    Add centroid coordinates of each row's country.

    Columns added: ``latitude``, ``longitude`` (float, NaN when no centroid is
    known) and ``latitude_norm`` / ``longitude_norm``, the coordinates divided
    by 90 and 180 respectively so they fall in [-1, 1].

    Countries without an exact key in ``centroids_dict`` go through a fallback
    match: first a case-insensitive comparison, then whole-word containment in
    either direction. Non-string or empty country values are never matched.

    Args:
        df: Frame with a ``country`` column.
        centroids_dict: Mapping of country name to ``(latitude, longitude)``.

    Returns:
        A copy of ``df`` with the four columns above; ``df`` is not modified.
    """
    df_spatial = df.copy()

    print("Adding spatial features...")

    # Mapping through plain dicts yields NaN (not None) for unknown names, and
    # the explicit float cast keeps the columns numeric even if nothing matches.
    lat_by_name = {name: coords[0] for name, coords in centroids_dict.items()}
    lon_by_name = {name: coords[1] for name, coords in centroids_dict.items()}
    df_spatial['latitude'] = df_spatial['country'].map(lat_by_name).astype(float)
    df_spatial['longitude'] = df_spatial['country'].map(lon_by_name).astype(float)

    # Check for missing centroids
    missing_centroids = df_spatial.loc[df_spatial['latitude'].isna(), 'country'].unique()
    if len(missing_centroids) > 0:
        print(f"  Warning: Missing centroids for: {missing_centroids}")
        print(f"  Attempting to match with alternative names...")

        unresolved = []
        for country in missing_centroids:
            key = _match_centroid_name(country, centroids_dict)
            if key is None:
                unresolved.append(country)
                continue
            print(f"    Matched '{country}' with '{key}'")
            mask = df_spatial['country'] == country
            df_spatial.loc[mask, 'latitude'] = centroids_dict[key][0]
            df_spatial.loc[mask, 'longitude'] = centroids_dict[key][1]

        if unresolved:
            print(f"    No centroid found for: {unresolved}")

    # Normalize coordinates to [-1, 1] range
    df_spatial['latitude_norm'] = df_spatial['latitude'] / 90.0  # Latitude range: -90 to 90
    df_spatial['longitude_norm'] = df_spatial['longitude'] / 180.0  # Longitude range: -180 to 180

    print(f"✓ Added spatial features (latitude, longitude, normalized coordinates)")

    return df_spatial

# Add spatial features using the centroid table defined above
master_data = add_spatial_features(master_data, COUNTRY_CENTROIDS)

# One row per distinct (country, latitude, longitude) combination, for a
# visual check of the coordinate assignment
print("\nCountries with coordinates:")
coords= master_data[['country', 'latitude', 'longitude']].drop_duplicates().sort_values('country')
print(coords)

## 2.5 Spatial Adjacency Matrix

Calculate distances between countries and determine neighbors.

In [None]:
def create_adjacency_matrix(df, distance_threshold=3000):
    """
    Create a spatial adjacency matrix from country centroid distances.

    Two countries are neighbors when the great-circle (haversine) distance
    between their centroids is greater than 0 and below ``distance_threshold``.
    A country is never its own neighbor.

    Args:
        df: Frame with ``country``, ``latitude`` and ``longitude`` columns
            (degrees). Rows lacking either coordinate are ignored.
        distance_threshold: Neighbor cut-off in km (3000 km is roughly
            27 degrees of arc).

    Returns:
        Tuple ``(dist_df, adjacency_df, neighbors_dict)``:
        a country-by-country distance table in km, the matching 0/1
        adjacency table, and a dict mapping each country to the list of its
        neighbors.
    """
    earth_radius_km = 6371

    # One row per country that has usable coordinates
    country_coords = (
        df[['country', 'latitude', 'longitude']]
        .drop_duplicates()
        .dropna(subset=['latitude', 'longitude'])
        .reset_index(drop=True)
    )

    countries = country_coords['country'].values
    n_countries = len(countries)

    print(f"Creating adjacency matrix for {n_countries} countries...")

    # Haversine formula evaluated for all pairs at once via broadcasting:
    # entry [i, j] compares country i (rows) with country j (columns).
    lat = np.radians(country_coords['latitude'].to_numpy(dtype=float))
    lon = np.radians(country_coords['longitude'].to_numpy(dtype=float))
    dlat = lat[None, :] - lat[:, None]
    dlon = lon[None, :] - lon[:, None]

    a = (
        np.sin(dlat / 2) ** 2
        + np.cos(lat)[:, None] * np.cos(lat)[None, :] * np.sin(dlon / 2) ** 2
    )
    # Rounding can push `a` marginally outside [0, 1] for (nearly) antipodal
    # points, which would make arcsin return NaN.
    a = np.clip(a, 0.0, 1.0)
    dist_matrix = earth_radius_km * 2 * np.arcsin(np.sqrt(a))
    np.fill_diagonal(dist_matrix, 0.0)

    # Create DataFrame
    dist_df = pd.DataFrame(dist_matrix, index=countries, columns=countries)

    # Create adjacency matrix (binary: 1 if neighbor, 0 otherwise)
    adjacency_matrix = (dist_matrix > 0) & (dist_matrix < distance_threshold)
    adjacency_df = pd.DataFrame(adjacency_matrix.astype(int), index=countries, columns=countries)

    # Create neighbor dictionary (the matrix is symmetric, so rows suffice)
    names = list(countries)
    neighbors_dict = {
        name: [other for other, is_adjacent in zip(names, row) if is_adjacent]
        for name, row in zip(names, adjacency_matrix)
    }

    print(f"✓ Adjacency matrix created (threshold: {distance_threshold}km)")
    print(f"\nNeighbor counts:")
    for country, neighbors in neighbors_dict.items():
        print(f"  {country}: {len(neighbors)} neighbors")

    return dist_df, adjacency_df, neighbors_dict

# Create adjacency matrix with the default 3000 km threshold
distance_matrix_df, adjacency_matrix_df, neighbors_dict = create_adjacency_matrix(master_data)

# Save matrices (country names are kept as the CSV index column)
distance_matrix_df.to_csv(os.path.join(OUTPUT_PATH, 'distance_matrix.csv'))
adjacency_matrix_df.to_csv(os.path.join(OUTPUT_PATH, 'adjacency_matrix.csv'))

# Save neighbors dictionary as JSON: {country: [neighbor, ...]}
import json
with open(os.path.join(OUTPUT_PATH, 'neighbors_dict.json'), 'w') as f:
    json.dump(neighbors_dict, f, indent=2)

print("\n✓ Matrices saved to disk")

## 2.6 Neighbor Pollution Features

In [None]:
def create_neighbor_features(df, neighbors_dict, pollutants=('CO', 'NO2', 'PM10')):
    """
    Create neighbor pollution features.

    For every row (country, date) and every pollutant ``p`` present in ``df``
    the values of ``p`` reported on the same date by the country's neighbors
    are summarised into:
      - ``p_neighbor_mean``, ``p_neighbor_max``, ``p_neighbor_min``,
        ``p_neighbor_std``
      - ``p_neighbor_lag_1`` and ``p_neighbor_lag_7``: the neighbor mean 1 and
        7 rows earlier for the same country (rows, not calendar days)

    Missing values are then filled per country: back-fill from the next
    available row, forward-fill what is left, and finally 0. A country with no
    neighbors (or whose neighbors report nothing) therefore gets 0 everywhere
    and never inherits another country's values. The back-fill places a later
    observation into the first rows of each country.

    Args:
        df: Frame with ``country`` and ``date`` columns plus pollutant columns.
        neighbors_dict: Mapping of country to a list of neighbor countries.
        pollutants: Pollutant column names; names absent from ``df`` are skipped.

    Returns:
        A new frame sorted by (date, country) with a fresh 0..n-1 index.
    """
    df_neighbor = df.sort_values(['date', 'country']).reset_index(drop=True)

    print(f"Creating neighbor pollution features...")
    feature_count = 0
    stat_names = ['mean', 'max', 'min', 'std']

    for pollutant in pollutants:
        if pollutant not in df_neighbor.columns:
            continue

        print(f"  Processing {pollutant}...")

        # Start from NaN so rows without neighbor data stay missing for now
        stat_cols = {stat: f'{pollutant}_neighbor_{stat}' for stat in stat_names}
        for col in stat_cols.values():
            df_neighbor[col] = np.nan

        for country in df_neighbor['country'].dropna().unique():
            neighbors = neighbors_dict.get(country, [])
            if not neighbors:
                continue

            neighbor_rows = df_neighbor[df_neighbor['country'].isin(neighbors)]
            if neighbor_rows.empty:
                continue

            # Same-date statistics over all neighbors, indexed by date
            per_date = neighbor_rows.groupby('date')[pollutant].agg(stat_names)

            own_rows = df_neighbor['country'] == country
            own_dates = df_neighbor.loc[own_rows, 'date']
            for stat, col in stat_cols.items():
                # Dates on which no neighbor reported map to NaN
                df_neighbor.loc[own_rows, col] = own_dates.map(per_date[stat]).to_numpy()

        feature_count += 4

        # Create lagged neighbor features
        mean_by_country = df_neighbor.groupby('country')[stat_cols['mean']]
        df_neighbor[f'{pollutant}_neighbor_lag_1'] = mean_by_country.shift(1)
        df_neighbor[f'{pollutant}_neighbor_lag_7'] = mean_by_country.shift(7)
        feature_count += 2

    print(f"✓ Created {feature_count} neighbor pollution features")

    # Fill gaps strictly within each country. The frame is ordered by
    # (date, country), so an ungrouped fill would copy the value of whichever
    # country happens to sit on the previous row.
    neighbor_cols = [col for col in df_neighbor.columns if 'neighbor' in col]
    if neighbor_cols:
        countries = df_neighbor['country']
        values = df_neighbor[neighbor_cols].groupby(countries).bfill()
        values = values.groupby(countries).ffill()
        df_neighbor[neighbor_cols] = values.fillna(0)

    return df_neighbor

# Create neighbor features (this may take a few minutes)
# The returned frame is re-sorted by (date, country) with a fresh index.
print("Note: This operation may take several minutes for large datasets...\n")
master_data = create_neighbor_features(master_data, neighbors_dict)

print(f"\nDataset shape after neighbor features: {master_data.shape}")

## 2.7 Cross-Pollutant Interaction Features

In [None]:
def create_interaction_features(df, pollutants=('CO', 'NO2', 'PM10')):
    """
    Create interaction features between pollutants.

    For every pair ``(a, b)`` of ``pollutants`` that both exist in ``df``:
      - ``a_b_ratio``   = a / (b + 1e-6)   (the offset avoids division by zero)
      - ``a_b_product`` = a * b
      - ``a_b_sum``     = a + b

    ``AQI_proxy`` is added only when every requested pollutant and all three
    AQI components (CO, NO2, PM10) are present. It is
    0.3 * CO/max(CO) + 0.4 * NO2/max(NO2) + 0.3 * PM10/max(PM10), where each
    maximum is taken over the whole frame. A component whose maximum is 0
    contributes 0 instead of 0/0.

    Args:
        df: Frame holding the pollutant columns.
        pollutants: Pollutant column names to combine.

    Returns:
        A copy of ``df`` with the interaction columns appended.
    """
    from itertools import combinations

    df_interact = df.copy()

    print("Creating cross-pollutant interaction features...")
    feature_count = 0

    # Pairwise ratios, products and sums
    for pol1, pol2 in combinations(pollutants, 2):
        if pol1 in df_interact.columns and pol2 in df_interact.columns:
            df_interact[f'{pol1}_{pol2}_ratio'] = df_interact[pol1] / (df_interact[pol2] + 1e-6)
            df_interact[f'{pol1}_{pol2}_product'] = df_interact[pol1] * df_interact[pol2]
            df_interact[f'{pol1}_{pol2}_sum'] = df_interact[pol1] + df_interact[pol2]
            feature_count += 3

    # Air Quality Index (AQI) proxy - simple weighted sum of max-normalised
    # pollutants. The formula always needs these three columns, whatever
    # `pollutants` contains, so their presence is checked explicitly.
    aqi_weights = {'CO': 0.3, 'NO2': 0.4, 'PM10': 0.3}
    required = list(pollutants) + list(aqi_weights)
    if all(pol in df_interact.columns for pol in required):
        aqi = None
        for pol, weight in aqi_weights.items():
            peak = df_interact[pol].max()
            if peak == 0:
                # Avoid 0/0; NaN inputs still propagate as NaN
                term = 0.0 * df_interact[pol]
            else:
                term = weight * df_interact[pol] / peak
            aqi = term if aqi is None else aqi + term
        df_interact['AQI_proxy'] = aqi
        feature_count += 1

    print(f"✓ Created {feature_count} interaction features")

    return df_interact

# Create interaction features with the default pollutant set
master_data = create_interaction_features(master_data)

print(f"\nDataset shape after interaction features: {master_data.shape}")

## 2.8 Feature Summary and Statistics

In [None]:
# Feature categorization: columns are assigned to groups by substring match on
# the column name. The groups are not mutually exclusive (a column such as
# 'CO_neighbor_lag_1' contains both 'lag_' and 'neighbor').
_all_columns = master_data.columns


def _columns_containing(*fragments):
    """Return the master_data columns whose name contains any of ``fragments``."""
    return [name for name in _all_columns if any(part in name for part in fragments)]


temporal_features = _columns_containing(
    'year', 'month', 'day', 'season', 'week', 'quarter', 'sin', 'cos', 'weekend'
)
lag_features = _columns_containing('lag_', 'rolling_', 'ewm_', 'change_')
spatial_features = _columns_containing('latitude', 'longitude')
neighbor_features = _columns_containing('neighbor')
interaction_features = _columns_containing('ratio', 'product', 'sum', 'AQI')
base_pollutants = ['CO', 'NO2', 'PM10']

print("FEATURE ENGINEERING SUMMARY")
print("="*80)
print(f"Total features: {len(master_data.columns)}")
print(f"\nFeature Categories:")
for label, members in (
    ('Base pollutants', base_pollutants),
    ('Temporal features', temporal_features),
    ('Lag/Rolling features', lag_features),
    ('Spatial features', spatial_features),
    ('Neighbor features', neighbor_features),
    ('Interaction features', interaction_features),
):
    print(f"  - {label}: {len(members)}")

print(f"\nSample of key features:")
print(f"\nTemporal: {temporal_features[:10]}")
print(f"\nLag: {lag_features[:10]}")
print(f"\nNeighbor: {neighbor_features[:6]}")
print(f"\nInteraction: {interaction_features}")

## 2.9 Feature Scaling

Prepare scaled versions of features for ML models.

In [None]:
def scale_features(df, method='standard'):
    """
    Scale every numeric column of ``df``.

    method: 'standard' selects StandardScaler; any other value (documented
    choice: 'minmax') selects MinMaxScaler.

    The scaler is fitted on all rows of ``df``. Returns a tuple of
    (scaled copy of ``df``, fitted scaler, list of scaled column names).
    """
    # Identifier / categorical columns that must never be scaled
    non_feature_cols = {'country', 'date', 'season'}

    scaled = df.copy()
    numeric_names = scaled.select_dtypes(include=[np.number]).columns.tolist()
    scale_cols = [name for name in numeric_names if name not in non_feature_cols]

    print(f"Scaling {len(scale_cols)} numerical features using {method} method...")

    scaler = StandardScaler() if method == 'standard' else MinMaxScaler()

    # Fit on, and overwrite, the numeric columns in one step
    scaled[scale_cols] = scaler.fit_transform(scaled[scale_cols])

    print(f"✓ Features scaled successfully")

    return scaled, scaler, scale_cols

# Create scaled version (master_data itself stays unscaled)
master_data_scaled, feature_scaler, scaled_columns = scale_features(master_data, method='standard')

print(f"\nScaled dataset shape: {master_data_scaled.shape}")

# Save scaler for later use, together with the exact column list it was fitted
# on so the same columns can be passed to it again
import joblib
joblib.dump(feature_scaler, os.path.join(OUTPUT_PATH, 'feature_scaler.pkl'))
joblib.dump(scaled_columns, os.path.join(OUTPUT_PATH, 'scaled_columns.pkl'))

print("✓ Scaler saved to disk")

## 2.10 Encode Categorical Features

In [None]:
# One-hot encode season
# The dummy columns are appended to both the unscaled and the scaled frame;
# the original 'season' column is kept.
# NOTE: master_data / master_data_scaled are rebound here, so running this cell
# a second time appends the same dummy columns again.
if 'season' in master_data.columns:
    season_dummies = pd.get_dummies(master_data['season'], prefix='season')
    master_data = pd.concat([master_data, season_dummies], axis=1)
    
    season_dummies_scaled = pd.get_dummies(master_data_scaled['season'], prefix='season')
    master_data_scaled = pd.concat([master_data_scaled, season_dummies_scaled], axis=1)
    
    print(f"✓ Encoded season into {len(season_dummies.columns)} dummy variables")

# One-hot encode country (for some models)
# These go into separate *_with_country frames so the main frames stay free of
# the country dummy columns.
country_dummies = pd.get_dummies(master_data['country'], prefix='country')
master_data_with_country = pd.concat([master_data, country_dummies], axis=1)

country_dummies_scaled = pd.get_dummies(master_data_scaled['country'], prefix='country')
master_data_scaled_with_country = pd.concat([master_data_scaled, country_dummies_scaled], axis=1)

print(f"✓ Encoded country into {len(country_dummies.columns)} dummy variables")
print(f"\nFinal feature count (with country encoding): {len(master_data_with_country.columns)}")

## 2.11 Export Feature-Engineered Datasets

In [None]:
# Export main dataset (unscaled) as both CSV and pickle
output_file_unscaled = os.path.join(OUTPUT_PATH, 'features_engineered.csv')
master_data.to_csv(output_file_unscaled, index=False)
master_data.to_pickle(os.path.join(OUTPUT_PATH, 'features_engineered.pkl'))

# Export scaled dataset as both CSV and pickle
output_file_scaled = os.path.join(OUTPUT_PATH, 'features_engineered_scaled.csv')
master_data_scaled.to_csv(output_file_scaled, index=False)
master_data_scaled.to_pickle(os.path.join(OUTPUT_PATH, 'features_engineered_scaled.pkl'))

# Export with country encoding (for tree-based models); pickle only
master_data_with_country.to_pickle(os.path.join(OUTPUT_PATH, 'features_engineered_with_country.pkl'))
master_data_scaled_with_country.to_pickle(os.path.join(OUTPUT_PATH, 'features_engineered_scaled_with_country.pkl'))

# Save feature lists
# 'all_features' is every column of the unscaled frame, including identifier
# columns and the season dummies added above.
feature_info = {
    'temporal_features': temporal_features,
    'lag_features': lag_features,
    'spatial_features': spatial_features,
    'neighbor_features': neighbor_features,
    'interaction_features': interaction_features,
    'base_pollutants': base_pollutants,
    'all_features': master_data.columns.tolist()
}

import json
with open(os.path.join(OUTPUT_PATH, 'feature_info.json'), 'w') as f:
    json.dump(feature_info, f, indent=2)

# Final report. The list below names the pickle/JSON/CSV artefacts; the two
# features_engineered*.csv files and scaled_columns.pkl are written as well but
# are not listed.
print("="*80)
print("FEATURE ENGINEERING COMPLETED")
print("="*80)
print(f"\nExported files:")
print(f"  1. features_engineered.pkl (unscaled)")
print(f"  2. features_engineered_scaled.pkl (scaled)")
print(f"  3. features_engineered_with_country.pkl (with country dummies)")
print(f"  4. features_engineered_scaled_with_country.pkl (scaled with country dummies)")
print(f"  5. feature_scaler.pkl (scaler object)")
print(f"  6. feature_info.json (feature categorization)")
print(f"  7. distance_matrix.csv")
print(f"  8. adjacency_matrix.csv")
print(f"  9. neighbors_dict.json")

print(f"\nFinal dataset statistics:")
print(f"  - Total records: {len(master_data):,}")
print(f"  - Total features: {len(master_data.columns)}")
print(f"  - Date range: {master_data['date'].min()} to {master_data['date'].max()}")
print(f"  - Countries: {master_data['country'].nunique()}")
print(f"  - Memory usage: {master_data.memory_usage(deep=True).sum() / 1024**2:.2f} MB")

print("\n✓ Ready for exploratory data analysis and ML modeling!")

## Summary

### Completed Tasks:
1. ✓ Created comprehensive temporal features (17 calendar-derived columns, plus one-hot season indicators)
2. ✓ Generated lag and rolling window features for pollution persistence
3. ✓ Added spatial coordinates (centroids) for each country
4. ✓ Built spatial adjacency matrix and identified neighbors
5. ✓ Created neighbor pollution features for transboundary analysis
6. ✓ Engineered cross-pollutant interaction features
7. ✓ Scaled features using StandardScaler
8. ✓ Encoded categorical variables
9. ✓ Exported multiple versions of the dataset

### Key Achievements:
- **100+ features** engineered from raw pollution data
- Spatial relationships captured through adjacency and distance matrices
- Temporal dynamics encoded through lags and rolling statistics
- Transboundary effects prepared via neighbor pollution aggregates

### Next Steps:
**Notebook 03: Exploratory Data Analysis**
- Visualize temporal trends
- Analyze spatial patterns
- Explore correlations
- Identify key relationships for modeling