In [1]:
# Mount Google Drive into the Colab filesystem at /content/drive so that
# files stored in Drive can be read through ordinary file paths below.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [9]:
import joblib
import pandas as pd
import numpy as np

In [4]:
# Load the fitted model pipeline and the label encoder that maps its numeric
# predictions back to class names.
#
# NOTE(security): joblib.load unpickles the file, which can execute arbitrary
# code. Only load artifacts from a location you control and trust.
#
# The directory is defined once so both artifacts are guaranteed to come from
# the same folder and the location can be changed in a single place.
ARTIFACT_DIR = '/content/drive/My Drive/water-quality'

model = joblib.load(f'{ARTIFACT_DIR}/wqi_xgb_pipeline.pkl')
le = joblib.load(f'{ARTIFACT_DIR}/label_encoder.pkl')

In [8]:
# Example reading used to exercise the model end to end.
# TODO: replace this hard-coded sample with values collected from the UI.
test_data = {
    'latitude': 38.5358,
    'longitude': -121.5206,
    'station_type': 'Surface Water',
    'sample_date': '2011-12-29 08:30:00',
    'sample_depth_meter': 1.0,
    'DissolvedOxygen_mg/L': 8.86,
    'SpecificConductance_µS/cm': 375.0,
    'Turbidity_NTU': 7.8,
    'WaterTemperature_°C': 8.0,
    'pH_pH units': 7.71,
}

# Wrapping the record in a list makes pandas build a one-row frame whose
# columns are the dictionary keys.
test_dp = pd.DataFrame([test_data])

In [10]:
def engineer_features(df):
    """Return a copy of ``df`` extended with the derived model features.

    Every step runs only when its source column(s) are present, so a frame
    that lacks them simply skips that step.

    Args:
        df: pandas DataFrame of water-quality samples. It is not modified.

    Returns:
        A new DataFrame with, where applicable:
          * ``sample_date`` parsed to datetime, plus ``Month`` and its
            cyclical encoding ``Month_sin`` / ``Month_cos``;
          * ``DO_Temp_Ratio`` = dissolved oxygen / (water temperature + 1);
          * missing ``sample_depth_meter`` values replaced by 0.
    """
    oxygen_col = 'DissolvedOxygen_mg/L'
    temperature_col = 'WaterTemperature_°C'
    depth_col = 'sample_depth_meter'

    enriched = df.copy()

    # Seasonality: map the month onto a circle so that December (12) ends up
    # next to January (1) instead of at the opposite end of the scale.
    if 'sample_date' in enriched.columns:
        enriched['sample_date'] = pd.to_datetime(enriched['sample_date'])
        enriched['Month'] = enriched['sample_date'].dt.month
        month_angle = 2 * np.pi * enriched['Month'] / 12
        enriched['Month_sin'] = np.sin(month_angle)
        enriched['Month_cos'] = np.cos(month_angle)

    # Interaction feature: oxygen-saturation proxy (cold water holds more
    # oxygen). The +1 offset keeps the denominator non-zero at 0 °C.
    if all(col in enriched.columns for col in (oxygen_col, temperature_col)):
        enriched['DO_Temp_Ratio'] = enriched[oxygen_col] / (enriched[temperature_col] + 1)

    # An unrecorded depth is treated as a surface sample (depth 0).
    if depth_col in enriched.columns:
        enriched[depth_col] = enriched[depth_col].fillna(0)

    return enriched

In [11]:
# Derive the engineered columns for the sample row; engineer_features works on
# a copy, so test_dp itself is left unchanged.
test_dp_engineered = engineer_features(test_dp)

In [12]:
# Numeric model inputs, listed in the order they are selected below.
numeric_features = [
    # Measured readings
    'DissolvedOxygen_mg/L',
    'pH_pH units',
    'Turbidity_NTU',
    'SpecificConductance_µS/cm',
    'WaterTemperature_°C',
    # Sampling depth and the derived oxygen/temperature ratio
    'sample_depth_meter',
    'DO_Temp_Ratio',
    # Location
    'latitude',
    'longitude',
    # Cyclical month encoding
    'Month_sin',
    'Month_cos',
]

# station_type is the only categorical input. 'county_name' is skipped
# because latitude/longitude is more precise.
categorical_features = ['station_type']

# Keep only the model inputs: numeric columns first, then categorical.
test_dp_engineered = test_dp_engineered.loc[:, numeric_features + categorical_features]

In [16]:
# Prediction
# Run the loaded model on the single prepared row, then use the label encoder
# to turn the prediction back into its original label.
# NOTE(review): presumably model.predict returns encoded class ids that `le`
# was fitted on — confirm against the training notebook.

pred = model.predict(test_dp_engineered)
# Only one row was scored, so take the first (and only) decoded label.
water_quality_status = le.inverse_transform(pred)[0]

In [17]:
# Bare last expression: the notebook displays the decoded label as cell output.
water_quality_status

'Moderate'