<a href="https://colab.research.google.com/github/arko-14/HYDROPREDICT/blob/main/Model_testing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import joblib
# Loading the saved XGBoost model
xgb_model = joblib.load('/content/drive/MyDrive/xgboost_model.joblib')
print("XGBoost model loaded successfully!")

XGBoost model loaded successfully!


In [6]:
!pip install dask[dataframe]

Collecting dask-expr<1.2,>=1.1 (from dask[dataframe])
  Downloading dask_expr-1.1.17-py3-none-any.whl.metadata (2.6 kB)
INFO: pip is looking at multiple versions of dask-expr to determine which version is compatible with other requirements. This could take a while.
  Downloading dask_expr-1.1.16-py3-none-any.whl.metadata (2.5 kB)
Downloading dask_expr-1.1.16-py3-none-any.whl (243 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m243.2/243.2 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: dask-expr
Successfully installed dask-expr-1.1.16


In [9]:
import pandas as pd
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

# Load the test datasets
try:
    rainfall_test_data = pd.read_csv('/content/drive/MyDrive/dataset_train.csv', encoding='latin-1')
    groundwater_test_data = pd.read_csv('/content/drive/MyDrive/groundwater_train2.csv', encoding='latin-1')

    # Convert 'time' column to numeric, handling non-numeric values
    print("Unique values in 'time' column before cleaning:", rainfall_test_data['time'].unique())
    rainfall_test_data['time'] = pd.to_numeric(rainfall_test_data['time'], errors='coerce')

    # Check for NaN values after conversion and handle them
    if rainfall_test_data['time'].isnull().any():
        print("Warning: Found NaN values in 'time' column after conversion.")
        rainfall_test_data['time'].fillna(rainfall_test_data['time'].median(), inplace=True)

    # Verify the conversion
    print("Unique values in 'time' column after cleaning:", rainfall_test_data['time'].unique())
    rainfall_test_data['time'] = rainfall_test_data['time'].astype(float)

    # Convert other columns to numeric if necessary
    numeric_columns = ['time', 'precipitation(mm)', 'rain(mm)', 'temperature_80m (°C)', 'Year']
    for col in numeric_columns:
        rainfall_test_data[col] = pd.to_numeric(rainfall_test_data[col], errors='coerce')

    # Handle any NaNs in the numeric columns
    rainfall_test_data[numeric_columns] = rainfall_test_data[numeric_columns].fillna(rainfall_test_data[numeric_columns].median())

    # Convert categorical columns using one-hot encoding
    location_dummies = pd.get_dummies(rainfall_test_data['Location'], prefix='Location', drop_first=True)
    month_dummies = pd.get_dummies(rainfall_test_data['Month'], prefix='Month', drop_first=True)

    # Combine all features into the test set
    X_test = pd.concat([rainfall_test_data[numeric_columns], location_dummies, month_dummies], axis=1)

    # Define the target variable from groundwater data
    y_test_groundwater = groundwater_test_data['Water Level'].astype(float)

    # Make predictions on the test data
    y_pred_groundwater = xgb_model.predict(X_test)

    # Evaluate the model
    mae_groundwater = mean_absolute_error(y_test_groundwater, y_pred_groundwater)
    rmse_groundwater = np.sqrt(mean_squared_error(y_test_groundwater, y_pred_groundwater))
    r2_groundwater = r2_score(y_test_groundwater, y_pred_groundwater)

    print("Groundwater Level Predictions:")
    print(f"Mean Absolute Error: {mae_groundwater:.4f}")
    print(f"Root Mean Squared Error: {rmse_groundwater:.4f}")
    print(f"R² Score: {r2_groundwater:.4f}")

except FileNotFoundError as e:
    print(f"Error: Could not find one of the data files. {e}")
except KeyError as e:
    print(f"Error: Missing required column. {e}")
except Exception as e:
    print(f"Error: An unexpected error occurred. {e}")


Unique values in 'time' column before cleaning: ['2023-01-01T00:00' '2023-01-01T01:00' '2023-01-01T02:00' ...
 '2023-11-30T21:00' '2023-11-30T22:00' '2023-11-30T23:00']
Unique values in 'time' column after cleaning: [nan]
Error: Missing required column. 'temperature_80m (°C)'


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  rainfall_test_data['time'].fillna(rainfall_test_data['time'].median(), inplace=True)
