# In [2]:
import pandas as pd
import numpy as np
import re
from scipy.stats import kendalltau
import sys

# Step 1: Read the CSV file and preprocess the data
# Characters removed from textual level readings. '-' is kept on purpose:
# stripping it would turn negative levels into positive ones.
_LEVEL_NOISE = re.compile(r'[^0-9.\-]')


def _parse_level(raw):
    """Convert one raw 'level' cell to a float, or NaN when it is unusable.

    Strings are stripped of everything except digits, '.' and '-' (a Unicode
    minus sign is normalised to '-') before conversion. Values that pandas
    already parsed as numbers are passed through unchanged instead of being
    discarded. Anything that cannot be converted yields NaN.
    """
    if isinstance(raw, str):
        cleaned = _LEVEL_NOISE.sub('', raw.replace('\u2212', '-'))
        try:
            return float(cleaned)
        except ValueError:
            # Nothing numeric left (e.g. '', 'n/a', '--'): treat as missing.
            return np.nan
    if isinstance(raw, (int, float, np.number)) and not isinstance(raw, bool):
        return float(raw)
    return np.nan


data = pd.read_csv("groundwater_timeseries_data_Negative.csv")
data['date'] = pd.to_datetime(data['date'])
data['level'] = data['level'].apply(_parse_level)
data = data.dropna(subset=['level'])  # Drop rows whose level could not be parsed

data = data.set_index('date')


# Step 2: Check data quality and compliance with criteria
def check_data_quality(data):
    """Validate a date-indexed measurement table before trend analysis.

    Checks, in order:
      1. no cell in ``data`` is null;
      2. the number of rows is at least one third of the number of calendar
         months spanned by the index (first to last timestamp, inclusive);
      3. the longest run of consecutive calendar years without any
         measurement does not exceed the allowed maximum, which depends on
         the number of years spanned: 2 (<= 10 years), 4 (<= 20 years),
         otherwise 6.

    Parameters:
        data: pandas DataFrame indexed by measurement date.

    Returns:
        True when every check passes.

    Raises:
        ValueError: when any check fails, including an empty table.
    """
    # Check if there are any missing values in the data
    if data.isnull().values.any():
        raise ValueError("Data contains missing values.")

    timestamps = pd.DatetimeIndex(data.index)
    if timestamps.empty:
        # An empty series has no span to measure and cannot be analysed.
        raise ValueError("Insufficient data points for time series analysis.")

    first, last = timestamps.min(), timestamps.max()

    # Check the number of measurements based on the minimum requirement.
    # The span is taken from the calendar, not from the row count; comparing
    # the row count with a third of itself could never fail.
    total_months = (last.year - first.year) * 12 + (last.month - first.month) + 1
    min_required_measurements = total_months // 3
    if len(data) < min_required_measurements:
        raise ValueError("Insufficient data points for time series analysis.")

    # Check for gaps in the data based on the specified rules
    years_of_measurement = last.year - first.year + 1
    if years_of_measurement <= 10:
        max_consecutive_gaps = 2
    elif years_of_measurement <= 20:
        max_consecutive_gaps = 4
    else:
        max_consecutive_gaps = 6

    # Longest run of calendar years inside the span with no measurement.
    observed_years = {int(year) for year in timestamps.year}
    longest_gap = 0
    current_gap = 0
    for year in range(first.year, last.year + 1):
        if year in observed_years:
            current_gap = 0
        else:
            current_gap += 1
            longest_gap = max(longest_gap, current_gap)

    # Only runs longer than the allowance are a failure; a single empty year
    # is acceptable.
    if longest_gap > max_consecutive_gaps:
        raise ValueError("Data contains consecutive gaps that do not meet the specified criteria.")

    return True

# Validate the series before any trend statistics are computed; a failed
# check is reported and the script stops with exit status 1.
try:
    check_data_quality(data)
except ValueError as ve:
    failure_report = f"Data quality check failed: {ve}"
    print(failure_report)
    sys.exit(1)


# Step 3: Calculate the arithmetic mean of monthly water level data for each year
# (restored: `yearly_mean` is required by the trend evaluation further down)
yearly_mean = data.resample('Y').mean()


# Step 4: Trend test applied to each yearly series
# (restored: `mann_kendall_test` is called by evaluate_groundwater_trend)
def mann_kendall_test(x, alpha=0.1):
    """
    Test a time-ordered series for a monotonic trend using Kendall's tau
    between the values and their position in time.

    x: Input time series data, ordered in time. NaN entries (for example
       years without measurements) are ignored.
    alpha: Significance level for the test (default: 0.1).

    Returns a tuple (trend, is_significant): `trend` is Kendall's tau
    (negative for a falling series, positive for a rising one) and
    `is_significant` is True when the p-value is at most `alpha`. When fewer
    than two valid values remain, returns (nan, False).
    """
    values = np.asarray(x, dtype=float)
    values = values[~np.isnan(values)]
    if values.size < 2:
        # A trend cannot be assessed from fewer than two points.
        return np.nan, False

    # Dropping NaNs keeps the time order, and a rank correlation depends only
    # on order, so positions after filtering are a valid time axis.
    trend, p_value = kendalltau(np.arange(values.size), values)
    return trend, bool(p_value <= alpha)


# Step 5: Evaluate the percentage of piezometers with a downward trend
def evaluate_groundwater_trend(yearly_mean):
    """Return the percentage of columns showing a significant downward trend.

    Each column of ``yearly_mean`` is treated as one piezometer and passed to
    ``mann_kendall_test``, which is expected to return a
    ``(trend, is_significant)`` pair. A column counts as downward when
    ``trend`` is negative and ``is_significant`` is true.

    Parameters:
        yearly_mean: DataFrame with one column per piezometer.

    Returns:
        The share of downward-trending columns, as a percentage (0-100).

    Raises:
        ValueError: if ``yearly_mean`` has no columns, because the percentage
            is undefined (previously this surfaced as a ZeroDivisionError).
    """
    total_piezometers = len(yearly_mean.columns)
    if total_piezometers == 0:
        raise ValueError("No piezometer columns available for trend evaluation.")

    downward_trend_piezometers = 0
    for column in yearly_mean.columns:
        trend, is_significant = mann_kendall_test(yearly_mean[column])
        if trend < 0 and is_significant:
            downward_trend_piezometers += 1

    return (downward_trend_piezometers / total_piezometers) * 100

percentage_trend = evaluate_groundwater_trend(yearly_mean)

# Step 6: Determine if the groundwater body has a downward trend
# (a share of 20% or more of the piezometers counts as "downward")
groundwater_body_trend = "downward" if percentage_trend >= 20 else "stable or upward"

# Print the results
print(f"Percentage of piezometers with a downward trend: {percentage_trend:.2f}%")
print(f"The groundwater body is considered {groundwater_body_trend} trend.")


# Output:
# Percentage of piezometers with a downward trend: 0.00%
# The groundwater body is considered stable or upward trend.
