# Code according to Spain's Methodology

1) Select a timeseries
2) Consider 6 years as 1 cycle
3) Calculate the average monthly value
4) Check to see if at least 1/3 of the months have data
5) Check to see if there is a trend
6) If a trend exists, extend the analysis period from one cycle to two cycles (12 years).
7) Check to see the trend of each of the sites.
8) If more than 20% of the sites have a decreasing trend, the status is bad.

In [3]:
import pandas as pd
from datetime import datetime, timedelta
import numpy as np
import pymannkendall as mk

# Load the groundwater time series into a pandas DataFrame. The rest of the
# script reads the 'date', 'site' and 'level' columns of this file.
filename = 'groundwater_timeseries_data_Negative.csv'
df = pd.read_csv(filename)

# Convert the 'date' column to datetime format so it can be compared with
# the reporting-cycle boundaries.
df['date'] = pd.to_datetime(df['date'])

# Ask the user for the reference date (start of the current reporting cycle)
# and keep asking until it parses to a real timestamp.
while True:
    selected_date_str = input("Please enter a date (YYYY-MM-DD): ").strip()
    try:
        selected_date = pd.to_datetime(selected_date_str)
    except ValueError:
        selected_date = pd.NaT
    # pd.to_datetime does not raise for empty input; it returns NaT, which
    # would make every later date comparison False. Treat it as invalid too.
    if not pd.isna(selected_date):
        break
    print("Invalid date format. Please enter the date in YYYY-MM-DD format.")


# Fields of the pymannkendall result that are written to the result CSVs,
# in output-column order.
_MK_FIELDS = ("trend", "h", "p", "z", "Tau", "s", "var_s", "slope", "intercept")

# A status of "bad" is reported when more than this percentage of sites
# shows a decreasing trend.
DECREASING_SITES_THRESHOLD_PCT = 20


def _monthly_site_means(frame):
    """Return the mean 'level' per calendar month and site.

    The result has the columns 'date' (monthly period), 'site' and 'level',
    sorted by month so each site's series is in chronological order.
    """
    month = frame['date'].dt.to_period('M')
    return frame.groupby([month, 'site'])['level'].mean().reset_index()


def _mann_kendall_by_site(avg_df):
    """Run the original Mann-Kendall test on every site's 'level' series.

    Returns a DataFrame with one row per site holding the site name and the
    test statistics listed in ``_MK_FIELDS``.
    """
    rows = []
    for site, site_data in avg_df.groupby("site"):
        trend_result = mk.original_test(site_data["level"])
        rows.append({
            "site": site,
            **{field: getattr(trend_result, field) for field in _MK_FIELDS},
        })
    return pd.DataFrame(rows)


# Reporting windows around the selected date: one cycle is the 6 years
# after it, two cycles additionally include the 6 years before it.
start_date_2_cycles = selected_date - pd.DateOffset(years=6)
start_date_1_cycle = selected_date
end_date = selected_date + pd.DateOffset(years=6)

# Find the first and last dates in the data
first_date_in_data = df['date'].min()
last_date_in_data = df['date'].max()

# Check if both reporting windows lie within the available data
if start_date_2_cycles < first_date_in_data or end_date > last_date_in_data:
    print("The analysis cannot be completed using this date. There needs to be a 6-year timeseries before and after the selected date. Please select a data accordingly.")
else:
    # Two cycles: from 6 years before the selected date to 6 years after it
    two_cycle_df_ini = df[(df['date'] >= start_date_2_cycles) & (df['date'] <= end_date)]

    # One cycle: from the selected date to 6 years after it
    one_cycle_df_ini = df[(df['date'] >= start_date_1_cycle) & (df['date'] <= end_date)]

    # Rows with a level of 0 are treated as missing measurements
    two_cycle_df = two_cycle_df_ini[two_cycle_df_ini['level'] != 0]
    one_cycle_df = one_cycle_df_ini[one_cycle_df_ini['level'] != 0]

    # Count the distinct months before and after dropping the zero levels.
    # NOTE(review): the reference count only includes months that appear in
    # the file at all; months absent from the CSV are not counted as missing.
    total_months_one_cycle_ini = one_cycle_df_ini['date'].dt.to_period('M').nunique()
    total_months_one_cycle = one_cycle_df['date'].dt.to_period('M').nunique()

    # Require at least one month with data (an empty window would otherwise
    # pass the check as 0 >= 0) and at least one third of the months.
    has_enough_data = (
        total_months_one_cycle > 0
        and total_months_one_cycle >= total_months_one_cycle_ini / 3
    )

    if has_enough_data:
        # Average level for each calendar month and site, per window
        avg_two_cycle_df = _monthly_site_means(two_cycle_df)
        avg_one_cycle_df = _monthly_site_means(one_cycle_df)

        # Number of sites in each window; each percentage below is taken
        # relative to the sites of its own window.
        total_sites_one_cycle = avg_one_cycle_df['site'].nunique()
        total_sites_two_cycles = avg_two_cycle_df['site'].nunique()

        # Trend test per site for one cycle, written to CSV
        results_one_cycle_df = _mann_kendall_by_site(avg_one_cycle_df)
        output_file = "Cycle_one_results.csv"
        results_one_cycle_df.to_csv(output_file, index=False)

        # Trend test per site for two cycles, written to CSV
        results_two_cycle_df = _mann_kendall_by_site(avg_two_cycle_df)
        output_file = "Cycle_two_results.csv"
        results_two_cycle_df.to_csv(output_file, index=False)

        # Share of sites whose trend is "decreasing" for one cycle
        decreasing_sites_one_cycle = int((results_one_cycle_df['trend'] == 'decreasing').sum())
        percentage_decreasing_one_cycle = (decreasing_sites_one_cycle / total_sites_one_cycle) * 100

        print(f"Percentage of sites with decreasing trend for one cycle is: {percentage_decreasing_one_cycle:.2f}%")

        # Share of sites whose trend is "decreasing" for two cycles
        decreasing_sites_two_cycles = int((results_two_cycle_df['trend'] == 'decreasing').sum())
        percentage_decreasing_two_cycles = (decreasing_sites_two_cycles / total_sites_two_cycles) * 100

        print(f"Percentage of sites with decreasing trend for two cycles are: {percentage_decreasing_two_cycles:.2f}%")

        # The status is decided from the two-cycle results.
        # NOTE(review): this happens regardless of the one-cycle outcome;
        # confirm this matches the intended methodology.
        if percentage_decreasing_two_cycles > DECREASING_SITES_THRESHOLD_PCT:
            print("The status of the aquifer is bad :(.")
        else:
            print("The status of the aquifer is good yay!")

    else:
        print("The analysis cannot be performed due to the lack of data for the selected reporting cycle.")
        
   


        


Please enter a date (YYYY-MM-DD):  2022-01-01


The analysis cannot be completed using this date. There needs to be a 6-year timeseries before and after the selected date. Please select a data accordingly.
