In [1]:
%%time

import pandas as pd
from datetime import datetime, timedelta
import numpy as np
import pymannkendall as mk

# Read the CSV file
file_path = 'groundwater_timeseries_data_Negative.csv'
df = pd.read_csv(file_path)

df['date'] = pd.to_datetime(df['date'])

# Get user input for date
while True:
    try:
        user_date = input("Enter a date (YYYY-MM-DD): ")
        user_date = datetime.strptime(user_date, "%Y-%m-%d")
        break
    except ValueError:
        print("Invalid date format. Please use YYYY-MM-DD format.")

# Calculate the start date (32 years prior to the target date)
start_date = user_date - pd.DateOffset(years=32)

# Check if the required years of data are available
if start_date.year < df['date'].dt.year.min() or user_date.year > df['date'].dt.year.max():
    print(f"Data is not available for 32 years prior to the selected date.")
else:
    # Create a new DataFrame with filtered data
    filtered_df = df[(df['date'] >= start_date) & (df['date'] <= user_date)]
    
    # Create empty lists to store the results
    mk_results = []
    sens_slope_results = []
    
    # Perform Mann-Kendall test and Sen's slope for each site
    for site in filtered_df['site'].unique():
        site_data = filtered_df[filtered_df['site'] == site]
        
        # Mann-Kendall test
        mk_result = mk.original_test(site_data['level'])
        mk_results.append([site, mk_result.slope, mk_result.intercept, mk_result.trend, mk_result.h, mk_result.p, mk_result.z, mk_result.Tau, mk_result.s])
        
        # Sen's slope
        sens_slope_result = mk.sens_slope(site_data['level'])
        sens_slope_results.append([site, sens_slope_result])
    
    # Create DataFrames from the results
    mk_results_df = pd.DataFrame(mk_results, columns=['site', 'slope', 'intercept', 'trend', 'h', 'p', 'z', 'Tau', 's'])
    sens_slope_results_df = pd.DataFrame(sens_slope_results, columns=['site', 'sens_slope'])
    
    # Splitting "sens_slope" column into "slope" and "intercept" columns
    sens_slope_results_df[['slope', 'intercept']] = sens_slope_results_df['sens_slope'].apply(pd.Series)
    sens_slope_results_df.drop(columns=['sens_slope'], inplace=True)
             
    # Save Sen's slope results to CSV and Excel files
    sens_slope_results_df.to_csv('sens_slope_results.csv', index=False)
    sens_slope_results_df.to_excel('sens_slope_results.xlsx', index=False)
    
    # Save Mann-Kendall results to CSV and Excel files
    mk_results_df.to_csv('mann_kendall_results.csv', index=False)
    mk_results_df.to_excel('mann_kendall_results.xlsx', index=False)
    
    # Calculate percentages
    positive_slope_percentage = (sens_slope_results_df['slope'] > 0).mean() * 100
    increasing_trend_percentage = (mk_results_df['trend'] == 'increasing').mean() * 100
    decreasing_trend_percentage = (mk_results_df['trend'] == 'decreasing').mean() * 100
    negative_z_percentage = (mk_results_df['z'] < 0).mean() * 100
    
    # Print results
    print(f"Percentage of sites with positive Sen's slope: {positive_slope_percentage:.2f}%")
    print(f"Percentage of sites with increasing trend: {increasing_trend_percentage:.2f}%")
    print(f"Percentage of sites with decreasing trend: {decreasing_trend_percentage:.2f}%")
    print(f"Percentage of sites with negative 'z' values: {negative_z_percentage:.2f}%")


Enter a date (YYYY-MM-DD):  2022-01-01


Percentage of sites with positive Sen's slope: 0.00%
Percentage of sites with increasing trend: 0.00%
Percentage of sites with decreasing trend: 100.00%
Percentage of sites with negative 'z' values: 100.00%
CPU times: total: 3min 26s
Wall time: 3min 30s
