# Mann kendell

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from pymannkendall import original_test

# Step 1: Read the CSV file into a DataFrame
df = pd.read_csv('Data Subset.csv', sep=';')

# Step 2: Data Preprocessing
df['Date'] = pd.to_datetime(df['Date'], format='%Y-%m-%d')

# Step 3: Check and convert the 'level' column to a numeric data type
df['level'] = pd.to_numeric(df['level'], errors='coerce')


# Step 4: Create a list to store the Mann-Kendall test results for each site
site_results = []

# Step 5: Conduct Mann-Kendall test for each site
sites = df['site'].unique()

for site in sites:
    site_data = df[df['site'] == site]
    result = original_test(site_data['level'], alpha=0.05)
    site_results.append({'site': site, 'trend': result.trend, 'p_value': result.p, 'h': result.h})

# Step 6: Print the Mann-Kendall test results for each site
print("Mann-Kendall Test Results:")
for result in site_results:
    print(f"Site: {result['site']}, Trend: {result['trend']}, P-Value: {result['p_value']}, H: {result['h']}")

# Step 7: Plot the Mann-Kendall test results on a graph
plt.figure(figsize=(12, 8))
for result in site_results:
    site_data = df[df['site'] == result['site']]
    plt.plot(site_data['date'], site_data['level'], label=f"{result['site']} - Trend: {result['trend']}", marker='o')

plt.xlabel('Date')
plt.ylabel('Water Level')
plt.title('Mann-Kendall Test Results for Each Site')
# plt.legend()
plt.tight_layout()
plt.show()


# Using Scipy Works

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import kendalltau

# Step 1: Read the CSV file into a DataFrame
df = pd.read_csv('Data Subset.csv', sep=',')

# Step 2: Data Preprocessing
df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%d')

# Step 3: Check and convert the 'level' column to a numeric data type
df['level'] = pd.to_numeric(df['level'], errors='coerce')

# Step 4: Create a list to store the Kendall's Tau and p-values for each site
site_results = []

# Step 5: Conduct Mann-Kendall test for each site
sites = df['site'].unique()

for site in sites:
    site_data = df[df['site'] == site]
    tau, p_value = kendalltau(site_data['level'], site_data['date'].map(pd.Timestamp.toordinal))
    site_results.append({'site': site, 'tau': tau, 'p_value': p_value})

# Step 6: Print the Kendall's Tau and p-values for each site
print("Kendall's Tau and P-Values:")
for result in site_results:
    print(f"Site: {result['site']}, Kendall's Tau: {result['tau']}, P-Value: {result['p_value']}")

#     # Step 7: Plot the Mann-Kendall test results on a graph
#     plt.figure(figsize=(12, 8))
#     for result in site_results:
#         site_data = df[df['site'] == result['site']]
#         plt.plot(site_data['date'], site_data['level'], label=f"{result['site']} - Kendall's Tau: {result['tau']:.2f}", marker='o')

#     plt.xlabel('Date')
#     plt.ylabel('Water Level')
#     plt.title("Kendall's Tau Test Results for Each Site")
#     plt.legend()
#     plt.tight_layout()
#     plt.show()


# Trendline (needs edits)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import kendalltau

# Step 1: Read the CSV file into a DataFrame
df = pd.read_csv('Fake.csv', sep=';')

# Step 2: Data Preprocessing
df['date'] = pd.to_datetime(df['date'], format='%d/%m/%Y')

# Step 3: Check and convert the 'level' column to a numeric data type
df['level'] = pd.to_numeric(df['level'], errors='coerce')

# Step 4: Create a list to store the Kendall's Tau and p-values for each site
site_results = []

# Step 5: Conduct Mann-Kendall test for each site
sites = df['site'].unique()

for site in sites:
    site_data = df[df['site'] == site]
    tau, p_value = kendalltau(site_data['level'], site_data['date'].map(pd.Timestamp.toordinal))
    site_results.append({'site': site, 'tau': tau, 'p_value': p_value})

# Step 6: Plot the Mann-Kendall test results on a graph
plt.figure(figsize=(12, 8))
for result in site_results:
    site_data = df[df['site'] == result['site']]
    plt.plot(site_data['date'], site_data['level'], label=f"{result['site']}", marker='o')

    # Calculate the linear regression coefficients (trendline)
    slope = result['tau'] * (site_data['date'].map(pd.Timestamp.toordinal).max() - site_data['date'].map(pd.Timestamp.toordinal).min())
    intercept = site_data['level'].mean() - slope * site_data['date'].map(pd.Timestamp.toordinal).mean()

    # Plot the trendline
    trendline = site_data['date'].map(pd.Timestamp.toordinal) * slope + intercept
    plt.plot(site_data['date'], trendline, label=f"{result['site']} Trendline")

plt.xlabel('Date')
plt.ylabel('Water Level')
plt.title("Kendall's Tau Test Results for Each Site with Trendlines")
plt.legend()
plt.tight_layout()
plt.show()


# More advanced 

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import kendalltau

# Step 1: Read the CSV file into a DataFrame
df = pd.read_csv('Data Subset.csv', sep=',')

# Step 2: Data Preprocessing
df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%d')

# Step 3: Check and convert the 'level' column to a numeric data type
df['level'] = pd.to_numeric(df['level'], errors='coerce')

# Step 4: Create a list to store the Kendall's Tau and p-values for each site
site_results = []

# Step 5: Conduct Mann-Kendall test for each site
sites = df['site'].unique()
relevant_trends = 0
irrelevant_trends = 0

for site in sites:
    site_data = df[df['site'] == site]
    tau, p_value = kendalltau(site_data['level'], site_data['date'].map(pd.Timestamp.toordinal))
    site_results.append({'site': site, 'tau': tau, 'p_value': p_value})
    if p_value < 0.05:
        relevant_trends += 1
    else:
        irrelevant_trends += 1

# Step 6: Print the Kendall's Tau and p-values for each site
print("Kendall's Tau and P-Values:")
for result in site_results:
    print(f"Site: {result['site']}, Kendall's Tau: {result['tau']}, P-Value: {result['p_value']}")
    if result['p_value'] < 0.05:
        print("Relevant trend: The trend is statistically significant.")
    else:
        print("Not Relevant trend: The trend is not statistically significant.")

# Step 7: Save the sites with information about trend relevance to a new CSV file
result_df = pd.DataFrame(site_results)
result_df.to_csv('trend_results.csv', index=False)

# Step 8: Compute and display the count of relevant and irrelevant trends
print(f"Number of relevant trends: {relevant_trends}")
print(f"Number of irrelevant trends: {irrelevant_trends}")

# Step 9: Plot the Mann-Kendall test results on a graph
plt.figure(figsize=(12, 8))
for result in site_results:
    site_data = df[df['site'] == result['site']]
    label = f"{result['site']} - Kendall's Tau: {result['tau']:.2f}"
    marker = 'o' if result['p_value'] < 0.05 else 'x'
    plt.plot(site_data['date'], site_data['level'], label=label, marker=marker)

# plt.xlabel('Date')
# plt.ylabel('Water Level')
# plt.title("Kendall's Tau Test Results for Each Site")
# plt.legend()
# plt.tight_layout()
# plt.show()


# Mann Kenall witj %

In [3]:
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import kendalltau

# Step 1: Read the CSV file into a DataFrame
df = pd.read_csv('Data Subset.csv', sep=',')

# Step 2: Data Preprocessing
df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%d')

# Step 3: Check and convert the 'level' column to a numeric data type
df['level'] = pd.to_numeric(df['level'], errors='coerce')

# Step 4: Create a list to store the Kendall's Tau and p-values for each site
site_results = []

# Step 5: Conduct Mann-Kendall test for each site
sites = df['site'].unique()
relevant_trends = 0
irrelevant_trends = 0
positive_trends = 0
negative_trends = 0

for site in sites:
    site_data = df[df['site'] == site]
    tau, p_value = kendalltau(site_data['level'], site_data['date'].map(pd.Timestamp.toordinal))
    site_results.append({'site': site, 'tau': tau, 'p_value': p_value})
    if p_value < 0.05:
        relevant_trends += 1
        if tau > 0:
            positive_trends += 1
        else:
            negative_trends += 1
    else:
        irrelevant_trends += 1

# # Step 6: Print the Kendall's Tau and p-values for each site
# print("Kendall's Tau and P-Values:")
# for result in site_results:
#     print(f"Site: {result['site']}, Kendall's Tau: {result['tau']}, P-Value: {result['p_value']}")
#     if result['p_value'] < 0.05:
#         print("Relevant trend: The trend is statistically significant.")
#     else:
#         print("Not Relevant trend: The trend is not statistically significant.")

# Step 7: Save the sites with information about trend relevance to a new CSV file
result_df = pd.DataFrame(site_results)
result_df.to_csv('trend_results.csv', index=False)

# Step 8: Compute and display the count of relevant and irrelevant trends
print(f"Number of relevant trends: {relevant_trends}")
print(f"Number of irrelevant trends: {irrelevant_trends}")

# Step 9: Compute and display the percentage of positive and negative trends
total_relevant_trends = positive_trends + negative_trends
percentage_positive_trends = (positive_trends / total_relevant_trends) * 100
percentage_negative_trends = (negative_trends / total_relevant_trends) * 100
print(f"Percentage of positive trends: {percentage_positive_trends:.2f}%")
print(f"Percentage of negative trends: {percentage_negative_trends:.2f}%")

# # Step 10: Plot the Mann-Kendall test results on a graph
# plt.figure(figsize=(12, 8))
# for result in site_results:
#     site_data = df[df['site'] == result['site']]
#     label = f"{result['site']} - Kendall's Tau: {result['tau']:.2f}"
#     marker = 'o' if result['p_value'] < 0.05 else 'x'
#     plt.plot(site_data['date'], site_data['level'], label=label, marker=marker)

# plt.xlabel('Date')
# plt.ylabel('Water Level')
# plt.title("Kendall's Tau Test Results for Each Site")
# plt.legend()
# plt.tight_layout()
# plt.show()


Number of relevant trends: 1871
Number of irrelevant trends: 341
Percentage of positive trends: 11.06%
Percentage of negative trends: 88.94%


# Mann Kendall with sens slope

In [4]:
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import kendalltau

# Step 1: Read the CSV file into a DataFrame
df = pd.read_csv('Data Subset.csv', sep=',')

# Step 2: Data Preprocessing
df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%d')

# Step 3: Check and convert the 'level' column to a numeric data type
df['level'] = pd.to_numeric(df['level'], errors='coerce')

# Step 4: Create a list to store the Kendall's Tau and p-values for each site
site_results = []

# Step 5: Conduct Mann-Kendall test and Sen's slope for each site
sites = df['site'].unique()
relevant_trends = 0
irrelevant_trends = 0
positive_trends = 0
negative_trends = 0
positive_slopes = 0
negative_slopes = 0

for site in sites:
    site_data = df[df['site'] == site]
    
    # Calculate Kendall's Tau
    tau, p_value = kendalltau(site_data['level'], site_data['date'].map(pd.Timestamp.toordinal))
    
    # Calculate Sen's slope
    n = len(site_data)
    sen_slope = (site_data['level'].iloc[-1] - site_data['level'].iloc[0]) / (n - 1)
    
    site_results.append({'site': site, 'tau': tau, 'p_value': p_value, 'sen_slope': sen_slope})
    
    if p_value < 0.05:
        relevant_trends += 1
        if tau > 0:
            positive_trends += 1
            if sen_slope > 0:
                positive_slopes += 1
            else:
                negative_slopes += 1
        else:
            negative_trends += 1
            if sen_slope > 0:
                positive_slopes += 1
            else:
                negative_slopes += 1
    else:
        irrelevant_trends += 1

# # Step 6: Print the Kendall's Tau, p-values, and Sen's slopes for each site
# print("Kendall's Tau, P-Values, and Sen's Slopes:")
# for result in site_results:
#     print(f"Site: {result['site']}, Kendall's Tau: {result['tau']}, P-Value: {result['p_value']}, Sen's Slope: {result['sen_slope']:.2f}")
#     if result['p_value'] < 0.05:
#         print("Relevant trend: The trend is statistically significant.")
#     else:
#         print("Not Relevant trend: The trend is not statistically significant.")

# Step 7: Save the sites with information about trend relevance and Sen's slopes to a new CSV file
result_df = pd.DataFrame(site_results)
result_df.to_csv('trend_results.csv', index=False)

# Step 8: Compute and display the count of relevant and irrelevant trends
print(f"Number of relevant trends: {relevant_trends}")
print(f"Number of irrelevant trends: {irrelevant_trends}")

# Step 9: Compute and display the percentage of positive and negative trends
total_relevant_trends = positive_trends + negative_trends
percentage_positive_trends = (positive_trends / total_relevant_trends) * 100
percentage_negative_trends = (negative_trends / total_relevant_trends) * 100
print(f"Percentage of positive trends: {percentage_positive_trends:.2f}%")
print(f"Percentage of negative trends: {percentage_negative_trends:.2f}%")

# Step 10: Compute and display the count of positive and negative Sen's slopes
print(f"Number of sites with positive Sen's slope: {positive_slopes}")
print(f"Number of sites with negative Sen's slope: {negative_slopes}")

# # Step 11: Plot the Mann-Kendall test results on a graph
# plt.figure(figsize=(12, 8))
# for result in site_results:
#     site_data = df[df['site'] == result['site']]
#     label = f"{result['site']} - Kendall's Tau: {result['tau']:.2f}"
#     marker = 'o' if result['p_value'] < 0.05 else 'x'
#     plt.plot(site_data['date'], site_data['level'], label=label, marker=marker)

# plt.xlabel('Date')
# plt.ylabel('Water Level')
# plt.title("Kendall's Tau Test Results for Each Site")
# plt.legend()
# plt.tight_layout()
# plt.show()


Number of relevant trends: 1871
Number of irrelevant trends: 341
Percentage of positive trends: 11.06%
Percentage of negative trends: 88.94%
Number of sites with positive Sen's slope: 228
Number of sites with negative Sen's slope: 1643


# Modified Mann Kendall

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import kendalltau

# Function to calculate the Modified Mann-Kendall test
def modified_mann_kendall(data):
    n = len(data)
    s = 0
    for i in range(n-1):
        for j in range(i+1, n):
            if data.iloc[j] > data.iloc[i]:
                s += 1
            elif data.iloc[j] < data.iloc[i]:
                s -= 1
    return s


# Step 1: Read the CSV file into a DataFrame
df = pd.read_csv('groundwater_timeseries_data_Negative.csv', sep=',')

# Step 2: Data Preprocessing
df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%d')

# Step 3: Check and convert the 'level' column to a numeric data type
df['level'] = pd.to_numeric(df['level'], errors='coerce')

# Step 4: Create a list to store the Kendall's Tau and p-values for each site
site_results = []

# Step 5: Conduct Mann-Kendall test, Modified Mann-Kendall test, and Sen's slope for each site
sites = df['site'].unique()
relevant_trends = 0
irrelevant_trends = 0
positive_trends = 0
negative_trends = 0
positive_slopes = 0
negative_slopes = 0

for site in sites:
    site_data = df[df['site'] == site]
    
    # Calculate Kendall's Tau
    tau, p_value = kendalltau(site_data['level'], site_data['date'].map(pd.Timestamp.toordinal))
    
    # Calculate Modified Mann-Kendall test
    mmk = modified_mann_kendall(site_data['level'])
    
    # Calculate Sen's slope
    n = len(site_data)
    sen_slope = (site_data['level'].iloc[-1] - site_data['level'].iloc[0]) / (n - 1)
    
    site_results.append({'site': site, 'tau': tau, 'p_value': p_value, 'sen_slope': sen_slope, 'mmk': mmk})
    
    if p_value < 0.05:
        relevant_trends += 1
        if tau > 0:
            positive_trends += 1
            if sen_slope > 0:
                positive_slopes += 1
            else:
                negative_slopes += 1
        else:
            negative_trends += 1
            if sen_slope > 0:
                positive_slopes += 1
            else:
                negative_slopes += 1
    else:
        irrelevant_trends += 1

# # Step 6: Print the Kendall's Tau, p-values, Modified Mann-Kendall test, and Sen's slopes for each site
# print("Kendall's Tau, P-Values, Modified Mann-Kendall, and Sen's Slopes:")
# for result in site_results:
#     print(f"Site: {result['site']}, Kendall's Tau: {result['tau']}, P-Value: {result['p_value']}, Modified Mann-Kendall: {result['mmk']}, Sen's Slope: {result['sen_slope']:.2f}")
#     if result['p_value'] < 0.05:
#         print("Relevant trend: The trend is statistically significant.")
#     else:
#         print("Not Relevant trend: The trend is not statistically significant.")

# Step 7: Save the sites with information about trend relevance, Modified Mann-Kendall, and Sen's slopes to a new CSV file
result_df = pd.DataFrame(site_results)
# result_df.to_csv('trend_results.csv', index=False)

# Step 8: Compute and display the count of relevant and irrelevant trends
print(f"Number of relevant trends: {relevant_trends}")
print(f"Number of irrelevant trends: {irrelevant_trends}")

# Step 9: Compute and display the percentage of positive and negative trends
total_relevant_trends = positive_trends + negative_trends
percentage_positive_trends = (positive_trends / total_relevant_trends) * 100
percentage_negative_trends = (negative_trends / total_relevant_trends) * 100
print(f"Percentage of positive trends: {percentage_positive_trends:.2f}%")
print(f"Percentage of negative trends: {percentage_negative_trends:.2f}%")

# Step 10: Compute and display the count of positive and negative Sen's slopes
print(f"Number of sites with positive Sen's slope: {positive_slopes}")
print(f"Number of sites with negative Sen's slope: {negative_slopes}")

# # Step 11: Plot the Mann-Kendall test results on a graph
# plt.figure(figsize=(12, 8))
# for result in site_results:
#     site_data = df[df['site'] == result['site']]
#     label = f"{result['site']} - Kendall's Tau: {result['tau']:.2f}"
#     marker = 'o' if result['p_value'] < 0.05 else 'x'
#     plt.plot(site_data['date'], site_data['level'], label=label, marker=marker)

# plt.xlabel('Date')
# plt.ylabel('Water Level')
# plt.title("Kendall's Tau Test Results for Each Site")
# plt.legend()
# plt.tight_layout()
# plt.show()
