# Germany 

In [3]:
%%time 

import pandas as pd
import numpy as np
from scipy.stats import linregress

def choose_start_date(selected_date, earliest_date):
    """Pick the start of the analysis window that ends at *selected_date*.

    A 30-year look-back is preferred; if the dataset does not reach back
    that far, a 15-year look-back is tried instead.

    Parameters
    ----------
    selected_date : pandas.Timestamp
        End of the analysis window.
    earliest_date : pandas.Timestamp
        Earliest date present in the dataset.

    Returns
    -------
    pandas.Timestamp or None
        Start of the window, or ``None`` when neither look-back is covered
        by the data (this includes a missing/NaT date on either side,
        because comparisons with NaT are always false).
    """
    for years_back in (30, 15):
        candidate = selected_date - pd.DateOffset(years=years_back)
        if candidate >= earliest_date:
            return pd.Timestamp(candidate)
    return None


def classify_trend(slope_per_year):
    """Map a per-year slope value onto a textual status label.

    Thresholds: below -2 'falling sharply', [-2, -1) 'falling',
    [-1, 1] 'consistent', (1, 2] 'rising', above 2 'strongly increasing'.
    A NaN slope yields 'undetermined'; without that explicit check NaN
    would fail every comparison and end up in the last category.
    """
    if pd.isna(slope_per_year):
        return 'undetermined'
    if slope_per_year < -2:
        return 'falling sharply'
    if slope_per_year < -1:
        return 'falling'
    if slope_per_year <= 1:
        return 'consistent'
    if slope_per_year <= 2:
        return 'rising'
    return 'strongly increasing'


def compute_site_trends(filtered_data, start_date, selected_date):
    """Fit a linear trend of 'level' against observation index for each site.

    Parameters
    ----------
    filtered_data : pandas.DataFrame
        Rows already restricted to the analysis window; must contain the
        columns 'site', 'date' and 'level'.
    start_date, selected_date : pandas.Timestamp
        Bounds of the analysis window; only their ``.year`` is used, to
        derive the inclusive year count.

    Returns
    -------
    pandas.DataFrame
        One row per site (in sorted site order) with the columns site,
        min_level, max_level, level_difference, slope, slope_per_year,
        num_years and status.
    """
    columns = ['site', 'min_level', 'max_level', 'level_difference',
               'slope', 'slope_per_year', 'num_years', 'status']

    # Loop-invariant: counting inclusive years.
    num_years = (selected_date.year - start_date.year) + 1

    # The regression uses the row index as x, so rows must be chronological.
    # A stable sort keeps the existing order of rows sharing a date.
    ordered = filtered_data.sort_values('date', kind='stable')

    records = []
    # A single groupby pass replaces re-filtering the whole frame per site;
    # groups are yielded in sorted key order.
    for site, site_data in ordered.groupby('site'):
        levels = site_data['level']
        if len(levels) < 2:
            # A line cannot be fitted through fewer than two observations.
            slope = np.nan
        else:
            slope = linregress(np.arange(len(levels)), levels.to_numpy()).slope

        min_level = levels.min()
        max_level = levels.max()

        # NOTE(review): slope is in level units per observation step, so
        # scaling by 100 and dividing by the year count is not obviously a
        # per-year rate (the original comment describes it as "convert slope
        # to cm and divide by years"). Formula kept as-is because the status
        # thresholds depend on it - confirm the intended units.
        slope_per_year = (slope * 100) / num_years

        records.append({
            'site': site,
            'min_level': min_level,
            'max_level': max_level,
            'level_difference': max_level - min_level,
            'slope': slope,
            'slope_per_year': slope_per_year,
            'num_years': num_years,
            'status': classify_trend(slope_per_year),
        })

    return pd.DataFrame(records, columns=columns)


# The guard is true when this runs as a notebook cell or a script, so the
# names below are still defined at module level; it only prevents the
# interactive workflow from running when the functions above are imported.
if __name__ == "__main__":
    # Load the CSV data and parse the 'date' column.
    data = pd.read_csv('groundwater_timeseries_data_Negative.csv')
    data['date'] = pd.to_datetime(data['date'])

    # Ask the user to select the end of the analysis window.
    selected_date = pd.to_datetime(input("Please enter a date (YYYY-MM):"))

    # Date range covered by the dataset (the column is already datetime).
    earliest_date = data['date'].min()
    latest_date = data['date'].max()

    # Prefer a 30-year window, fall back to 15 years, otherwise stop.
    start_date = choose_start_date(selected_date, earliest_date)
    if start_date is None:
        print("Not enough data available.")
        # Same exit status as the former exit(), without relying on the
        # site/IPython-provided helper.
        raise SystemExit

    # Filter data for the selected time period (both ends inclusive).
    filtered_data = data[(data['date'] >= start_date) & (data['date'] <= selected_date)]

    # Per-site min/max table; not used below, retained in case later cells
    # reference it.
    agg_funcs = {'level': ['min', 'max']}
    min_max_levels = filtered_data.groupby('site').agg(agg_funcs).reset_index()

    # Per-site regression results.
    regression_df = compute_site_trends(filtered_data, start_date, selected_date)

    # Save the DataFrame as Excel and CSV files.
    regression_df.to_excel('regression_results.xlsx', index=False)
    regression_df.to_csv('regression_results.csv', index=False)

    # Display the results.
    print("Selected Date:", selected_date)
    print("Start Date:", start_date)
    print("End Date:", selected_date)
    print("\nRegression Results:")
    print(regression_df)


Please enter a date (YYYY-MM): 2019-01-01


Selected Date: 2019-01-01 00:00:00
Start Date: 1989-01-01 00:00:00
End Date: 2019-01-01 00:00:00

Regression Results:
           site  min_level  max_level  level_difference     slope  \
0        Site_1    -203.71     -33.16            170.55 -0.462532   
1       Site_10    -207.21     -37.07            170.14 -0.480619   
2      Site_100    -199.84     -49.36            150.48 -0.421779   
3     Site_1000    -155.56     -39.54            116.02 -0.267903   
4     Site_1001    -228.25     -68.24            160.01 -0.429566   
...         ...        ...        ...               ...       ...   
3195   Site_995    -206.85     -57.35            149.50 -0.426251   
3196   Site_996    -198.63     -53.79            144.84 -0.413774   
3197   Site_997    -201.86     -63.55            138.31 -0.366523   
3198   Site_998    -187.45     -62.66            124.79 -0.315611   
3199   Site_999    -219.82     -48.69            171.13 -0.443852   

      slope_per_year  num_years      status  
0      