In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pygam import LinearGAM, GAM, s, l, te
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import os
import csv
import shutil
import traceback
import random
from datetime import datetime

In [2]:
def import_csv(input_no, *csvs):
    if input_no < 1 or input_no > len(csvs):
        raise ValueError("input_no should be between 1 and the number of CSV files")
    
    dfs = [pd.read_csv(csv, dtype={"ptcode": str}) for csv in csvs[:input_no]]
    full_wims = pd.concat(dfs)
    
    return full_wims#def import class

In [3]:
def create_pivot(df):
    #Create Pivot
    Est_pivot = df.pivot_table(values='result',index=['date','time','ptcode','easting','northing','site_avg_salinity','salinity_class'],columns=['detname'])
    #Drop the "Time" column from results as this duplicates a column elsewhere
    Est_pivot = Est_pivot.drop(columns = ['time'],axis=1,errors='ignore')
    #Reset the index
    Est_pivot.reset_index(inplace=True)
    #Create a column for year
    Est_pivot['date'] = pd.to_datetime(Est_pivot['date'])
    Est_pivot["year"]=Est_pivot["date"].dt.year
    #Create a column for month
    Est_pivot["month"]=Est_pivot["date"].dt.month
    Est_pivot.columns = Est_pivot.columns.str.replace(':', '')
    Est_pivot.columns = Est_pivot.columns.str.replace('%', '')
    Est_group_all = Est_pivot.copy().reset_index()

    return Est_group_all

In [7]:
current_directory = os.getcwd()
parent_directory = os.path.dirname(current_directory)
print(parent_directory)
output_data_files = os.path.join(parent_directory, "output_data")
input_model_files_directory = os.path.join(parent_directory, "input_model_files")

C:\Users\alechutchings\GAM_models_temp_do


In [8]:
csv_file_path = os.path.join(input_model_files_directory, "estuary_shapefile_dictionary.csv")
#csv_file_path = "C:\\Users\\alechutchings\\Documents\\PythonNotebooks\\estuary_shapefile_dictionary.csv"

# Initialize an empty dictionary
data_dict = {}

# Read the CSV file and populate the dictionary
with open(csv_file_path, 'r') as csv_file:
    csv_reader = csv.reader(csv_file)
    
    # Skip the header row if it exists
    next(csv_reader, None)
    
    # Iterate through rows and add key-value pairs to the dictionary
    for row in csv_reader:
        key = row[0]
        values = row[1:]  # Collect all values from the second column onwards
        data_dict[key] = values


In [9]:
estuary_list = list(data_dict.keys())
print(len(estuary_list))

83


In [12]:
det = 'Temperature of Water'
all_df = pd.DataFrame()

for estuary in estuary_list:
    try:
        output_data_directory = os.path.join(parent_directory, "output_data")
        estuary_folder_path = os.path.join(output_data_directory,f'{estuary}')

        #Direct to input csv
        csv_input_path = os.path.join(estuary_folder_path, f'{estuary}_wims_data_clean_nd_removed.csv')
        #Import function
        df = import_csv(1,csv_input_path)
        #Pivot function
        Est_group_all = create_pivot(df)

        #Create a new dataset with only entries for the determinand
        Det_not_na_msk = Est_group_all[det].notna()
        cols = ['date', 'time','ptcode']
        Est_group_all_det = Est_group_all.loc[Det_not_na_msk, cols]

        early_date = Est_group_all_det['date'].min()
        late_date = Est_group_all_det['date'].max()

        date_df =[]
        date_df.append({
            'Earliest date': early_date,
            'Latest date': late_date})

        date_df = pd.DataFrame(date_df)
        date_df['Estuary'] = estuary

        # Append the DataFrame to a master DataFrame (if it already exists)
        if 'all_df' not in locals():
            all_df = date_df.copy()
        else:
            all_df = pd.concat([all_df, date_df], ignore_index=True)

    except Exception as e:
        # Print the error message
        traceback.print_exc()
        error_message = f"Error processing {estuary}: {str(e)}"

        # Log the error to a notepad file
        log_file_path = os.path.join(output_data_directory,f'sample_window_error_log.txt')
        with open(log_file_path, 'a') as log_file:
            log_file.write(error_message + '\n')
        print(error_message)

    finally:
        # Any cleanup code or additional actions you want to perform regardless of success or failure
        pass

sample_window_folder_path = os.path.join(output_data_directory, "Sample_count_data")
sample_window_folder_path_csv = os.path.join(sample_window_folder_path, f"Sample_time_windows_{det}.csv")
all_df.to_csv(sample_window_folder_path_csv, index=False)


Traceback (most recent call last):
  File "C:\Users\alechutchings\AppData\Local\anaconda3\envs\geo_env\Lib\site-packages\pandas\core\indexes\base.py", line 3653, in get_loc
    return self._engine.get_loc(casted_key)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "pandas\_libs\index.pyx", line 147, in pandas._libs.index.IndexEngine.get_loc
  File "pandas\_libs\index.pyx", line 176, in pandas._libs.index.IndexEngine.get_loc
  File "pandas\_libs\hashtable_class_helper.pxi", line 7080, in pandas._libs.hashtable.PyObjectHashTable.get_item
  File "pandas\_libs\hashtable_class_helper.pxi", line 7088, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'Temperature of Water'

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "C:\Users\alechutchings\AppData\Local\Temp\2\ipykernel_824\4126155582.py", line 17, in <module>
    Det_not_na_msk = Est_group_all[det].notna()
                     ~~~~~~~~~~~~~^^^^^
  File "C:\

Error processing Aln: 'Temperature of Water'


Traceback (most recent call last):
  File "C:\Users\alechutchings\AppData\Local\anaconda3\envs\geo_env\Lib\site-packages\pandas\core\indexes\base.py", line 3653, in get_loc
    return self._engine.get_loc(casted_key)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "pandas\_libs\index.pyx", line 147, in pandas._libs.index.IndexEngine.get_loc
  File "pandas\_libs\index.pyx", line 176, in pandas._libs.index.IndexEngine.get_loc
  File "pandas\_libs\hashtable_class_helper.pxi", line 7080, in pandas._libs.hashtable.PyObjectHashTable.get_item
  File "pandas\_libs\hashtable_class_helper.pxi", line 7088, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'Temperature of Water'

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "C:\Users\alechutchings\AppData\Local\Temp\2\ipykernel_824\4126155582.py", line 17, in <module>
    Det_not_na_msk = Est_group_all[det].notna()
                     ~~~~~~~~~~~~~^^^^^
  File "C:\

Error processing Bristol_avon: 'Temperature of Water'


Traceback (most recent call last):
  File "C:\Users\alechutchings\AppData\Local\anaconda3\envs\geo_env\Lib\site-packages\pandas\core\indexes\base.py", line 3653, in get_loc
    return self._engine.get_loc(casted_key)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "pandas\_libs\index.pyx", line 147, in pandas._libs.index.IndexEngine.get_loc
  File "pandas\_libs\index.pyx", line 176, in pandas._libs.index.IndexEngine.get_loc
  File "pandas\_libs\hashtable_class_helper.pxi", line 7080, in pandas._libs.hashtable.PyObjectHashTable.get_item
  File "pandas\_libs\hashtable_class_helper.pxi", line 7088, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'Temperature of Water'

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "C:\Users\alechutchings\AppData\Local\Temp\2\ipykernel_824\4126155582.py", line 17, in <module>
    Det_not_na_msk = Est_group_all[det].notna()
                     ~~~~~~~~~~~~~^^^^^
  File "C:\

Error processing Derwent: 'Temperature of Water'
Error processing Duddon: 'Temperature of Water'


Traceback (most recent call last):
  File "C:\Users\alechutchings\AppData\Local\anaconda3\envs\geo_env\Lib\site-packages\pandas\core\indexes\base.py", line 3653, in get_loc
    return self._engine.get_loc(casted_key)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "pandas\_libs\index.pyx", line 147, in pandas._libs.index.IndexEngine.get_loc
  File "pandas\_libs\index.pyx", line 176, in pandas._libs.index.IndexEngine.get_loc
  File "pandas\_libs\hashtable_class_helper.pxi", line 7080, in pandas._libs.hashtable.PyObjectHashTable.get_item
  File "pandas\_libs\hashtable_class_helper.pxi", line 7088, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'Temperature of Water'

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "C:\Users\alechutchings\AppData\Local\Temp\2\ipykernel_824\4126155582.py", line 17, in <module>
    Det_not_na_msk = Est_group_all[det].notna()
                     ~~~~~~~~~~~~~^^^^^
  File "C:\

Error processing Maryport: 'Temperature of Water'


Traceback (most recent call last):
  File "C:\Users\alechutchings\AppData\Local\anaconda3\envs\geo_env\Lib\site-packages\pandas\core\indexes\base.py", line 3653, in get_loc
    return self._engine.get_loc(casted_key)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "pandas\_libs\index.pyx", line 147, in pandas._libs.index.IndexEngine.get_loc
  File "pandas\_libs\index.pyx", line 176, in pandas._libs.index.IndexEngine.get_loc
  File "pandas\_libs\hashtable_class_helper.pxi", line 7080, in pandas._libs.hashtable.PyObjectHashTable.get_item
  File "pandas\_libs\hashtable_class_helper.pxi", line 7088, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'Temperature of Water'

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "C:\Users\alechutchings\AppData\Local\Temp\2\ipykernel_824\4126155582.py", line 17, in <module>
    Det_not_na_msk = Est_group_all[det].notna()
                     ~~~~~~~~~~~~~^^^^^
  File "C:\

Error processing Otter: 'Temperature of Water'


Traceback (most recent call last):
  File "C:\Users\alechutchings\AppData\Local\anaconda3\envs\geo_env\Lib\site-packages\pandas\core\indexes\base.py", line 3653, in get_loc
    return self._engine.get_loc(casted_key)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "pandas\_libs\index.pyx", line 147, in pandas._libs.index.IndexEngine.get_loc
  File "pandas\_libs\index.pyx", line 176, in pandas._libs.index.IndexEngine.get_loc
  File "pandas\_libs\hashtable_class_helper.pxi", line 7080, in pandas._libs.hashtable.PyObjectHashTable.get_item
  File "pandas\_libs\hashtable_class_helper.pxi", line 7088, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'Temperature of Water'

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "C:\Users\alechutchings\AppData\Local\Temp\2\ipykernel_824\4126155582.py", line 17, in <module>
    Det_not_na_msk = Est_group_all[det].notna()
                     ~~~~~~~~~~~~~^^^^^
  File "C:\

Error processing Wey: 'Temperature of Water'
