In [None]:
import pandas as pd
import pickle
from pathlib import Path
import os
import xarray as xr

# Work relative to the folder that contains the notebook's working directory.
cwd = os.getcwd()
parent_pad = Path(cwd).parent
print(parent_pad)

# NetCDF timeseries file to inspect.
path = parent_pad.joinpath("output_caravan/timeseries/netcdf/camels/camels_01013500.nc")

# Open the file as an xarray Dataset.
data = xr.open_dataset(path)

# Show the 'streamflow' variable of the dataset.
streamflow = data['streamflow']
print(streamflow)



In [None]:

# Work relative to the folder that contains the notebook's working directory.
cwd = os.getcwd()
parent_pad = Path(cwd).parent
print(parent_pad)

# NetCDF timeseries file to inspect.
path = parent_pad.joinpath("output_caravan_EU/timeseries/netcdf/6/6.nc")

# Open the file as an xarray Dataset.
data = xr.open_dataset(path)

# .mean() is called without a dimension argument, so it averages over every
# dimension the 'streamflow' variable has.
streamflow = data['streamflow']
mean_streamflow_time = streamflow.mean()
print("Mean Streamflow (time dimension):", mean_streamflow_time)



In [None]:
# Re-importing necessary libraries after environment reset
import os
import pandas as pd
import matplotlib.pyplot as plt


# Work relative to the folder that contains the notebook's working directory.
cwd = os.getcwd()
parent_pad = Path(cwd).parent
print(parent_pad)

# Directory whose sub-folders hold the attribute CSV files.
parent_path = parent_pad / "output_caravan/attributes"

# Gather the 'area' column of every <folder>/attributes_other_<folder>.csv.
# Sub-folders are visited in sorted order so the contents of `area_values`
# (and anything printed from it later) do not depend on the order in which
# the filesystem happens to list directory entries.
area_values = []
for folder in sorted(os.listdir(parent_path)):
    folder_path = os.path.join(parent_path, folder)
    if os.path.isdir(folder_path):
        csv_file = os.path.join(folder_path, f"attributes_other_{folder}.csv")
        if os.path.isfile(csv_file):
            df = pd.read_csv(csv_file)
            area_values.extend(df['area'])

# Fail with an explicit message instead of the cryptic error that max()/min()
# raise on an empty sequence (or the ZeroDivisionError from the mean below).
if not area_values:
    raise ValueError(
        f"No 'area' values found in any attributes_other_<folder>.csv under {parent_path}"
    )

# Summary statistics: largest, smallest and arithmetic mean, in that order.
smallest_area = min(area_values)
largest_area = max(area_values)
print(largest_area)
print(smallest_area)
print(sum(area_values) / len(area_values))

# Histogram of all collected areas, with the x-axis pinned to the data range.
fig, ax = plt.subplots(figsize=(10, 6))
ax.set_xlim(smallest_area, largest_area)
ax.hist(area_values, bins=20, color='blue', edgecolor='black', alpha=0.7)
ax.set_title("Histogram of Area Values")
ax.set_xlabel("Area")
ax.set_ylabel("Frequency")
ax.grid(axis='y', linestyle='--', alpha=0.7)
plt.show()


In [None]:
# Keep only the areas inside the closed interval [50, 250].
filtered_area_values = list(filter(lambda area: 50 <= area <= 250, area_values))

# Show the selection, then how many values were kept and how many there were in total.
print(filtered_area_values)
print(len(filtered_area_values), len(area_values), sep="\n")


In [None]:
import os
import pandas as pd

# Work relative to the folder that contains the notebook's working directory.
cwd = os.getcwd()
parent_pad = Path(cwd).parent
print(parent_pad)

# Directory whose sub-folders hold the attribute CSV files.
parent_path = parent_pad / "output_caravan/attributes"

# Name of the merged file this cell writes into every sub-folder.
combined_file_name = "attributes.csv"

# Sub-folder names, sorted so the processing order is the same on every run.
folders = sorted(f for f in os.listdir(parent_path) if os.path.isdir(os.path.join(parent_path, f)))

# Process each folder
for folder in folders:
    folder_path = os.path.join(parent_path, folder)

    # Input CSV files of this folder. The merged output of a previous run is
    # excluded: it also ends in '.csv', and merging it back in would duplicate
    # every column each time the cell is re-run. Sorting fixes the column order.
    csv_files = sorted(
        os.path.join(folder_path, file)
        for file in os.listdir(folder_path)
        if file.endswith('.csv') and file != combined_file_name
    )

    # pd.concat raises on an empty list, so skip folders with nothing to merge.
    if not csv_files:
        print(f"No CSV files to merge in {folder_path}, skipping.")
        continue

    # Align the files on their first column and place their columns side by side.
    dataframes = [pd.read_csv(csv_file) for csv_file in csv_files]
    combined_df = pd.concat([df.set_index(df.columns[0]) for df in dataframes], axis=1)
    combined_df = combined_df.reset_index()  # Turn the shared key back into a regular column

    # Save the combined file
    output_path = os.path.join(folder_path, combined_file_name)
    combined_df.to_csv(output_path, index=False)

"Completed merging CSV files in all folders."


In [None]:
# Destination of the single table that stacks every folder's attributes.csv.
final_output_path = os.path.join(parent_path, "attributes.csv")
print(final_output_path)

# Read every per-folder attributes.csv first and concatenate once at the end.
# Calling pd.concat inside the loop would re-copy the accumulated frame on
# every iteration, which makes the total work quadratic in the number of folders.
attribute_frames = []
for folder in folders:
    # Path to the attributes.csv inside the current folder
    file_path = os.path.join(parent_path, folder, "attributes.csv")
    print(file_path)

    # Folders without a merged file are silently left out.
    if os.path.exists(file_path):
        attribute_frames.append(pd.read_csv(file_path))

# pd.concat rejects an empty list; fall back to an empty frame so the cell
# still writes an output file when no per-folder file was found.
if attribute_frames:
    merged_attributes = pd.concat(attribute_frames, axis=0, ignore_index=True)
else:
    merged_attributes = pd.DataFrame()

# Save the final merged DataFrame to the attributes directory
merged_attributes.to_csv(final_output_path, index=False)

final_output_path


In [None]:
import os
import pandas as pd
from pathlib import Path

# Work relative to the folder that contains the notebook's working directory.
cwd = os.getcwd()
parent_pad = Path(cwd).parent
print(parent_pad)

# Merged attribute table to read.
attributes_path = parent_pad / "output_caravan/attributes/attributes.csv"

# Load the whole table.
attributes_df = pd.read_csv(attributes_path, low_memory=False)

# Boolean mask that is True where 50 <= area <= 250, then the matching rows.
area_in_range = attributes_df['area'].between(50, 250)
filtered_df = attributes_df.loc[area_in_range]

# Report the row counts before and after the selection.
print(f"Rows before filtering: {len(attributes_df)}")
print(f"Rows after filtering: {len(filtered_df)}")

# Write the selected rows to a new CSV file in the same directory.
filtered_df.to_csv(parent_pad / "output_caravan/attributes/filtered_attributes.csv", index=False)

print("Filtering complete and saved.")


In [None]:
import os
import pandas as pd
from pathlib import Path

# Work relative to the folder that contains the notebook's working directory.
cwd = os.getcwd()
parent_pad = Path(cwd).parent
print(parent_pad)

# Filtered attribute table to split into one file per row.
attributes_path = parent_pad / "output_caravan/attributes/filtered_attributes.csv"

# header=None makes pandas treat the header line as ordinary data row 0.
# NOTE(review): presumably done so the first-column ids are kept as text
# (the loop below calls str.lstrip on them) - confirm.
attributes_df = pd.read_csv(attributes_path, header=None, low_memory=False)

# Row 0 holds the column names; it is re-used as the header of every output file.
header_row = attributes_df.iloc[0]

# All per-row folders are created next to the input file (loop-invariant).
output_dir = os.path.dirname(attributes_path)

# Walk over the data rows (everything after the header row).
for index, row in attributes_df.iloc[1:].iterrows():

    # Rows with at least one missing value are skipped entirely.
    # NOTE(review): the original comment described this as a check for
    # "non-empty" rows, but any() drops a row as soon as a single column is
    # missing - confirm that this is intended rather than all().
    if row.isnull().any():
        print(f"Skipped row {index} due to NaN values")
        continue

    # Identifier of the row: the value in the first column.
    original_name = row[0]

    # Folder and file are named after the identifier without leading zeros.
    # If stripping removes everything (an id made only of zeros), fall back to
    # the full identifier; an empty name would otherwise write '.csv' straight
    # into the attributes directory.
    folder_name = original_name.lstrip('0') or original_name

    folder_path = os.path.join(output_dir, folder_name)
    os.makedirs(folder_path, exist_ok=True)

    # Single-row table with the original column names. The first column
    # already contains the full identifier, so it needs no further update.
    new_csv_data = pd.DataFrame([row.values], columns=header_row)

    # Write <folder_name>/<folder_name>.csv including the header line.
    new_csv_path = os.path.join(folder_path, f"{folder_name}.csv")
    new_csv_data.to_csv(new_csv_path, index=False, header=True)

print("Folders and CSV files created successfully.")



In [None]:
import os
import shutil
from pathlib import Path

# Work relative to the folder that contains the notebook's working directory.
cwd = os.getcwd()
parent_pad = Path(cwd).parent
print(parent_pad)

# Directory whose sub-folders contain the .nc files.
parent_path = parent_pad / "output_caravan/timeseries/netcdf"

# Sub-folder names, sorted so the processing (and log) order is stable.
folders = sorted(f for f in os.listdir(parent_path) if os.path.isdir(os.path.join(parent_path, f)))

nc_suffix = '.nc'

# Loop through each folder
for folder in folders:
    folder_path = os.path.join(parent_path, folder)

    # Regular files ending in '.nc'. Directories are excluded so that a folder
    # whose name happens to end in '.nc' is not moved as if it were a data file.
    nc_files = sorted(
        f for f in os.listdir(folder_path)
        if f.endswith(nc_suffix) and os.path.isfile(os.path.join(folder_path, f))
    )

    for nc_file in nc_files:
        # Folder name = file name without its trailing '.nc'. Only the suffix is
        # cut off; str.replace would also delete any '.nc' occurring earlier in
        # the name and produce the wrong folder.
        new_folder_name = nc_file[:-len(nc_suffix)]
        new_folder_path = os.path.join(folder_path, new_folder_name)

        # Create the new folder if it doesn't exist
        os.makedirs(new_folder_path, exist_ok=True)

        # Move the .nc file into the new folder
        old_file_path = os.path.join(folder_path, nc_file)
        new_file_path = os.path.join(new_folder_path, nc_file)

        shutil.move(old_file_path, new_file_path)
        print(f"Moved {nc_file} to {new_folder_name}/")

print("Files moved successfully.")


In [None]:
import os
import shutil
from pathlib import Path

# Work relative to the folder that contains the notebook's working directory.
cwd = os.getcwd()
parent_pad = Path(cwd).parent

# Directory whose sub-folders may be deleted (NetCDF timeseries).
parent_path = parent_pad / "output_caravan/timeseries/netcdf"

# Directory whose sub-folder names decide what is kept (attributes).
parent_path_2 = parent_pad / "output_caravan/attributes"

# Names of the immediate sub-folders of each directory.
folders_in_parent_path_2 = [folder.name for folder in parent_path_2.iterdir() if folder.is_dir()]
folders_in_parent_path = [folder.name for folder in parent_path.iterdir() if folder.is_dir()]

# Safety guard: with no attribute folders at all, every NetCDF sub-folder would
# count as "not in the list" and be deleted. Refuse to continue in that case
# instead of wiping the whole directory.
if folders_in_parent_path and not folders_in_parent_path_2:
    raise RuntimeError(
        f"No sub-folders found in {parent_path_2}; refusing to delete every folder in {parent_path}"
    )

# NOTE(review): only the names of immediate sub-folders are compared; folders
# nested one level deeper are never inspected - confirm this is the intended level.
folders_to_keep = set(folders_in_parent_path_2)

# Delete every NetCDF sub-folder that has no attributes folder of the same name.
for folder in folders_in_parent_path:
    if folder not in folders_to_keep:
        folder_path_to_remove = parent_path / folder
        print(f"Deleting folder: {folder_path_to_remove}")  # Debugging
        shutil.rmtree(folder_path_to_remove)  # Removes the folder and everything inside it



In [None]:
import os

def save_folder_names_to_txt(directory, output_file):
    """
    Writes the names of all folders in the specified directory to a text file.

    Only immediate sub-folders are listed (no recursion). Names are written one
    per line in sorted order, so the file content does not depend on the order
    in which the operating system lists directory entries. The file is written
    as UTF-8 regardless of the platform's default encoding.

    File-system and encoding errors are reported with a printed message and
    are not raised; other exceptions (e.g. a wrong argument type) propagate.

    Args:
        directory (str | os.PathLike): The path to the directory.
        output_file (str | os.PathLike): The path to the output text file.

    Returns:
        None
    """
    try:
        # Get all folder names in the directory, in a deterministic order
        folder_names = sorted(
            name for name in os.listdir(directory)
            if os.path.isdir(os.path.join(directory, name))
        )

        # Write folder names to the output file, one per line
        with open(output_file, 'w', encoding='utf-8') as file:
            file.writelines(f"{folder}\n" for folder in folder_names)

        print(f"Folder names have been written to {output_file}")

    except (OSError, UnicodeError) as e:
        # Best-effort behaviour kept for I/O problems: report and carry on.
        print(f"An error occurred: {e}")



# Work relative to the folder that contains the notebook's working directory.
cwd = os.getcwd()
parent_pad = Path(cwd).parent

# Folder whose sub-folder names are listed, and the text file that receives them.
directory = parent_pad.joinpath('output_caravan', 'attributes')
output_file = parent_pad.joinpath('folder_names.txt')

save_folder_names_to_txt(directory=directory, output_file=output_file)
