In [23]:
import os
import shutil
import zipfile

import chardet
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

Set Papermill variables

In [24]:
# Notebook parameters. NOTE(review): presumably overridden by Papermill at
# execution time - confirm this cell carries the "parameters" tag.
input_path = "../data/sediments-data/data files.zip"  # ZIP archive opened with zipfile.ZipFile in the extraction cell
temp_extract_path = "temp_folder_for_extraction"  # scratch directory the archive is extracted into; deleted with shutil.rmtree after reading
output_path = "../data/outputs"  # directory the loaded dataframes are written to as CSV files

Unzipping the archive and reading the CSV files

In [25]:
# Extract the ZIP archive, load every CSV it contains into `dataframes`
# (keyed by the file's base name), then remove the extracted tree.
#
# Reads:  input_path, temp_extract_path (set in the parameters cell)
# Writes: dataframes -> {csv file name: pandas.DataFrame}
#
# The whole extract/read sequence runs inside try/finally so the temporary
# folder is removed even when a step raises; otherwise a later run could
# pick up stale files left behind by a failed one.

# Path of the one CSV that must be skipped (wrong csv structure in Zenodo).
# It is built from components and normalised so the comparison below does not
# depend on the platform's path separator or on how temp_extract_path is spelled.
skipped_file_path = os.path.normpath(os.path.join(
    temp_extract_path,
    'data files',
    'Sediment_properties',
    'Standard_methods',
    'UNE_EN_1097-7_method.csv',
))

dataframes = {}  # Storing the dataframes using the filename as the key
try:
    # 1. Unzipping the ZIP file
    with zipfile.ZipFile(input_path, 'r') as zip_ref:
        zip_ref.extractall(temp_extract_path)

    # 2. Walking the extracted folder and reading the CSV
    for dirpath, dirnames, filenames in os.walk(temp_extract_path):
        for file in filenames:
            if not file.endswith('.csv'):
                continue

            file_path = os.path.join(dirpath, file)

            # Skipping the specified file (wrong csv structure in Zenodo)
            if os.path.normpath(file_path) == skipped_file_path:
                print(f"Skipping file: {file_path}")
                continue

            print(f"Reading file: {file_path}")

            # Detecting the encoding of the file from its raw bytes
            with open(file_path, 'rb') as f:
                result = chardet.detect(f.read())
            encoding = result['encoding']

            # Keys are base names only, so same-named files in different
            # folders collide; make the overwrite visible instead of silent.
            if file in dataframes:
                print(f"Warning: duplicate file name {file}; "
                      f"{file_path} will replace the dataframe loaded earlier")

            try:
                dataframes[file] = pd.read_csv(file_path, delimiter=';', encoding=encoding)
            except pd.errors.ParserError as e:
                print(f"Failed to read {file_path}. Error: {e}")
            except pd.errors.EmptyDataError as e:
                # Not a ParserError subclass: an empty CSV would otherwise abort the cell.
                print(f"Empty file {file_path}. Error: {e}")
            except UnicodeDecodeError as e:
                print(f"Encoding issue with {file_path}. Error: {e}")
finally:
    # 4. Cleaning up the extracted files
    # ignore_errors=True: the folder may not exist if opening the archive failed,
    # and a cleanup error must not mask the original exception.
    shutil.rmtree(temp_extract_path, ignore_errors=True)

Reading file: temp_folder_for_extraction/data files/Measuring_info.csv
Reading file: temp_folder_for_extraction/data files/CycleB1_2cm/C1_Temperatures_RawSignal(RTD).csv
Reading file: temp_folder_for_extraction/data files/CycleB1_2cm/C1_Temperatures_Processed(degC).csv
Reading file: temp_folder_for_extraction/data files/PulseB0_room&wbath_Temp/Room_Temperature_Processed(degC).csv
Reading file: temp_folder_for_extraction/data files/PulseB0_room&wbath_Temp/Room_Temperature_RawSignal(RTD).csv
Reading file: temp_folder_for_extraction/data files/CycleB3_6cm/C3_Temperatures_Processed(degC).csv
Reading file: temp_folder_for_extraction/data files/CycleB3_6cm/C3_Temperatures_RawSignal(RTD).csv
Reading file: temp_folder_for_extraction/data files/Sediment_properties/Sediment_prop.csv
Skipping file: temp_folder_for_extraction/data files/Sediment_properties/Standard_methods/UNE_EN_1097-7_method.csv
Reading file: temp_folder_for_extraction/data files/Sediment_properties/Standard_methods/APHA_methods

In [27]:
 # NOTE(review): a previous comment here referred to checking a PAPERMILL_EXECUTION variable, but this cell performs no such check - confirm whether one is still needed.

# 3. Save each dataframe to CSV in the outputs directory
# exist_ok=True lets the cell be re-run when the directory already exists.
os.makedirs(output_path, exist_ok=True)
print(f"Saving dataframes to directory: {output_path}")
# Also report the resolved absolute location, since output_path may be relative.
abs_output_dir = os.path.abspath(output_path)
print(f"Absolute output directory: {abs_output_dir}")
for filename, df in dataframes.items():
    # Keys are the source files' base names, so every CSV is written flat
    # into output_path (the archive's folder structure is not reproduced).
    output_file_path = os.path.join(output_path, filename)
    # No `sep` is passed, so pandas' default comma delimiter is used even though
    # the inputs were read with ';'. index=False omits the row index column.
    df.to_csv(output_file_path, index=False)
    print(f"Saved dataframe to: {output_file_path}")

Saving dataframes to directory: ../data/outputs
Absolute output directory: /home/jovyan/work/coudlabs-temperatura-2024/data/outputs
Saved dataframe to: ../data/outputs/Measuring_info.csv
Saved dataframe to: ../data/outputs/C1_Temperatures_RawSignal(RTD).csv
Saved dataframe to: ../data/outputs/C1_Temperatures_Processed(degC).csv
Saved dataframe to: ../data/outputs/Room_Temperature_Processed(degC).csv
Saved dataframe to: ../data/outputs/Room_Temperature_RawSignal(RTD).csv
Saved dataframe to: ../data/outputs/C3_Temperatures_Processed(degC).csv
Saved dataframe to: ../data/outputs/C3_Temperatures_RawSignal(RTD).csv
Saved dataframe to: ../data/outputs/Sediment_prop.csv
Saved dataframe to: ../data/outputs/APHA_methods.csv
Saved dataframe to: ../data/outputs/ISO_2591-1_1988_method.csv
Saved dataframe to: ../data/outputs/ISO_13320_2009_method.csv
Saved dataframe to: ../data/outputs/Step3_Temperatures_RawSignal(RTD).csv
Saved dataframe to: ../data/outputs/Step4_Temperatures_RawSignal(RTD).csv
Sa