In [1]:
import pandas as pd
# NOTE: the later `from datetime import datetime` rebinds the name `datetime`
# to the class, so after this cell runs the module imported on the next line is
# no longer reachable as `datetime` (e.g. `datetime.datetime` / `datetime.timedelta`
# would fail).
import datetime
import matplotlib.pyplot as plt
import glob
import os
import metpy
import numpy as np
from datetime import datetime
import xarray as xr
import metpy.calc as mpcalc
from metpy.units import units
from metpy.calc import dewpoint_from_relative_humidity
from metpy.calc import specific_humidity_from_dewpoint
from pint import UnitRegistry
# NOTE(review): this creates a second pint registry, independent of the `units`
# registry imported from metpy.units above. pint does not allow quantities from
# different registries to be combined -- confirm `ureg` is really needed.
ureg = UnitRegistry()
import seaborn as sns

In [2]:


# Build one table of hourly means from every .dat file matched by `data_path`
# and write it to CSV. `final_df` is always defined when this cell finishes
# (it is an empty frame if no file could be processed).

# Set the working directory
working_dir = "C:\\Year1\\"
data_path = os.path.join(working_dir, "NMVOC\\Data\\data_Meteo\\Meteo_ebas_11_15\\*.dat")
save_dir = "C:\\Year1\\NMVOC\\Data\\data_Meteo\\"

# Number of leading lines skipped before the column-name row (adjust as needed)
header_rows = 28

# Values that mark a missing measurement; rows containing them are discarded
missing_codes = [-99.9, -999.9]

# Short column names used in the files -> descriptive names used in the output
rename_dict = {"WS": "wind_speed", "RH": "relative_humidity", "AP": "pressure", "AT": "temperature"}
column_order = ["wind_speed", "temperature", "relative_humidity", "pressure"]

# Find all .dat files. glob returns matches in arbitrary order, so sort them to
# make the row order of the concatenated result deterministic.
filename = sorted(glob.glob(data_path))

# List to store processed DataFrames
df_list = []

# Loop through each file and process it
for file in filename:
    print(f"Processing file: {file}")

    try:
        # Load data, skipping the header lines
        df = pd.read_csv(file, sep=r'\s+', skiprows=header_rows)

        # Build a timestamp from the DATE and TIME columns; unparseable values become NaT
        df["DATETIME"] = pd.to_datetime(
            df["DATE"].astype(str) + " " + df["TIME"].astype(str), errors="coerce"
        )

        # Rows without a valid timestamp cannot be assigned to an hour, so drop
        # them explicitly, then use the timestamp as the index.
        df = df.dropna(subset=["DATETIME"]).set_index("DATETIME")

        # Drop unnecessary columns
        df = df.drop(["DATE", "TIME", "WD"], axis=1)

        # Convert to numeric BEFORE looking for the missing-data codes: a column
        # parsed as text would never compare equal to a float code, and the code
        # would then leak into the hourly mean.
        df = df.apply(pd.to_numeric, errors="coerce")

        # Remove rows where any column contains a missing-data code.
        # NOTE(review): this still looks at the RAD/UVB columns that are dropped
        # on the next step, so a gap in those columns also discards the row's
        # other measurements -- confirm that is intended.
        df = df[~df.isin(missing_codes).any(axis=1)]

        # Drop columns containing "RAD" or "UVB" in the header
        df = df[[col for col in df.columns if "RAD" not in col and "UVB" not in col]]

        # Resample to hourly means, then rename and reorder the columns.
        # A file lacking one of the expected columns raises KeyError here and is
        # reported and skipped by the handler below.
        hourly_df = df.resample("h").mean()
        hourly_df = hourly_df.rename(columns=rename_dict)[column_order]

        # Append processed DataFrame to list
        df_list.append(hourly_df)

    except Exception as e:
        # Best effort: report the failing file and continue with the others
        print(f"Error processing {file}: {e}")

# Concatenate all processed DataFrames
if df_list:
    final_df = pd.concat(df_list)

    # Save final concatenated DataFrame
    output_file = os.path.join(save_dir, "processed_met_11_14.csv")
    final_df.to_csv(output_file, index=True)

    print(f"Saved final concatenated data to: {output_file}")
else:
    # Nothing matched or every file failed: keep `final_df` defined (same columns
    # and index name as a normal result) so the print below and later cells do
    # not hit a NameError.
    final_df = pd.DataFrame(
        columns=column_order,
        index=pd.DatetimeIndex([], name="DATETIME"),
        dtype=float,
    )
    print(f"No .dat files could be processed from {data_path}; nothing was saved.")

print("Processing complete for all .dat files.")
print(final_df)





Processing file: C:\Year1\NMVOC\Data\data_Meteo\Meteo_ebas_11_15\cmn644n00.isac.xx.xx.met.nl.ev2011.dat
Processing file: C:\Year1\NMVOC\Data\data_Meteo\Meteo_ebas_11_15\cmn644n00.isac.xx.xx.met.nl.ev2012.dat
Processing file: C:\Year1\NMVOC\Data\data_Meteo\Meteo_ebas_11_15\cmn644n00.isac.xx.xx.met.nl.ev2013.dat
Processing file: C:\Year1\NMVOC\Data\data_Meteo\Meteo_ebas_11_15\cmn644n00.isac.xx.xx.met.nl.ev2014.dat
Saved final concatenated data to: C:\Year1\NMVOC\Data\data_Meteo\processed_met_11_14.csv
Processing complete for all .dat files.
                     wind_speed  temperature  relative_humidity  pressure
DATETIME                                                                 
2011-01-18 12:00:00        6.50         4.40              33.65    793.95
2011-01-18 13:00:00        7.45         4.45              33.60    793.45
2011-01-18 14:00:00        7.25         4.55              32.10    793.10
2011-01-18 15:00:00        7.65         4.25              33.40    793.00
2011-01-18 