## Importing Libraries and Defining Data-Loading Helpers

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import glob
import os

def get_csv_files(raw_data_dir=None):
    """Return the station CSV paths ordered by their three-digit filename prefix.

    Parameters
    ----------
    raw_data_dir : str or os.PathLike, optional
        Directory to search. When omitted, the "Raw Station Datasets" folder
        inside the parent of the current working directory is used.

    Returns
    -------
    list of str
        Paths of the ``*.csv`` files whose basename starts with three decimal
        digits, sorted by that number and then by basename, so the order is
        reproducible even when several files share the same prefix.
    """
    if raw_data_dir is None:
        parent_dir = os.path.dirname(os.getcwd())
        raw_data_dir = os.path.join(parent_dir, "Raw Station Datasets")

    def station_prefix(path):
        return os.path.basename(path)[:3]

    # isdecimal() (unlike isdigit()) rejects characters such as superscript
    # digits that int() cannot parse, so the sort key below never raises.
    numbered_files = [path for path in glob.glob(os.path.join(raw_data_dir, '*.csv'))
                      if station_prefix(path).isdecimal()]

    # glob returns files in arbitrary order; the basename tie-break keeps
    # positional lookups into the result stable between runs.
    return sorted(numbered_files,
                  key=lambda path: (int(station_prefix(path)), os.path.basename(path)))

def process_csv_file(file):
    """Load one station CSV into a DataFrame indexed by its timestamp column.

    The first 12 lines of the file are treated as a preamble: line 11 holds
    the column names and line 12 the matching units. Each unit is appended to
    its column name as ``name(unit)``; columns without a unit keep the bare
    name. Everything after the preamble is parsed as data.

    Parameters
    ----------
    file : str or os.PathLike
        Path of the CSV file to read.

    Returns
    -------
    pandas.DataFrame or None
        The data rows, with the first column whose name contains "date" or
        "time" parsed to datetimes (unparseable values become ``NaT``),
        renamed to ``Date_time`` and used as the index. ``None`` is returned
        when anything goes wrong; the error is printed rather than raised.
    """
    try:
        # newline='' leaves line endings untouched so the CSV parser sees the
        # raw row terminators; strip() below still removes them from the preamble.
        with open(file, 'r', encoding='utf-8', errors='ignore', newline='') as f:
            meta_lines = [f.readline().strip() for _ in range(12)]

            headers = meta_lines[10].split(',')
            units = meta_lines[11].split(',')
            # Pad a short units line so zip() cannot silently drop column names.
            units += [''] * (len(headers) - len(units))
            new_headers = [f'{header.strip()}({unit.strip()})' if unit.strip() else header.strip()
                           for header, unit in zip(headers, units)]

            # Parse the remaining rows from the same handle: the data then gets
            # the same tolerant decoding as the preamble instead of failing on
            # a stray non-UTF-8 byte when the path is reopened in strict mode.
            df = pd.read_csv(f, names=new_headers, low_memory=False)

        date_column = next((col for col in df.columns
                            if 'date' in col.lower() or 'time' in col.lower()), None)
        if date_column is None:
            raise ValueError("No date/time column found in file.")

        df[date_column] = pd.to_datetime(df[date_column], errors='coerce')
        return df.rename(columns={date_column: 'Date_time'}).set_index('Date_time')
    except Exception as e:
        # Best-effort loader: report the problem and let the caller skip the file.
        print(f"Error processing {os.path.basename(file)}: {e}")
        return None

In [2]:
# Position in the sorted file list of the station to inspect.
STATION_INDEX = 5

csv_files = get_csv_files()
single_station_file = csv_files[STATION_INDEX]
single_station_data = process_csv_file(single_station_file)

# process_csv_file reports failures by printing and returning None; stop here
# rather than failing in a later cell with an unrelated AttributeError.
if single_station_data is None:
    raise RuntimeError(f"Could not load {single_station_file}")

In [3]:
# Show the loaded frame; pandas abbreviates long and wide frames in the rendered table.
single_station_data

Unnamed: 0_level_0,Station_ID,air_temp_set_1(Celsius),relative_humidity_set_1(%),wind_speed_set_1(m/s),wind_direction_set_1(Degrees),solar_radiation_set_1(W/m**2),soil_temp_set_1(Celsius),precip_accum_five_minute_set_1(Millimeters),soil_moisture_set_1(%),soil_moisture_set_2(%),...,net_radiation_set_1(W/m**2),net_radiation_sw_set_1(W/m**2),net_radiation_lw_set_1(W/m**2),outgoing_radiation_sw_set_1(W/m**2),incoming_radiation_lw_set_1(W/m**2),outgoing_radiation_lw_set_1(W/m**2),wind_chill_set_1d(Celsius),wind_cardinal_direction_set_1d(code),heat_index_set_1d(Celsius),dew_point_temperature_set_1d(Celsius)
Date_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-04-25 00:00:00+00:00,006HI,19.82,92.54,3.47,94.5,80.71,21.79,0.0,76.2,70.3,...,58.05,69.06,-11.02,11.43,417.25,429.06,,E,,18.57
2022-04-25 00:05:00+00:00,006HI,19.78,93.15,3.33,106.8,116.51,21.80,0.0,76.2,70.3,...,87.32,99.86,-12.53,16.93,416.88,430.17,,ESE,,18.63
2022-04-25 00:10:00+00:00,006HI,19.94,93.73,2.94,105.2,169.84,21.79,0.0,76.2,70.3,...,128.35,144.84,-16.50,25.32,416.11,433.41,,ESE,,18.89
2022-04-25 00:15:00+00:00,006HI,20.09,93.62,3.88,102.8,164.95,21.76,0.0,76.2,70.3,...,124.70,140.95,-16.25,23.56,417.19,434.25,,ESE,,19.02
2022-04-25 00:20:00+00:00,006HI,19.98,93.86,3.58,99.4,73.22,21.74,0.0,76.1,70.3,...,54.50,63.15,-8.65,10.00,419.58,429.09,,E,,18.95
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-10-13 11:40:00+00:00,006HI,18.65,70.92,2.46,187.0,0.00,21.96,0.0,58.5,65.2,...,-51.04,-0.41,-50.63,0.00,352.13,402.76,,S,,13.24
2024-10-13 11:45:00+00:00,006HI,18.70,71.36,2.46,189.8,0.00,21.93,0.0,58.5,65.2,...,-41.82,-0.32,-41.50,0.01,362.78,404.28,,S,,13.38
2024-10-13 11:50:00+00:00,006HI,18.60,72.25,1.87,193.9,0.00,21.91,0.0,58.5,65.2,...,-24.16,-0.13,-24.03,0.00,380.97,405.01,,SSW,,13.47
2024-10-13 11:55:00+00:00,006HI,18.48,73.31,2.10,194.7,0.00,21.89,0.0,58.5,65.2,...,-23.18,-0.13,-23.05,0.00,382.82,405.87,,SSW,,13.58


In [4]:
# The table preview elides the middle columns, so print the full column Index.
print(single_station_data.columns)

Index(['Station_ID', 'air_temp_set_1(Celsius)', 'relative_humidity_set_1(%)',
       'wind_speed_set_1(m/s)', 'wind_direction_set_1(Degrees)',
       'solar_radiation_set_1(W/m**2)', 'soil_temp_set_1(Celsius)',
       'precip_accum_five_minute_set_1(Millimeters)', 'soil_moisture_set_1(%)',
       'soil_moisture_set_2(%)', 'soil_moisture_set_3(%)', 'volt_set_1(volts)',
       'net_radiation_set_1(W/m**2)', 'net_radiation_sw_set_1(W/m**2)',
       'net_radiation_lw_set_1(W/m**2)', 'outgoing_radiation_sw_set_1(W/m**2)',
       'incoming_radiation_lw_set_1(W/m**2)',
       'outgoing_radiation_lw_set_1(W/m**2)', 'wind_chill_set_1d(Celsius)',
       'wind_cardinal_direction_set_1d(code)', 'heat_index_set_1d(Celsius)',
       'dew_point_temperature_set_1d(Celsius)'],
      dtype='object')
