In [None]:
"""
Sensor Data Cleaning & Moving Average Smoothing
------------------------------------------------
This script loads multiple EasyLog sensor files, detects delimiters,
cleans and merges the data, applies moving-average smoothing windows,
generates plots, and outputs statistical comparisons.

Author: Your Name
Date: YYYY-MM-DD
"""

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import csv

# =============================
# CONFIGURATION
# =============================
# Folder that receives every figure written by the cells below. It is created
# here, before any plotting, and an already-existing folder is left untouched.
OUTPUT_FIG_DIR = "figures"
os.makedirs(OUTPUT_FIG_DIR, exist_ok=True)

# Sensor export files to read and merge, given as paths relative to the
# working directory.
FILES = [
    "EasyLog USB 1_04aug2025_13h33.txt",
    "EasyLog USB 1_12aug2025_14h01.txt",
    "EasyLog USB 1_19aug2025_12h16.txt",
]

In [None]:

# =============================
# 1) DELIMITER DETECTOR
# =============================
def detect_delimiter(file_path, default=";"):
    """Guess the field delimiter of a text file from its first non-blank line.

    Parameters
    ----------
    file_path : str or path-like
        File to inspect. It is opened as text with latin-1 encoding.
    default : str, optional
        Delimiter returned when no usable guess can be made (";" by default).

    Returns
    -------
    str
        The delimiter reported by ``csv.Sniffer`` for the first non-blank
        line, or ``default`` when the file has no non-blank line, the sniffer
        raises ``csv.Error``, or the sniffer's guess is a letter or digit.
    """
    with open(file_path, "r", encoding="latin-1") as f:
        # Only the first line that has any content is examined.
        first_line = next((line for line in f if line.strip()), None)

    if first_line is None:
        return default

    try:
        guess = csv.Sniffer().sniff(first_line).delimiter
    except csv.Error:
        # Catch only the sniffer's own failure; anything else (including
        # KeyboardInterrupt) must propagate instead of being swallowed.
        return default

    # Given a single sample line with no conventional separator, the sniffer
    # may settle on an ordinary letter or digit. That is never a real field
    # delimiter, so treat it as "unknown".
    if guess.isalnum():
        return default
    return guess


In [None]:


# =============================
# 2) LOAD & CLEAN FILES
# =============================
print("\n1) Reading and merging txt files...")

# Output column name -> leading text of the matching source column header.
# Matching on the ASCII prefix avoids depending on how the degree sign in the
# header was encoded or decoded.
_SENSOR_COLUMN_PREFIXES = {
    "Temperature": "Celsius",
    "Humidity": "Humidity",
    "Dewpoint": "Dew Point",
}


def _find_column(columns, prefix):
    """Return the first name in `columns` that starts with `prefix` (case-insensitive).

    Raises KeyError when no column matches.
    """
    wanted = prefix.lower()
    for name in columns:
        if str(name).lower().startswith(wanted):
            return name
    raise KeyError(f"No column starting with '{prefix}' found in {list(columns)}")


def _load_sensor_file(path):
    """Read one sensor file into a frame indexed by time with numeric sensor columns.

    Rows whose "Time" value cannot be parsed are dropped; sensor values that
    cannot be parsed become NaN. Raises KeyError if "Time" or one of the
    sensor columns is missing.
    """
    delimiter = detect_delimiter(path)
    print(f"\n📌 Detected delimiter for {path}: '{delimiter}'")

    raw = pd.read_csv(path, encoding="latin-1", delimiter=delimiter)
    raw.columns = [str(c).strip() for c in raw.columns]

    clean = pd.DataFrame({"Time": pd.to_datetime(raw["Time"], errors="coerce")})
    for target, prefix in _SENSOR_COLUMN_PREFIXES.items():
        source = _find_column(raw.columns, prefix)
        clean[target] = pd.to_numeric(raw[source], errors="coerce")

    return clean.dropna(subset=["Time"]).set_index("Time").sort_index()


data_frames = []

for file in FILES:
    if not os.path.exists(file):
        print(f"❌ File not found: {file}")
        continue

    df = _load_sensor_file(file)
    data_frames.append(df)

    print(f"{file} — Valid samples: {len(df)}")

# pd.concat fails with an unhelpful message on an empty list, so report the
# actual problem instead.
if not data_frames:
    raise ValueError("None of the files listed in FILES were found; nothing to merge.")

# Merge all files
df_combined = pd.concat(data_frames)

# Repeated timestamps (e.g. exports that overlap in time) would produce zero
# time differences, which can drag the median sampling interval down to 0 and
# would count the same reading several times in the rolling means. Keep the
# first occurrence of each timestamp.
duplicate_mask = df_combined.index.duplicated(keep="first")
if duplicate_mask.any():
    print(f"\nDropped {int(duplicate_mask.sum())} rows with duplicate timestamps")
    df_combined = df_combined[~duplicate_mask].copy()

df_combined = df_combined.sort_index()

# Sampling interval
median_interval_sec = (
    df_combined.index.to_series()
    .diff()
    .dropna()
    .median()
    .total_seconds()
)

# Fewer than two samples gives NaN here; written as `not (> 0)` so that NaN is
# rejected together with zero and negative values.
if not median_interval_sec > 0:
    raise ValueError(
        f"Cannot derive a sampling interval from the data (got {median_interval_sec}); "
        "at least two distinct timestamps are required."
    )

print(f"\nMedian sampling interval = {median_interval_sec:.2f} seconds")


In [None]:

# =============================
# 3) MOVING AVERAGE WINDOWS
# =============================
def window_points(minutes, interval_sec=None):
    """Convert a window length in minutes to a number of samples.

    Parameters
    ----------
    minutes : float
        Desired window length in minutes.
    interval_sec : float, optional
        Seconds between consecutive samples. When omitted, the module-level
        ``median_interval_sec`` is used.

    Returns
    -------
    int
        ``minutes * 60 / interval_sec`` rounded to the nearest integer, but
        never less than 1 so the result is always usable as a rolling window.

    Raises
    ------
    ValueError
        If the sampling interval is zero, negative, or NaN.
    """
    if interval_sec is None:
        interval_sec = median_interval_sec
    # `not (> 0)` also rejects NaN, which a plain `<= 0` test would let through.
    if not interval_sec > 0:
        raise ValueError(f"Sampling interval must be positive, got {interval_sec}")
    return max(1, int(round(minutes * 60 / interval_sec)))

# Window size in samples for each smoothing level. The key doubles as the
# prefix of the smoothed column names; spans are given in minutes.
WINDOWS = {
    label: window_points(span_min)
    for label, span_min in (("MA_30min", 30), ("MA_6h", 360), ("MA_12h", 720))
}

# Add one centered rolling-mean column per (window, signal) pair, named
# "<window>_<signal>". min_periods=1 lets a window with at least one non-NaN
# value yield a mean, so rows near the edges still get a value.
for label, n_samples in WINDOWS.items():
    for signal in ("Temperature", "Humidity", "Dewpoint"):
        rolling_view = df_combined[signal].rolling(
            n_samples, min_periods=1, center=True
        )
        df_combined[f"{label}_{signal}"] = rolling_view.mean()


In [None]:

# =============================
# 4) PLOT ORIGINAL DATA
# =============================
print("\n2) Plotting original signals...")

# Global matplotlib style; this also affects every figure created afterwards.
plt.rcParams.update({
    "axes.linewidth": 1.2,
    "font.size": 11,
    "lines.linewidth": 2.2
})

fig, ax1 = plt.subplots(figsize=(16, 6))

# Left axis: the two temperature-like signals
ax1.plot(df_combined.index, df_combined["Temperature"], color="red", label="Temperature")
ax1.plot(df_combined.index, df_combined["Dewpoint"], color="green", linestyle=":", label="Dew Point")
ax1.set_ylabel("Temperature / Dew Point (°C)")
ax1.set_xlabel("Time")

# Right axis: humidity on its own scale, sharing the time axis
ax2 = ax1.twinx()
ax2.plot(df_combined.index, df_combined["Humidity"], color="blue", linestyle="--", label="Humidity")
ax2.set_ylabel("Humidity (%)")

# Each axis keeps its own legend entries; merge them into a single legend.
lines1, labels1 = ax1.get_legend_handles_labels()
lines2, labels2 = ax2.get_legend_handles_labels()
ax1.legend(lines1 + lines2, labels1 + labels2, loc="upper left")

ax1.grid(True, linestyle="--", alpha=0.35)
ax1.set_title("Temperature, Humidity, & Dew Point — Original Signals")

fig.tight_layout()
fig.savefig(os.path.join(OUTPUT_FIG_DIR, "Original_All_Signals.png"), dpi=300)
# Close this specific figure rather than whichever one is current.
plt.close(fig)


In [None]:



# =============================
# 5) SUBPLOTS FOR SMOOTHED DATA
# =============================
print("3) Generating subplot figures...")

signals = ["Temperature", "Humidity", "Dewpoint"]
units = ["°C", "%", "°C"]
ma_keys = list(WINDOWS.keys())
colors = {"MA_30min": "red", "MA_6h": "green", "MA_12h": "blue"}

# One figure per signal, one row per smoothing window.
for sig, unit in zip(signals, units):
    # Size the grid from WINDOWS instead of assuming three rows. With
    # squeeze=False the axes always come back as a 2-D array, so a single
    # window does not need special handling.
    n_rows = len(ma_keys)
    fig, axes_grid = plt.subplots(
        n_rows, 1, figsize=(16, 4 * n_rows), sharex=True, squeeze=False
    )
    axes = axes_grid[:, 0]
    fig.suptitle(f"{sig} — Original vs Smoothed")

    for ax, ma_key in zip(axes, ma_keys):
        ax.plot(df_combined.index, df_combined[sig], color="black", alpha=0.5, label="Original")
        ax.plot(
            df_combined.index,
            df_combined[f"{ma_key}_{sig}"],
            # Windows without an assigned color fall back to a neutral one
            # instead of raising KeyError.
            color=colors.get(ma_key, "tab:orange"),
            linewidth=2,
            label="Smoothed",
        )

        # Window size annotation: samples in the window times the median
        # sampling interval, expressed in hours.
        duration_hr = round(WINDOWS[ma_key] * median_interval_sec / 3600, 2)
        ax.set_title(f"{ma_key} (~{duration_hr} hours)")
        ax.set_ylabel(unit)
        ax.grid(True)
        ax.legend()

    axes[-1].set_xlabel("Time")
    fig.tight_layout()
    fig.savefig(os.path.join(OUTPUT_FIG_DIR, f"{sig}_SUBPLOTS.png"), dpi=300)
    # Release each figure as soon as it is saved.
    plt.close(fig)


In [None]:



# =============================
# 6) STATISTICAL COMPARISON
# =============================
print("\n4) Statistical comparison:")

versions = ["Original", "MA_30min", "MA_6h", "MA_12h"]
stats_tables = {}


def _summary_row(label, values):
    """Build one table row of descriptive statistics, each rounded to 3 decimals."""
    lowest = values.min()
    highest = values.max()
    return {
        "Version": label,
        "Mean": round(values.mean(), 3),
        "Std": round(values.std(), 3),
        "Min": round(lowest, 3),
        "Max": round(highest, 3),
        "Range": round(highest - lowest, 3),
        "Median": round(values.median(), 3),
        "Variance": round(values.var(), 3),
    }


# One table per signal: the raw column first, then each smoothed variant.
# NaN entries are removed before the statistics are computed.
for sig in signals:
    print(f"\n--- {sig} ---")

    rows = [
        _summary_row(v, df_combined[sig if v == "Original" else f"{v}_{sig}"].dropna())
        for v in versions
    ]

    df_stats = pd.DataFrame(rows)
    stats_tables[sig] = df_stats
    print(df_stats.to_string(index=False))

print("\nProcessing complete. Figures saved in:", OUTPUT_FIG_DIR)
