# Sensor Data Analysis and Plotting
This notebook contains code for loading sensor data, processing it, and generating various plots for analysis.

## Import Required Libraries
The following cell imports the necessary libraries for data manipulation and visualization.

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import os

## Load Datasets
The following cell defines a function to load datasets with proper date parsing and loads the required datasets.

In [2]:
def load_data(file_path):
    """Read a CSV and return it indexed by timezone-naive ``receivedAt`` timestamps.

    Args:
        file_path: Anything ``pd.read_csv`` accepts as its first argument. The
            data must contain a ``receivedAt`` column.

    Returns:
        The loaded DataFrame with ``receivedAt`` moved out of the columns and
        used as the index. If the parsed index carries a timezone, it is
        removed with ``tz_localize(None)``.
    """
    frame = pd.read_csv(file_path, parse_dates=['receivedAt'])
    # Second conversion pass over the column that read_csv was asked to parse.
    frame['receivedAt'] = pd.to_datetime(frame['receivedAt'])
    frame = frame.set_index('receivedAt')

    if frame.index.tz is not None:
        # Drop the timezone so the index is naive.
        frame.index = frame.index.tz_localize(None)
    return frame

# Mean/std aggregate files, loaded per time scale for the control and shaded treatments.
control_daily, shaded_daily = (
    load_data('mean_std_data/combined_termoigrometro_PAR_control_daily_mean_std.csv'),
    load_data('mean_std_data/combined_termoigrometro_PAR_shaded_daily_mean_std.csv'),
)
control_hourly, shaded_hourly = (
    load_data('mean_std_data/combined_termoigrometro_PAR_control_hourly_mean_std.csv'),
    load_data('mean_std_data/combined_termoigrometro_PAR_shaded_hourly_mean_std.csv'),
)

## Define Variables to Plot
The following cell defines the variables that will be used for plotting.

In [3]:
# Base names of the measurements to plot; the plotting functions derive the
# "<name>_mean_mean" and "<name>_mean_std" column names from these.
variables = [
    'temperature',
    'temperature_daytime',
    'temperature_nighttime',
    'humidity',
    'vaporPressureDeficit',
    'photosyntheticallyActiveRadiation',
]

## Plot Comparison Function
This cell defines a function to plot comparisons between control and shaded data for a given variable and time period.

In [4]:
def plot_comparison(control_df, shaded_df, variable, start_date, end_date, save_path, time_scale='daily'):
    """Plot control vs. shaded means of one variable for a date range and save the figure.

    Each frame is restricted to the rows whose index lies in
    ``[start_date, end_date]``. The ``{variable}_mean_mean`` column is drawn as a
    line for both treatments and, where a ``{variable}_mean_std`` column exists,
    a band of mean ± 1.96 × std is shaded around it. Temperature variables get a
    red reference line at 26 and humidity one at 60. When either frame has no
    rows in the range, or lacks the mean column, a message is printed and
    nothing is drawn or saved.

    Args:
        control_df: Control-treatment DataFrame with a datetime index.
        shaded_df: Shaded-treatment DataFrame with a datetime index.
        variable: Base variable name, e.g. ``'temperature'``.
        start_date: First timestamp to include (must support ``strftime`` and
            ``date()``).
        end_date: Last timestamp to include (must support ``strftime``).
        save_path: Destination image path; its parent directory is created
            when the path has one.
        time_scale: ``'hourly'`` formats the x-axis as time of day with a tick
            every 3 hours. Any other value uses one tick per day and pins the
            x-limits to the requested range. ``'monthly'`` additionally titles
            the plot with the month name.

    Returns:
        None.
    """
    # Map variable names to y-axis labels
    y_axis_labels = {
        "temperature": "temperature (°C)",
        "temperature_daytime": "temperature_daytime (°C)",
        "temperature_nighttime": "temperature_nighttime (°C)",
        "humidity": "RH (%)",
        "photosyntheticallyActiveRadiation": "PAR (µmol m⁻² s⁻¹)"
    }

    # Filter each frame with a mask built from its OWN index. Reusing the control
    # mask on the shaded frame fails when the frames differ in length and selects
    # the wrong rows when their timestamps are not aligned.
    control_df = control_df.loc[(control_df.index >= start_date) & (control_df.index <= end_date)]
    shaded_df = shaded_df.loc[(shaded_df.index >= start_date) & (shaded_df.index <= end_date)]

    if control_df.empty or shaded_df.empty:
        print(f"Skipping {variable} for {start_date} to {end_date} (no data)")
        return

    mean_col = f"{variable}_mean_mean"
    std_col = f"{variable}_mean_std"

    # The mean column is mandatory; the std column is optional (no band without it).
    if mean_col not in control_df.columns or mean_col not in shaded_df.columns:
        print(f"Skipping {variable} for {start_date.date()} — missing required column: '{mean_col}'")
        return

    fig, ax = plt.subplots(figsize=(12, 6))

    # Mean line plus optional ±1.96·std band for each treatment.
    series_styles = (
        (control_df, 'Control', 'black', '-'),
        (shaded_df, 'Shaded', 'grey', '--'),
    )
    for frame, label, color, linestyle in series_styles:
        ax.plot(frame.index, frame[mean_col], label=label, color=color, linestyle=linestyle, linewidth=1)
        if std_col in frame.columns:
            band = 1.96 * frame[std_col]
            ax.fill_between(frame.index,
                            frame[mean_col] - band,
                            frame[mean_col] + band,
                            color=color, alpha=0.2)

    # Optional red threshold lines
    if variable in ['temperature', 'temperature_daytime', 'temperature_nighttime']:
        ax.axhline(y=26, color='red', linestyle='--', linewidth=1, label='Threshold (26°C)')
    elif variable == 'humidity':
        ax.axhline(y=60, color='red', linestyle='--', linewidth=1, label='Threshold (60%)')

    # Title: month name for 'monthly', a single date when the whole range falls
    # on one calendar day (e.g. per-day hourly plots), otherwise "start to end".
    start_label = start_date.strftime('%Y-%m-%d')
    end_label = end_date.strftime('%Y-%m-%d')
    if time_scale == 'monthly':
        title_str = start_date.strftime('%B %Y')
    elif start_label == end_label:
        title_str = start_label
    else:
        title_str = f"{start_label} to {end_label}"

    ax.set_title(title_str)

    # Use personalized y-axis label if available
    y_label = y_axis_labels.get(variable, variable.capitalize())
    ax.set_ylabel(y_label)
    ax.legend()

    # X-axis formatting
    if time_scale == 'hourly':
        ax.xaxis.set_major_locator(mdates.HourLocator(interval=3))
        ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
        ax.set_xlabel('Time of Day')
    else:
        ax.xaxis.set_major_locator(mdates.DayLocator())
        ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
        ax.set_xlabel('Date')
        # The right limit stops one second before end_date.
        # NOTE(review): presumably to avoid an extra tick at the range end — confirm.
        ax.set_xlim(start_date, end_date - pd.Timedelta(seconds=1))

    plt.xticks(rotation=45, ha='right')
    fig.tight_layout()

    # os.makedirs('') raises, so only create a directory when the path has one.
    save_dir = os.path.dirname(save_path)
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)
    fig.savefig(save_path)
    plt.close(fig)  # release the figure; this function is called in long loops
    print(f"Saved plot: {save_path}")


## Plot Monthly Comparisons
The following cell defines a function to generate monthly comparison plots for all variables.

In [5]:
def plot_monthly_comparison():
    """Generate one control-vs-shaded plot per calendar month and variable.

    Months are taken from the index of ``control_daily``. For every entry in
    ``variables`` the figure is written to ``plots/monthly/<variable>/<YYYY-MM>.png``
    through ``plot_comparison``. A notice is printed when the variable has no
    std column in either daily frame; the plot is still requested.
    """
    os.makedirs("plots/monthly", exist_ok=True)

    for period in control_daily.index.to_period('M').unique():
        start_date, end_date = period.start_time, period.end_time

        for var in variables:
            variable_folder = f"plots/monthly/{var}"
            os.makedirs(variable_folder, exist_ok=True)
            save_path = f"{variable_folder}/{start_date.strftime('%Y-%m')}.png"

            std_col = f"{var}_mean_std"
            has_std = std_col in control_daily.columns and std_col in shaded_daily.columns
            if not has_std:
                print(f"Skipping std for {var} as it is missing.")

            # The call is the same with or without a std column.
            plot_comparison(control_daily, shaded_daily, var, start_date, end_date, save_path, time_scale='daily')

## Plot Daily Comparisons
This cell defines a function to generate daily comparison plots for all variables.

In [6]:
def plot_daily_comparison():
    """Generate one control-vs-shaded plot per calendar day and variable.

    Days are taken from the index of ``control_hourly``. For every entry in
    ``variables`` the figure is written to
    ``plots/daily/<variable>/<YYYY-MM-DD>.png`` through ``plot_comparison`` with
    ``time_scale='hourly'``. The daytime/nighttime temperature variables are
    skipped (with a message, and without creating their folder) when their
    ``_mean_mean`` column is absent from either hourly frame. A notice is
    printed when a variable has no std column; the plot is still requested.
    """
    # Create main folders if they don't exist
    os.makedirs("plots/daily", exist_ok=True)

    # Base names (as they appear in `variables`) of the series that may be
    # absent from the hourly data.
    optional_vars = ("temperature_daytime", "temperature_nighttime")

    # Get all unique dates in the hourly data
    unique_dates = pd.to_datetime(control_hourly.index.date).unique()

    for date in unique_dates:
        start_date = pd.to_datetime(date)
        # Inclusive end of the day: 23:59:59 of the same date.
        end_date = start_date + pd.Timedelta(days=1) - pd.Timedelta(seconds=1)

        for var in variables:
            # `variables` holds base names, so compare against base names and
            # look up the derived mean column in the frames.
            if var in optional_vars:
                mean_col = f"{var}_mean_mean"
                if mean_col not in control_hourly.columns or mean_col not in shaded_hourly.columns:
                    print(f"Skipping {var} for {start_date.date()} as it is missing in data.")
                    continue

            # Create folder and plot
            variable_folder = f"plots/daily/{var}"
            os.makedirs(variable_folder, exist_ok=True)
            save_path = f"{variable_folder}/{start_date.strftime('%Y-%m-%d')}.png"

            std_col = f"{var}_mean_std"
            if std_col not in control_hourly.columns or std_col not in shaded_hourly.columns:
                print(f"Skipping std for {var} on {start_date.date()} as it is missing.")

            # The call is the same with or without a std column.
            plot_comparison(control_hourly, shaded_hourly, var, start_date, end_date, save_path, time_scale='hourly')



## Combined Histogram of t_above_threshold
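The following cell defines a function that creates one bar chart per month comparing the control and shaded treatments: the summed `t_above_threshold_mean` values (with standard-deviation error bars) are drawn against the left y-axis, and the number of records above zero against the right y-axis.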

In [None]:
def plot_combined_t_above_threshold(control_file, shaded_file, save_base_path, time_unit='days',
                                    start_date='2024-04-01'):
    """Save one dual-axis bar chart per month comparing control and shaded treatments.

    For every month present in both files (rows on or after ``start_date``) the
    chart shows, per treatment:

    * left axis: the sum of ``t_above_threshold_mean`` with its standard
      deviation as the error bar;
    * right axis: the number of rows whose ``t_above_threshold_mean`` is > 0.

    Each figure is written to
    ``<save_base_path>/combined_t_above_threshold_<YYYY_MM>.png``.

    Args:
        control_file: CSV for the control treatment; must contain ``receivedAt``.
        shaded_file: CSV for the shaded treatment; must contain ``receivedAt``.
        save_base_path: Directory that receives the images (created on demand).
        time_unit: Text used in the count label, e.g. ``'days'`` or ``'hours'``.
        start_date: Rows with ``receivedAt`` earlier than this are ignored.
            Defaults to the previously hard-coded ``'2024-04-01'``.

    Returns:
        None. When either file lacks ``t_above_threshold_mean`` a message is
        printed and no chart is produced.
    """
    value_col = 't_above_threshold_mean'

    # Load data
    control_df = pd.read_csv(control_file, parse_dates=['receivedAt'])
    shaded_df = pd.read_csv(shaded_file, parse_dates=['receivedAt'])

    # Column presence does not vary by month, so check it once up front.
    if value_col not in control_df.columns or value_col not in shaded_df.columns:
        print(f"Skipping all months — missing '{value_col}'")
        return

    # Filter by start date and add the month key. `.assign` returns a new frame,
    # so no column is written onto a filtered view of the loaded data.
    control_df = control_df.loc[control_df['receivedAt'] >= start_date].assign(
        month=lambda frame: frame['receivedAt'].dt.to_period('M')
    )
    shaded_df = shaded_df.loc[shaded_df['receivedAt'] >= start_date].assign(
        month=lambda frame: frame['receivedAt'].dt.to_period('M')
    )

    # Only months present in both treatments can be compared.
    months = sorted(set(control_df['month']) & set(shaded_df['month']))

    # Bar layout shared by every month's figure.
    categories = ['Control', 'Shaded']
    x = list(range(len(categories)))
    bar_width = 0.25
    offset = 0.15
    sum_pos = [i - offset for i in x]
    count_pos = [i + offset for i in x]

    for month in months:
        monthly_values = [
            control_df.loc[control_df['month'] == month, value_col],
            shaded_df.loc[shaded_df['month'] == month, value_col],
        ]

        sums = [values.sum() for values in monthly_values]
        stds = [values.std() for values in monthly_values]
        counts = [(values > 0).sum() for values in monthly_values]

        fig, ax1 = plt.subplots(figsize=(8, 6))

        # Temperature sum bars (with error bars)
        ax1.bar(
            sum_pos, sums, yerr=stds,
            width=bar_width, color='white', edgecolor='black',
            hatch='///', label='Temperature Sum (°C)', capsize=5
        )
        ax1.set_ylabel('Temperature Sum (°C)', color='black')
        ax1.tick_params(axis='y', labelcolor='black')

        # Count bars on right y-axis
        ax2 = ax1.twinx()
        ax2.bar(
            count_pos, counts,
            width=bar_width, color='white', edgecolor='black',
            label=f'Count ({time_unit})'
        )
        ax2.set_ylabel(f'Count ({time_unit})', color='black')
        ax2.tick_params(axis='y', labelcolor='black')

        # X-axis
        ax1.set_xticks(x)
        ax1.set_xticklabels(categories)
        ax1.set_xlabel('Treatment')

        # Title
        month_str = month.strftime('%B %Y')
        ax1.set_title(f'Temperature Above Threshold — {month_str}')

        # Left-axis limit: 1.6x the largest sum or count.
        # NOTE(review): the limit mixes left-axis sums with right-axis counts and
        # only the left axis is adjusted; presumably headroom for the legend — confirm.
        max_height = max(sums + counts)
        if max_height > 0:  # avoid identical lower and upper limits
            ax1.set_ylim(0, max_height * 1.6)

        # Combined legend
        handles1, labels1 = ax1.get_legend_handles_labels()
        handles2, labels2 = ax2.get_legend_handles_labels()
        fig.legend(
            handles1 + handles2, labels1 + labels2,
            loc='upper right', bbox_to_anchor=(0.9, 0.88), frameon=True
        )

        # Save path
        month_filename = month.strftime('%Y_%m')
        save_path = os.path.join(save_base_path, f'combined_t_above_threshold_{month_filename}.png')
        os.makedirs(save_base_path, exist_ok=True)

        fig.tight_layout(rect=[0, 0, 1, 0.95])
        fig.savefig(save_path)
        plt.close(fig)  # one figure per month; release it before the next iteration
        print(f"Saved plot: {save_path}")

## Run Combined Histogram Function
The following cell runs the combined histogram function for t_above_threshold.

In [None]:
# Daily aggregates: the count axis is labelled in days.
plot_combined_t_above_threshold(
    control_file='mean_std_data/combined_termoigrometro_PAR_control_daily_mean_std.csv',
    shaded_file='mean_std_data/combined_termoigrometro_PAR_shaded_daily_mean_std.csv',
    save_base_path='plots/combined_t_above_threshold_days',
    time_unit='days',
)

# Hourly aggregates: the count axis is labelled in hours.
plot_combined_t_above_threshold(
    control_file='mean_std_data/combined_termoigrometro_PAR_control_hourly_mean_std.csv',
    shaded_file='mean_std_data/combined_termoigrometro_PAR_shaded_hourly_mean_std.csv',
    save_base_path='plots/combined_t_above_threshold_hours',
    time_unit='hours',
)


Saved plot: plots/combined_t_above_threshold_days.png
Saved plot: plots/combined_t_above_threshold_hours.png


## DLI Comparison
The following cell defines a function that creates a bar chart comparing the summed DLI (Daily Light Integral) of the control and shaded data.

In [9]:
def plot_dli_comparison(control_file, shaded_file, save_path):
    """Save a two-bar chart comparing the summed DLI of the control and shaded data.

    Args:
        control_file: CSV for the control treatment; must contain the column
            ``'DLI_mol m-2 d-1'``.
        shaded_file: CSV for the shaded treatment; same column requirement.
        save_path: Destination image path; its parent directory is created
            when the path has one.

    Returns:
        None.
    """
    dli_col = 'DLI_mol m-2 d-1'

    # Load data
    control_df = pd.read_csv(control_file)
    shaded_df = pd.read_csv(shaded_file)

    # Each bar is the SUM of the DLI column over all rows of its file.
    # NOTE(review): earlier comments and names called this a "mean", and the
    # y-axis label carries per-day units — confirm whether a sum or a mean is intended.
    categories = ['Control', 'Shaded']
    dli_totals = [control_df[dli_col].sum(), shaded_df[dli_col].sum()]

    # Plot the bars
    fig, ax = plt.subplots(figsize=(8, 6))
    ax.bar(categories, dli_totals, color=['black', 'white'], alpha=0.7, width=0.5, edgecolor='black', linewidth=1.5)

    # Set labels and title
    ax.set_ylabel('DLI (mol m-2 d-1)')
    ax.set_title('Comparison of DLI (Daily Light Integral)')

    # os.makedirs('') raises, so only create a directory when the path has one.
    save_dir = os.path.dirname(save_path)
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)

    fig.tight_layout()
    fig.savefig(save_path)
    plt.close(fig)
    print(f"Saved DLI comparison histogram: {save_path}")

## Run Plotting Functions
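The following cell generates and saves the DLI comparison plot. The calls that produce the monthly and daily comparison plots are commented out; uncomment them to regenerate those plots.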

In [None]:

# The monthly and daily comparison plots are disabled in this cell;
# uncomment the calls below to regenerate them.
# plot_monthly_comparison()
# plot_daily_comparison()

plot_dli_comparison(
    control_file='hourly_daily_data/PAR_control_daily.csv',
    shaded_file='hourly_daily_data/PAR_shaded_daily.csv',
    save_path='plots/dli_comparison_histogram.png',
)


Saved plot: plots/monthly/temperature/2024-06.png
Saved plot: plots/monthly/temperature_daytime/2024-06.png
Saved plot: plots/monthly/temperature_nighttime/2024-06.png
Saved plot: plots/monthly/humidity/2024-06.png
Saved plot: plots/monthly/vaporPressureDeficit/2024-06.png
Skipping std for photosyntheticallyActiveRadiation as it is missing.
Saved plot: plots/monthly/photosyntheticallyActiveRadiation/2024-06.png
Saved plot: plots/monthly/temperature/2024-07.png
Saved plot: plots/monthly/temperature_daytime/2024-07.png
Saved plot: plots/monthly/temperature_nighttime/2024-07.png
Saved plot: plots/monthly/humidity/2024-07.png
Saved plot: plots/monthly/vaporPressureDeficit/2024-07.png
Skipping std for photosyntheticallyActiveRadiation as it is missing.
Saved plot: plots/monthly/photosyntheticallyActiveRadiation/2024-07.png
Saved plot: plots/monthly/temperature/2024-08.png
Saved plot: plots/monthly/temperature_daytime/2024-08.png
Saved plot: plots/monthly/temperature_nighttime/2024-08.png
Sa