# Gas Flux Analyzer Notebook
This notebook walks through the process of analyzing soil greenhouse gas (GHG) flux data from a Gasmet analyzer. It automates the calculation of CO₂ and N₂O fluxes, performs linear regression, generates plots, and prepares the data for final analysis.

## 1. Setup and Configuration
First, we'll import the necessary Python libraries.

In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns

## 2. Core Functions
These are the helper functions that perform specific tasks like calculating slopes and creating plots.

### `filter_and_calculate_slope`
This function takes the raw data, isolates a specific measurement interval, trims the noisy start and end periods (the first 180 s and the last 100 s of the interval), and then performs a linear regression of concentration against elapsed time to find the rate of gas concentration change (the slope).

In [None]:
def filter_and_calculate_slope(df, start_time, end_time, gas='Carbon dioxide CO2'):
    """Fit a straight line to one gas's concentration inside a trimmed time window.

    Rows of ``df`` whose ``'Time1'`` lies in ``[start_time, end_time]`` are
    selected, the first 180 s and the last 100 s of that window are discarded,
    and a first-degree polynomial is fitted to ``df[gas]`` against the elapsed
    time in seconds.

    Args:
        df: DataFrame with a datetime column ``'Time1'`` and a column named
            ``gas`` holding the concentration readings.
        start_time: Inclusive lower bound of the measurement window.
        end_time: Inclusive upper bound of the measurement window.
        gas: Name of the concentration column to regress.

    Returns:
        A tuple ``(interval_data, slope, equation)``. ``interval_data`` is the
        trimmed copy of the rows used for the fit (with an added ``'Elapsed'``
        column), ``slope`` is the fitted change in concentration per second and
        ``equation`` is the fitted line formatted as text. When fewer than two
        distinct time points with a valid reading remain, ``(None, None, None)``
        is returned because a line cannot be fitted.
    """
    # Select the rows recorded during this measurement (both bounds inclusive).
    in_window = (df['Time1'] >= start_time) & (df['Time1'] <= end_time)
    interval_data = df.loc[in_window].copy()

    if interval_data.empty:
        return None, None, None

    # Measure elapsed seconds from the earliest reading rather than from the
    # first row, so rows that are not in chronological order still get a
    # correct, non-negative offset.
    window_start = interval_data['Time1'].min()
    interval_data['Elapsed'] = (interval_data['Time1'] - window_start).dt.total_seconds()

    # Drop first 180s and last 100s
    last_kept = interval_data['Elapsed'].max() - 100
    keep = (interval_data['Elapsed'] >= 180) & (interval_data['Elapsed'] <= last_kept)
    interval_data = interval_data[keep]

    # A missing reading would make the least-squares fit fail or return NaN.
    interval_data = interval_data[interval_data[gas].notna()]

    # A line needs at least two distinct time points.
    if len(interval_data) < 2:
        return None, None, None
    if interval_data['Elapsed'].min() == interval_data['Elapsed'].max():
        return None, None, None

    # Regression: concentration vs time
    time_seconds = interval_data['Elapsed']
    concentration = interval_data[gas]

    slope, intercept = np.polyfit(time_seconds, concentration, 1)
    equation = f"{slope:.4f} * t + {intercept:.2f}"

    return interval_data, slope, equation

## 3. Main Processing Workflow
This is the main function that ties everything together. It reads the data files, loops through each measurement defined in log.txt, calls the helper functions to calculate slopes and generate plots, and finally computes the soil flux.

In [None]:
def process_folder(folder_path):
    """Compute CO2 and N2O soil fluxes for one measurement folder.

    Reads the tab-delimited files ``RESULTS.txt`` (analyzer readings) and
    ``log.txt`` (one row per chamber measurement) from ``folder_path``. For
    every log row and each gas it fits a concentration slope with
    ``filter_and_calculate_slope``, saves a PNG of the fitted interval under
    ``<folder_path>/figures`` and finally writes the log table, extended with
    slope and flux columns, to ``<folder_path>/Soil Flux.csv``.

    Columns read from ``RESULTS.txt``: ``Date``, ``Time``,
    ``Carbon dioxide CO2``, ``Nitrous oxide N2O``.
    Columns read from ``log.txt``: ``Date``, ``Start Time``, ``End Time``,
    ``Sample ID``, ``Extension``, ``Temperature``.
    NOTE(review): ``Temperature`` is presumably in degrees Celsius (273 is
    added to it) and ``Extension`` presumably in metres (it is added to the
    collar height) -- confirm against the field protocol.

    Args:
        folder_path: Directory containing ``RESULTS.txt`` and ``log.txt``.
    """
    results_file_path = os.path.join(folder_path, 'RESULTS.txt')
    log_file_path = os.path.join(folder_path, 'log.txt')

    df = pd.read_csv(results_file_path, delimiter='\t')
    log_df = pd.read_csv(log_file_path, delimiter='\t')

    # Clean the log headers BEFORE any column is looked up by name; otherwise
    # a header with stray whitespace raises KeyError before it gets cleaned.
    log_df.columns = (
        log_df.columns.str.strip()
        .str.replace('\n', '')
        .str.replace('\r', '')
        .str.replace('\t', '')
    )

    timestamp_format = "%Y-%m-%d %H:%M:%S"
    df['Time1'] = pd.to_datetime(df['Date'] + ' ' + df['Time'], format=timestamp_format)
    log_df['Start Time1'] = pd.to_datetime(log_df['Date'] + ' ' + log_df['Start Time'], format=timestamp_format)
    log_df['End Time1'] = pd.to_datetime(log_df['Date'] + ' ' + log_df['End Time'], format=timestamp_format)

    figures_dir = os.path.join(folder_path, 'figures')
    os.makedirs(figures_dir, exist_ok=True)

    gas_columns = ('Carbon dioxide CO2', 'Nitrous oxide N2O')
    slopes = {gas: [] for gas in gas_columns}

    for _, row in log_df.iterrows():
        start_time = row['Start Time1']
        end_time = row['End Time1']
        sample_id = row['Sample ID']

        for gas in gas_columns:
            interval_data, slope, equation = filter_and_calculate_slope(df, start_time, end_time, gas)

            if interval_data is None:
                print(f"No data available for Sample ID {sample_id} - {gas}")
                # NaN (not None) keeps the slope column numeric even when every
                # sample is missing, so the flux arithmetic below cannot fail.
                slopes[gas].append(np.nan)
                continue

            slopes[gas].append(slope)
            _save_concentration_plot(interval_data, gas, sample_id, slope, equation, figures_dir)

    # Add slope columns
    log_df['Slope_CO2'] = slopes['Carbon dioxide CO2']
    log_df['Slope_N2O'] = slopes['Nitrous oxide N2O']
    extra = log_df['Extension']

    # Constants
    molar_volume = 22.4 * 10**-3  # m3 per mol of ideal gas at 273 K
    radius = 0.1143 / 2  # collar radius meters (4.5 inch diameter)
    height = 0.18288     # collar height meters (7.2 inch for our chamber)
    # Footprint area of the chamber (m2); the diameter should be the internal
    # diameter of the collar.
    footprint_area = np.pi * (radius**2)
    # Cell and pipework (0.4L), interconnecting pipework (0.6L) and any
    # headspace between the soil surface and the chamber (0.1L): per the
    # manual 1.1L in total, i.e. 0.0011 m3.
    volume_pipe = 0.0011
    volume = footprint_area * (height + extra) + volume_pipe

    # Volume and temperature adjustment
    log_df['Volume Ratio'] = volume / molar_volume
    log_df['Area Footprint'] = footprint_area
    log_df['Temperature Ratio'] = 273 / (log_df['Temperature'] + 273)

    # Soil flux for CO2 and N2O, unit (μmol m−2 s−1).
    # The moles of air in the chamber are V / (22.4e-3 * (T + 273) / 273),
    # i.e. Volume Ratio * Temperature Ratio, so the temperature ratio is a
    # multiplier. NOTE: the previous revision divided by it, which inflated
    # fluxes measured above 0 degrees C.
    moles_per_area = log_df['Volume Ratio'] * log_df['Temperature Ratio'] / log_df['Area Footprint']
    log_df['Soil_Flux_CO2'] = log_df['Slope_CO2'] * moles_per_area
    log_df['Soil_Flux_N2O'] = log_df['Slope_N2O'] * moles_per_area

    output_file_path = os.path.join(folder_path, 'Soil Flux.csv')
    log_df.to_csv(output_file_path, index=False)
    print(log_df.columns.tolist())


def _save_concentration_plot(interval_data, gas, sample_id, slope, equation, figures_dir):
    """Save a concentration-versus-time PNG for one sample and gas in ``figures_dir``."""
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        ax.plot(interval_data['Time1'], interval_data[gas], marker='o', linestyle='-')
        ax.set_xlabel('Time')
        ax.set_ylabel(f'{gas} Concentration [ppm]')
        ax.set_title(f'{gas} Concentration Over Time for Sample ID {sample_id}')
        ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M:%S'))
        ax.xaxis.set_major_locator(mdates.SecondLocator(interval=60))
        ax.text(0.05, 0.95, f'Slope: {slope:.2f} ppm/s\nEquation: {equation}', transform=ax.transAxes,
                verticalalignment='top', bbox=dict(facecolor='white', alpha=0.8))
        ax.grid(True)
        ax.tick_params(axis='x', labelrotation=45)
        fig.tight_layout()
        # The file name uses the last word of the column name, e.g. "CO2".
        gas_label = gas.split()[-1]
        fig.savefig(os.path.join(figures_dir, f'{sample_id}_{gas_label}.png'))
    finally:
        # Always release the figure, even if plotting or saving fails.
        plt.close(fig)

## 4. Execute the Analysis
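These final code cells are the "driver". The first defines `main`, which runs `process_folder` on every subfolder of a root folder; the second sets the root data folder and starts the analysis.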

In [None]:
def main(root_folder):
    """Run ``process_folder`` on every immediate subdirectory of ``root_folder``.

    Entries of ``root_folder`` that are not directories are ignored.

    Args:
        root_folder: Path of the directory whose subfolders are processed.
    """
    candidates = (os.path.join(root_folder, entry) for entry in os.listdir(root_folder))
    for candidate in candidates:
        if not os.path.isdir(candidate):
            continue
        process_folder(candidate)



In [None]:
# Root directory whose immediate subfolders are each processed in turn.
# This is a machine-specific absolute Windows path: edit it to point at your
# own data before running the cell.
root_folder = r'C:\Users\fatem\OneDrive\Desktop\Soil\Soil_Github\Sample_data_processing'
# Start the analysis for every subfolder of root_folder.
main(root_folder)