In [1]:
import os
import fnmatch
import numpy as np
import pandas as pd
from tqdm import tqdm
import plotly.io as pio
import charset_normalizer
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from typing import Any, Tuple, List

In [2]:
def find_files(directory: str, file_format: str, exclude_substring: str | list = None):
    """
    Locate all files of a certain format in a folder and its subfolders.

    Parameters:
    - directory (str): The directory to search in.
    - file_format (str): The file format to search for (e.g., '.txt').
    - exclude_substring (str): A substring to include if files containing it is to be excluded

    Returns:
    - list: A list of paths to the files found.
    """
    matches = []
    for root, _, filenames in os.walk(directory):
        for filename in fnmatch.filter(filenames, f"*{file_format}"):
            if not any(substring in filename for substring in exclude_substring):
                matches.append(os.path.join(root, filename))
    return matches


def load_and_append_data(file_list: list, date_column: str, file_headers: list):
    """
    Load several header-less, comma-separated files into one time-ordered DataFrame.

    Parameters:
    - file_list (list): Paths of the files to read.
    - date_column (str): Name (taken from ``file_headers``) of the column holding
      timestamps written as 'dd/mm/YYYY HH:MM:SS'.
    - file_headers (list): Column names assigned to every file.

    Returns:
    - pd.DataFrame: All rows of the readable files, sorted by ``date_column``, with one
      row per timestamp (the first occurrence in ``file_list`` order wins) and a fresh
      0..n-1 index. If no file could be loaded, an empty DataFrame with the
      ``file_headers`` columns is returned.

    Files that fail to load or parse are reported with ``print`` and skipped.
    """
    all_dfs = []

    for file_path in file_list:
        try:
            # Load the .txt file into a DataFrame
            new_df = pd.read_csv(file_path, sep=',', names=file_headers)  # Change sep if necessary

            # Check if the date column exists in the new DataFrame
            if date_column not in new_df.columns:
                raise ValueError(f"The specified date column '{date_column}' is not in the DataFrame from '{file_path}'.")

            # The explicit format already fixes the day-first order
            new_df[date_column] = pd.to_datetime(new_df[date_column], format='%d/%m/%Y %H:%M:%S')

            all_dfs.append(new_df)

        except Exception as e:
            # Deliberate best effort: one unreadable file must not abort the whole load
            print(f"An error occurred while processing '{file_path}': {e}")
            continue  # Skip to the next file if there's an error

    if not all_dfs:
        # pd.concat([]) raises; hand back an empty frame with the expected layout instead
        empty_df = pd.DataFrame(columns=file_headers)
        if date_column in empty_df.columns:
            empty_df[date_column] = pd.to_datetime(empty_df[date_column])
        return empty_df

    # A stable sort keeps the file order among equal timestamps, so dropping duplicates
    # deterministically keeps the row from the earliest file in file_list.
    combined_df = (
        pd.concat(all_dfs, ignore_index=True)
        .sort_values(by=date_column, kind='stable')
        .drop_duplicates(subset=[date_column])
        .reset_index(drop=True)
    )

    return combined_df


def resample_dataframe(df: pd.DataFrame, resample_interval: str = '5 min', date_column: str = 'date_time'):
    """
    Resample a DataFrame of higher-frequency data (e.g. 1-minute) to a coarser interval.

    Parameters:
    - df: DataFrame containing the data with a datetime column. It is not modified.
    - resample_interval: Target interval as a pandas frequency string (default '5 min').
    - date_column: Name of the column that contains datetime information.

    Returns:
    - A DataFrame with one row per interval holding the mean of every remaining column,
      rounded to two decimals. The timestamp is returned in a column named 'Date' and
      every row is labelled in a 'Type' column with '<resample_interval> average'.
    """
    df_temp = df.copy()
    # Ensure the date column is in datetime format
    df_temp[date_column] = pd.to_datetime(df_temp[date_column])

    # Set the date column as the index
    df_temp = df_temp.set_index(date_column)

    # The textual 'Type' label cannot be averaged; drop it here and re-create it below
    df_temp = df_temp.drop(columns='Type', errors='ignore')

    # Resample the data to the requested interval and aggregate using mean
    resampled_df = df_temp.resample(resample_interval).mean().round(2)

    # Reset the index to have the date column back as a column.
    # NOTE(review): the output column is always called 'Date', even when date_column
    # differs; kept as-is because existing callers may rely on that name.
    resampled_df = resampled_df.reset_index(names='Date')

    # Always label the averaged rows. The previous flag logic only added the label when
    # the input had no 'Type' column, so inputs that did carry one lost it.
    resampled_df['Type'] = f'{resample_interval} average'
    return resampled_df

In [None]:
class Observation:
    def __init__(self, 
                 species_name: str, 
                 measurements: List[Any, float | np.float64] | np.ndarray[Any, float | np.float64] | pd.DataFrame, 
                 unit: str) -> None:
        
        self.species_name = species_name
        self.unit = unit

        if type(measurements) == np.ndarray:
            measurements_placeholder = pd.DataFrame(measurements)
        elif type(measurements) == list:
            measurements_placeholder =  pd.DataFrame(measurements)
        else:
            raise ValueError('The format of the measurements are not of an accepted format (list, np.ndarray or pd.DataFrame).')
        
        try:
            measurements_placeholder.iloc[:, 0].astype(float)
            measurements_placeholder.columns.values[0] = f'{species_name} [{unit}]'
            measurements_placeholder.columns.values[1] = 'Date'
        except ValueError:
            measurements_placeholder.columns.values[0] = 'Date'
            measurements_placeholder.columns.values[1] = f'{species_name} [{unit}]'
        
        try:
            measurements_placeholder['Date'] = pd.to_datetime(measurements_placeholder['Date'])
        except ValueError:
            raise ValueError('A timeseries for the measurements were not found in the data.')
        self.measurements = measurements_placeholder


    def __str__(self) -> str:
        return f'This is measurements of {self.species_name} with {len(self.measurements)} measurements.'




class StoredObservation(Observation):
    """An Observation loaded back from storage that can be checked against its original."""

    def __init__(self,
                 species_name: str,
                 measurements: list | np.ndarray | pd.DataFrame,
                 unit: str) -> None:
        super().__init__(species_name, measurements, unit)

    def compare_to_original(self, original: 'Observation') -> str:
        """
        Compare this stored observation with the observation it was created from.

        Parameters:
        - original: The observation to compare against.

        Returns:
        - str: A message stating whether the measured values are identical.

        Raises:
        - TypeError: If the two observations are of different species.
        - ValueError: If the units or the timestamps differ.
        """
        if self.species_name != original.species_name:
            raise TypeError('The samples to be compared are of two different species.')
        if self.unit != original.unit:
            raise ValueError('The units of these observations are different from the original.')

        # Compare whole columns with ``equals``: ``Series != Series`` yields a Series,
        # which cannot be used as an ``if`` condition, and fails for unequal lengths.
        own_dates = self.measurements['Date'].reset_index(drop=True)
        original_dates = original.measurements['Date'].reset_index(drop=True)
        if not own_dates.equals(original_dates):
            raise ValueError('The the observations are of two different timeperiods.')

        own_values = self.measurements[f'{self.species_name} [{self.unit}]'].reset_index(drop=True)
        original_values = original.measurements[f'{original.species_name} [{original.unit}]'].reset_index(drop=True)
        if own_values.equals(original_values):
            return 'The two observations are identical.'
        return 'The two observations have different measurement values.'

In [None]:
# Exploratory check: parse one raw aethalometer (AE33) file to verify the two-pass read
# (header line split on ';', data rows split on ' ').
l = 'aeth_data_raw/2021/AE33_AE33-S05-00497_20210114.dat'

# Step 1: Load the header using the ';' separator
header_df = pd.read_csv(l, sep=';', skiprows=5).columns
# The last header entry is replaced by five placeholder names for surplus data columns
drop_headers = ['drop1', 'drop2', 'drop3', 'drop4', 'drop5']
header_df = [*header_df[:-1], *drop_headers]

# Step 2: Load the actual data using the space separator, skipping the header rows
data_df = pd.read_csv(l, sep=' ', skiprows=6, names=header_df)

# Display the first few rows of the data to inspect it
print("\nData DataFrame:")
print(data_df.head())

# NOTE: no cleaning or saving happens in this cell. The earlier "Cleaned data has been
# saved" message was printed although both steps were commented out, so it was removed.

In [10]:
if __name__ == "__main__":
    folder_path = "aeth_data_raw/"  # Replace with your folder path
    file_extension = ".dat"    # Replace with your desired file format
    file_exclusion = ['log', 'CT', 'ST']
    # Placeholder names for the surplus columns produced by splitting data rows on ' '
    drop_headers = ['drop1', 'drop2', 'drop3', 'drop4', 'drop5']
    found_files = find_files(folder_path, file_extension, exclude_substring=file_exclusion)

    df_list = []
    # NOTE(review): the first 87 files are skipped by position; the reason is not
    # recorded here and the order of found_files depends on os.walk - confirm.
    for file in tqdm(found_files[87:], desc="Loading and processing files"):
        try:
            # Header line is ';'-separated, data rows are ' '-separated
            header_df = pd.read_csv(file, sep=';', skiprows=5).columns
            header_df = [*header_df[:-1], *drop_headers]
            # Both reads use the default encoding. The previous
            # encoding=result['encoding'] relied on a variable that only exists in
            # commented-out code and raises NameError on a fresh kernel.
            temp = pd.read_csv(file, sep=' ', skiprows=6, names=header_df)
        except UnicodeDecodeError:
            # Files that cannot be decoded are skipped
            continue
        df_list.append(temp.drop(drop_headers, axis=1))
    aeth_df = pd.concat(df_list).reset_index(drop=True)

    ### How many significant digits in the final BC concentrations? ###
    aeth_df['BC_final'] = (aeth_df[' BC6'] / 1.6121).round(3)
    # Copy the selection so adding 'Date' below does not write into a slice of aeth_df
    aeth_BC_final = aeth_df.loc[:, ['BC_final',
                                    ' BB(%)',
                                    ' Pressure(Pa)',
                                    ' Temperature(°C)',
                                    ' Flow1',
                                    ' Flow2',
                                    ' Status',
                                    ' TapeAdvCount']].copy()

    aeth_BC_final['Date'] = pd.to_datetime(aeth_df['Date(yyyy/MM/dd)'].astype(str) + ' ' +
                                           aeth_df[' Time(hh:mm:ss)'].astype(str))

Loading and processing files: 100%|██████████| 2558/2558 [41:47<00:00,  1.02it/s]  


<pre>
<b><u>Manual Zero and Calibration periods</u></b>

<b><u>Manual Calibration</u></b>
<b><i>2016</i></b>
From: 19/08 - 2016 12:01
To:   19/08 - 2016 14:24

From: 19/08 - 2016 15:54
To:   19/08 - 2016 16:42
________________________________________________
<b><i>2018</i></b>
From: 23/03 - 2018 11:23
To:   23/03 - 2018 15:07
________________________________________________
<b><i>2022</i></b>
From: 14/12 - 2022 15:50
To:   15/12 - 2022 12:40
________________________________________________
<b><i>2023</i></b>
From: 17/04 - 2023 19:57
To:   17/04 - 2023 21:27
________________________________________________
<b><i>2024</i></b>
From: 23/03 - 2024 13:00
To:   23/03 - 2024 14:50
________________________________________________

<b><u>Manual Zero</u></b>
<b><i>2016</i></b>
<u>Before</u>
From: 19/08 - 2016 10:26
To:   19/08 - 2016 11:58

<u>After</u>
From: 19/08 - 2016 14:27
To:   19/08 - 2016 15:44
________________________________________________
<b><i>2018</i></b>
<u>Before</u>
From: 22/03 - 2018 16:14
To:   23/03 - 2018 10:59

<u>After</u>
<s>From: dd/mm - yyyy HH:MM</s>
<s>To:   dd/mm - yyyy HH:MM</s>
________________________________________________
<b><i>2022</i></b>
<u>Before</u>
From: 11/12 - 2022 13:05
To:   14/12 - 2022 12:05

<u>After</u>
From: 15/12 - 2022 15:10
To:   16/12 - 2022 08:35
________________________________________________
<b><i>2023</i></b>
<u>Before</u>
From: 17/04 - 2023 17:57
To:   17/04 - 2023 19:23

<u>After</u>
From: 17/04 - 2023 21:30
To:   18/04 - 2023 09:45
________________________________________________
<b><i>2024</i></b>
<u>Before</u> 
From: 22/03 - 2024 16:50
To:   23/03 - 2024 08:35

<u>After</u>
From: 23/03 - 2024 15:25
To:   24/03 - 2024 11:50
</pre>

In [27]:
### Load Nephelometer data and load as a Pandas DataFrame ###

if __name__ == "__main__":
    # Where the raw nephelometer exports live and which files to pick up
    folder_path = "neph_data_raw/"
    file_extension = ".txt"

    # Column names assigned to the header-less raw files, in file order
    neph_headers = [
        'Date', 'Type',
        'Scat635nm [Mm^-1]', 'Scat525nm [Mm^-1]', 'Scat450nm [Mm^-1]',
        'BackScat635nm [Mm^-1]', 'BackScat525nm [Mm^-1]', 'BackScat450nm [Mm^-1]',
        'Sample Temp. [K]', 'Enclosure Temp. [K]', 'RH [%]', 'Pressure [mBar]',
    ]

    # Every matching file is used; nothing is excluded
    file_list = find_files(folder_path, file_extension, exclude_substring=[])

    # Name of the timestamp column (must be one of neph_headers)
    date_column = 'Date'

    # Load and merge all files, then keep only data from 2016 onwards
    neph_df = load_and_append_data(file_list, date_column, neph_headers)
    neph_df = neph_df.loc[neph_df['Date'].ge(pd.to_datetime('2016/01/01 00:00:00', dayfirst=True))]

In [28]:
### Extract all one minute values ###
# First and last timestamp of the period recorded at one-minute resolution
one_min_start = pd.to_datetime('18/04/2023 09:46:00', dayfirst=True)
one_min_end = pd.to_datetime('25/03/2024 09:59:00', dayfirst=True)

# Select the period (both ends included) and put it on a gap-free 1-minute grid;
# minutes without a record become all-NaN rows.
one_min_instant = (
    neph_df.loc[neph_df['Date'].between(one_min_start, one_min_end)]
    .set_index('Date')
    .reindex(pd.date_range(start=one_min_start, end=one_min_end, freq='1min', inclusive='both'))
    .reset_index(names='Date')
)

In [29]:
if __name__ == "__main__":
    # Remember the column layout so the averaged frame can be put in the same order
    header_order = one_min_instant.columns

    # Average only the regular '1 min instant' measurements to 5-minute values
    one_min_measurements = one_min_instant.loc[one_min_instant['Type'].eq('1 min instant')]
    df_5min = resample_dataframe(one_min_measurements, date_column='Date')[header_order]

In [None]:
# Manual CO2 calibration windows as [start, end] in 'dd-mm-yyyy HH:MM'
co2_calibration_times = {
    '2016_1': ['19-08-2016 12:01', '19-08-2016 14:24'],
    '2016_2': ['19-08-2016 15:54', '19-08-2016 16:42'],
    '2018': ['23-03-2018 11:23', '23-03-2018 15:07'],
    '2022': ['14-12-2022 15:50', '15-12-2022 12:40'],
    '2023': ['17-04-2023 19:57', '17-04-2023 21:27'],
    '2024': ['23-03-2024 13:00', '23-03-2024 14:50'],
}

# Manual zero windows before/after each calibration, same [start, end] format
manual_zero_times = {
    '2016_before': ['19-08-2016 10:26', '19-08-2016 11:58'],
    '2016_after': ['19-08-2016 14:27', '19-08-2016 15:44'],
    '2018_before': ['22-03-2018 16:14', '23-03-2018 10:59'],
    '2022_before': ['11-12-2022 13:05', '14-12-2022 12:05'],
    '2022_after': ['15-12-2022 15:10', '16-12-2022 08:35'],
    '2023_before': ['17-04-2023 17:57', '17-04-2023 19:23'],
    '2023_after': ['17-04-2023 21:30', '18-04-2023 09:45'],
    '2024_before': ['22-03-2024 16:50', '23-03-2024 08:35'],
    '2024_after': ['23-03-2024 15:25', '24-03-2024 11:50'],
}

# Period that was recorded in one minute instant format
one_min_start = pd.to_datetime('18/04/2023 09:46:00', dayfirst=True)
one_min_end = pd.to_datetime('25/03/2024 09:59:00', dayfirst=True)

# Everything outside the one minute instant period
neph_df_sliced = neph_df.loc[neph_df['Date'].lt(one_min_start) | neph_df['Date'].gt(one_min_end)]

# Put the 5 minute averages of the one minute period back in, so the data is consistent
neph_df_5_min = (
    pd.concat([neph_df_sliced, df_5min])
    .sort_values(by='Date')
    .reset_index(drop=True)
)

# Split the rows by their 'Type' label into separate frames for handling
# Measurement data
neph_data = neph_df_5_min.loc[neph_df_5_min['Type'].eq('5 min average')].reset_index(drop=True)

# Possible Manual Zero and calibration data
neph_zero_cal = neph_df_5_min.loc[neph_df_5_min['Type'].eq('1 min instant')].reset_index(drop=True)

# All automated zeroes ...
neph_auto_zero = neph_df_5_min.loc[
    neph_df_5_min['Type'].isin(['Zero 5 min instant', 'Zero check final'])
].reset_index(drop=True)

# ... up to and including the start of the one minute period (after which the
# automated zero application was turned on)
neph_auto_zero = neph_auto_zero.loc[neph_auto_zero['Date'].le(one_min_start)].reset_index(drop=True)

# All automated zero and span rows in the one minute instant format
neph_auto_zero_cal_1min = one_min_instant.loc[
    one_min_instant['Type'].isin(['Zero 1 min instant', 'Zero check final', 'Span 1 min instant'])
].reset_index(drop=True)

# Only the span rows of the one minute data
neph_span_1min = neph_auto_zero_cal_1min.loc[
    neph_auto_zero_cal_1min['Type'].eq('Span 1 min instant')
].reset_index(drop=True)

In [32]:
def slice_dataframe(df, column_name=None, split_string=None, date_column=None, minute_diff=5):
    """
    Cut a DataFrame into consecutive pieces, either at marker rows or at time gaps.

    Exactly one of the two modes is used:

    - Marker mode (``split_string`` given): every row whose ``column_name`` value
      contains ``split_string`` ends a piece. Rows after the last marker are appended
      to the last piece. No marker at all yields an empty dict.
    - Gap mode (``date_column`` given): a new piece starts wherever the time since the
      previous row exceeds ``minute_diff`` minutes.

    Parameters:
    - df: DataFrame to slice (any index; rows are addressed by position).
    - column_name: Column searched for ``split_string`` (required in marker mode).
    - split_string: Pattern passed to ``str.contains``.
    - date_column: Timestamp column used in gap mode.
    - minute_diff: Largest gap, in minutes, that still belongs to the same piece.

    Returns:
    - dict: Maps the first timestamp of each piece (as a string; taken from 'Date' in
      marker mode and from ``date_column`` in gap mode) to the piece with a fresh index.

    Raises:
    - ValueError: If neither mode is selected, or marker mode lacks ``column_name``.
    """
    # ``boundaries`` holds the position of the last row of each piece
    if split_string is not None:
        if column_name is None:
            raise ValueError('Please input a name for the column to be used for the split.')
        is_marker = df[column_name].str.contains(split_string, na=False).to_numpy()
        boundaries = np.flatnonzero(is_marker).tolist()
        key_column = 'Date'
    elif date_column is not None:
        timestamps = pd.to_datetime(df[date_column])
        gap_minutes = timestamps.diff().dt.total_seconds() / 60
        # A gap before row i means row i-1 closes the current piece. The first row has
        # no predecessor (NaT difference), so it never opens a gap.
        boundaries = (np.flatnonzero((gap_minutes > minute_diff).to_numpy()) - 1).tolist()
        boundaries.append(len(df))
        key_column = date_column
    else:
        raise ValueError('Please specify a date column for the DataFrame.')

    dataframes = {}
    start = 0
    last_piece = len(boundaries) - 1
    for piece_number, boundary in enumerate(boundaries):
        # The last piece always runs to the end of the DataFrame
        end = len(df) if piece_number == last_piece else boundary + 1
        new_df = df.iloc[start:end].reset_index(drop=True)
        start = boundary + 1  # The next piece starts after this boundary row
        if new_df.empty:
            continue  # Nothing to key on (e.g. empty input)
        dataframes[f"{new_df[key_column].iloc[0]}"] = new_df
    return dataframes


def apply_correction(data_to_correct, correction_data, correction_type):
    """
    Apply time-dependent corrections to the matching columns of a time series.

    Each row of ``correction_data`` is valid from its own 'Date' until the 'Date' of
    the next row; the last row is valid for all later data. Data before the first
    correction is left unchanged. Rows of ``correction_data`` are expected in
    chronological order.

    Parameters:
    - data_to_correct: DataFrame with a 'Date' column. It is not modified.
    - correction_data: DataFrame with a 'Date' column and correction values in columns
      that share their names with columns of ``data_to_correct``.
    - correction_type: One of '+', '-', '*', '/': how the correction value is combined
      with the data.

    Returns:
    - pd.DataFrame: Copy of ``data_to_correct`` with the corrected columns rounded to
      two decimals.

    Raises:
    - ValueError: If ``correction_type`` is not one of '+', '-', '*', '/'.
    """
    operations = {'-': 'sub', '+': 'add', '*': 'mul', '/': 'truediv'}
    if correction_type not in operations:
        raise ValueError(f'Correction type {correction_type} not recognized, please specify one of the following: +, -, *, /.')
    operation = operations[correction_type]

    temp_df = data_to_correct.copy()

    # Columns present in both frames (the timestamp itself is never corrected)
    matching_columns = [col for col in temp_df.columns if col in correction_data.columns and col != 'Date']

    correction_times = pd.to_datetime(correction_data['Date']).tolist()

    # Walk the corrections by position, so correction_data may carry any index
    for position, (_, row) in enumerate(correction_data.iterrows()):
        in_period = temp_df['Date'] >= correction_times[position]
        if position + 1 < len(correction_times):
            in_period &= temp_df['Date'] < correction_times[position + 1]
        # For the last correction there is no upper bound. The former bound was built
        # from the last *index label* of the data, which turned into a 1970 timestamp
        # and silently disabled the last correction.

        for col in matching_columns:
            temp_df.loc[in_period, col] = getattr(temp_df.loc[in_period, col], operation)(row[col])

    if matching_columns:
        temp_df[matching_columns] = temp_df[matching_columns].round(2)
    return temp_df


def merge_monthly_data(data):
    """
    Merges consecutive entries with the same year and month by averaging their values
    (which are pandas Series), keeping the first timestamp of each run as the key.

    Args:
        data (dict): Ordered dictionary where keys are timestamps (as strings)
                     and values are pandas Series.

    Returns:
        dict: New dictionary keyed by ``str(timestamp)``. Entries that share a month
              with their neighbours are replaced by one entry holding the mean of the
              whole run, rounded to two decimals; all other entries are passed through
              unchanged. An empty input gives an empty dictionary.
    """
    # Collect runs of adjacent entries that fall in the same calendar month.
    # Each run is [first_timestamp, [values, ...]].
    runs = []
    for key, values in data.items():
        timestamp = pd.to_datetime(key)
        same_month_as_run = bool(runs) and (
            (runs[-1][0].year, runs[-1][0].month) == (timestamp.year, timestamp.month)
        )
        if same_month_as_run:
            runs[-1][1].append(values)
        else:
            runs.append([timestamp, [values]])

    output = {}
    for first_timestamp, members in runs:
        if len(members) == 1:
            # Nothing to merge: keep the entry exactly as it came in
            output[str(first_timestamp)] = members[0]
            continue
        total = members[0]
        for extra in members[1:]:
            total = total + extra
        # Merged-away timestamps are not emitted again; only the first key survives
        output[str(first_timestamp)] = (total / len(members)).round(2)

    return output


# Zero and CO2-calibration pipeline: build the table of zero offsets, subtract it from the
# data, derive calibration coefficients from the CO2 windows and apply them.
# Relies on names created in earlier cells (neph_auto_zero, neph_zero_cal, neph_data,
# neph_df_5_min, manual_zero_times, co2_calibration_times).
if __name__ == "__main__":

    # Reference values per wavelength; below they are scaled by pressure/1013.25 and
    # 273.15/293.15 and divided by the measured signal.
    # presumably CO2 scattering coefficients in Mm^-1 - TODO confirm source and units
    co2_full_scatter = {'635nm': 11.15,
                        '525nm': 23.86,
                        '450nm': 44.21}

    co2_back_scatter = {'635nm': 5.58,
                        '525nm': 11.93,
                        '450nm': 22.11}

    # Column names containing 'Scat' without 'Back' (full) and with 'Back' (back)
    full_headers = [head for head in neph_auto_zero.columns if 'Scat' in head and 'Back' not in head]
    back_headers = [head for head in neph_auto_zero.columns if 'Scat' in head and 'Back' in head]

    # Find all periods where automated zero measurements have taken place
    neph_auto_zero_dict = slice_dataframe(neph_auto_zero, 'Type', 'Zero check final')
    # neph_auto_zero_1min_dict = slice_dataframe(neph_auto_zero_cal_1min[neph_auto_zero_cal_1min['Type'] != 'Span 1 min instant'], date_column='Date', minute_diff=1)

    neph_man_zeroes = {}
    neph_auto_zeroes = {}
    calibration_dict = {}
    neph_co2_calibration = {}
    neph_auto_zeroes_1min = {}  # stays empty: the loop that fills it is commented out below

    # Manual zeroes: mean of every column from position 2 onwards over each window.
    # The window rows are found by exact match of the start/end timestamps; the index
    # labels returned by .index[0] are then used as positions in .iloc, which assumes a
    # default 0..n-1 index on these frames.
    for key, times in manual_zero_times.items():
        tt = [pd.to_datetime(time, dayfirst=True) for time in times]
        try:
            neph_man_zeroes[str(tt[0])] = neph_zero_cal.iloc[neph_zero_cal[neph_zero_cal['Date']==tt[0]].index[0]:
                                                             neph_zero_cal[neph_zero_cal['Date']==tt[1]].index[0]+1, 2:].mean().round(2).T
        except IndexError:
            # Reached when a window timestamp has no exact match in neph_zero_cal:
            # fall back to the full 5-minute table.
            neph_man_zeroes[str(tt[0])] = neph_df_5_min.iloc[neph_df_5_min[neph_df_5_min['Date']==tt[0]].index[0]:
                                                             neph_df_5_min[neph_df_5_min['Date']==tt[1]].index[0]+1, 2:].mean().round(2).T
            
            # NOTE(review): neph_df_5_min_data is rebuilt from neph_df_5_min on every
            # fallback, so only the most recent fallback window ends up removed, and the
            # name does not exist at all if this branch never runs - confirm intent.
            neph_df_5_min_data = neph_df_5_min.drop(neph_df_5_min.index[neph_df_5_min[neph_df_5_min['Date']==tt[0]].index[0]:
                                                                        neph_df_5_min[neph_df_5_min['Date']==tt[1]].index[0]+1])
            
    # Automated zeroes: one mean per slice returned by slice_dataframe
    for key, df in neph_auto_zero_dict.items():
        neph_auto_zeroes[key] = df.iloc[:, 2:].mean().round(2).T

    # for key, df in neph_auto_zero_1min_dict.items():
    #     neph_auto_zeroes_1min[key] = df.iloc[:, 2:].mean().round(2).T
    #     #print(key, neph_auto_zeroes_1min[key])
    
    avg_man_zeroes = merge_monthly_data(neph_man_zeroes)

    # Later dicts win on equal keys; the keys are timestamp strings, so the sort below
    # is a string sort.
    neph_all_zeroes = {**neph_auto_zeroes, **neph_auto_zeroes_1min, **avg_man_zeroes}
    neph_all_zeroes = dict(sorted(neph_all_zeroes.items()))

    neph_zeroes_df = pd.DataFrame(neph_all_zeroes).T  # Transpose to make dates as rows

    # Add 'Date' column with the date as a column, reset index to make it part of the DataFrame
    neph_zeroes_df['Date'] = pd.to_datetime(neph_zeroes_df.index)
    neph_zeroes_df = neph_zeroes_df.drop(['Sample Temp. [K]', 'Enclosure Temp. [K]', 'RH [%]', 'Pressure [mBar]'], axis=1).reset_index(drop=True)  # Reset index to get rid of the original index

    # Reorder the columns to have 'Date' first
    neph_zeroes_df = neph_zeroes_df[['Date'] + [col for col in neph_zeroes_df.columns if col != 'Date']]

    # Subtract the zero offsets from the calibration rows and from the measurement rows
    neph_zeroed_cal_data = apply_correction(neph_zero_cal, neph_zeroes_df, '-')
    neph_zeroed_data = apply_correction(neph_data, neph_zeroes_df, '-')

    # One set of calibration coefficients per CO2 calibration window
    for key, times in co2_calibration_times.items():
        tl = [pd.to_datetime(time, dayfirst=True) for time in times]
        try:
            neph_co2_calibration[str(tl[0])] = neph_zeroed_cal_data.iloc[neph_zeroed_cal_data[neph_zeroed_cal_data['Date']==tl[0]].index[0]:
                                                                         neph_zeroed_cal_data[neph_zeroed_cal_data['Date']==tl[1]].index[0]+1, 2:]
        except IndexError:
            # NOTE(review): this fallback reads neph_df_5_min_data, which is only
            # assigned in the manual-zero fallback above and is not zero-corrected.
            neph_co2_calibration[str(tl[0])] = neph_df_5_min_data.iloc[neph_df_5_min_data[neph_df_5_min_data['Date']==tl[0]].index[0]:
                                                                       neph_df_5_min_data[neph_df_5_min_data['Date']==tl[1]].index[0]+1, 2:]
        # Coefficient = mean over the window of
        # (reference * pressure/1013.25 * 273.15/293.15) / measured, rounded to 2 decimals
        calibration_dict_full = {}
        for wl, item in co2_full_scatter.items():
            #print(tl)
            corr_item = item * (neph_co2_calibration[str(tl[0])]['Pressure [mBar]']/1013.25) * (273.15/293.15)
            for head in full_headers:
                if wl in head:
                    calibration_dict_full[head] = [round((corr_item/neph_co2_calibration[str(tl[0])][head]).mean(), 2)]

        calibration_dict_back = {}
        for wl, item in co2_back_scatter.items():
            corr_item = item * (neph_co2_calibration[str(tl[0])]['Pressure [mBar]']/1013.25) * (273.15/293.15) 
            for head in back_headers:
                if wl in head:
                    calibration_dict_back[head] = [round((corr_item/neph_co2_calibration[str(tl[0])][head]).mean(), 2)]
        
        temp = {**calibration_dict_full, **calibration_dict_back}
        
        # Accumulate one list of coefficients per column across all windows.
        # The inner loops reuse the name `key` of the outer loop; the outer value is
        # not read again afterwards.
        if not calibration_dict:
            for key in temp:
                calibration_dict[key] = temp[key]
        else:
            for key in calibration_dict:
                calibration_dict[key].extend(temp[key])


    # One row per calibration window, dated with the window start
    calibration_coefficients = pd.DataFrame(calibration_dict)
    calibration_coefficients['Date'] = pd.to_datetime(list(neph_co2_calibration.keys()), )

    # Multiply the zero-corrected measurements by the coefficients
    neph_zero_cal_data = apply_correction(neph_zeroed_data, calibration_coefficients, '*')

In [None]:
# Function to interpolate the calibration coefficient
def interpolate_calibration(timestamp, cal_df, coef_name):
    """
    Return the calibration coefficient valid at ``timestamp``.

    ``cal_df`` is scanned for the first pair of consecutive rows whose 'Date' values
    enclose ``timestamp`` (both ends included). Inside that pair the coefficient in
    column ``coef_name`` is interpolated linearly in time, except for timestamps after
    2022-12-14 15:50:00 and up to 2023-04-17 19:57:00, where the coefficient of the
    earlier row is returned unchanged.

    Returns None when no pair of rows encloses ``timestamp``.
    """
    hold_start = pd.to_datetime('2022-12-14 15:50:00')
    hold_end = pd.to_datetime('2023-04-17 19:57:00')

    for position in range(len(cal_df) - 1):
        earlier = cal_df.iloc[position]
        later = cal_df.iloc[position + 1]

        # Keep looking until the enclosing pair of calibrations is found
        if not (earlier['Date'] <= timestamp <= later['Date']):
            continue

        # Inside the hold period the earlier calibration is used as-is
        if hold_start < timestamp <= hold_end:
            return earlier[coef_name]

        elapsed_seconds = (timestamp - earlier['Date']).total_seconds()
        coefficient_change = later[coef_name] - earlier[coef_name]
        window_seconds = (later['Date'] - earlier['Date']).total_seconds()

        # Linear interpolation between the two calibrations
        return earlier[coef_name] + (elapsed_seconds / window_seconds) * coefficient_change

    return None

# Apply the interpolation to the timeseries data: one '<channel> Coeff' column per
# scattering channel, holding the coefficient valid at each row's timestamp
neph_test = neph_zeroed_data.copy()
for channel in ('Scat635nm', 'Scat525nm', 'Scat450nm',
                'BackScat635nm', 'BackScat525nm', 'BackScat450nm'):
    neph_test[f'{channel} Coeff'] = neph_test['Date'].apply(
        interpolate_calibration, args=(calibration_coefficients, f'{channel} [Mm^-1]'))

In [63]:
# Put the table on a gap-free 5-minute grid between its first and last timestamp;
# grid points without a record become all-NaN rows
neph_start = pd.to_datetime(neph_test['Date'].iloc[0])
neph_end = pd.to_datetime(neph_test['Date'].iloc[-1])
neph_test = (
    neph_test.set_index('Date')
    .reindex(pd.date_range(start=neph_start, end=neph_end, freq='5min', inclusive='both'))
    .reset_index(names='Date')
)

In [38]:
# Function to fill only the trailing NaNs (end of the column) with the last valid value
def fill_trailing_nans(col):
    """
    Replace the missing values after the last valid entry of a Series with that entry.

    Gaps before the last valid entry are left untouched. The input Series is not
    modified; a filled copy is returned (or the input itself when there is nothing to
    fill). Works for any index type because rows are addressed by position.
    """
    valid_positions = np.flatnonzero(col.notna().to_numpy())
    if valid_positions.size == 0:
        return col  # No valid value to propagate

    last_valid = int(valid_positions[-1])
    if last_valid == len(col) - 1:
        return col  # No trailing NaNs

    # Work on a copy: the column handed in by DataFrame.apply must not be mutated
    filled = col.copy()
    filled.iloc[last_valid + 1:] = col.iloc[last_valid]
    return filled

# Run fill_trailing_nans column by column over the whole DataFrame
neph_test = neph_test.apply(fill_trailing_nans, axis=0)

In [52]:
# Final value per channel: coefficient * measurement, or the plain measurement where
# no coefficient is available
for channel in ('Scat635nm', 'Scat525nm', 'Scat450nm',
                'BackScat635nm', 'BackScat525nm', 'BackScat450nm'):
    measured = neph_test[f'{channel} [Mm^-1]']
    coefficient = neph_test[f'{channel} Coeff']
    neph_test[f'Final {channel} [Mm^-1]'] = np.where(coefficient.isna(), measured, coefficient * measured)

In [53]:
# Write the corrected nephelometer table to disk. `index` is left at its default, so
# the DataFrame index is written as the first, unnamed CSV column.
# NOTE(review): assumes the folder 'neph_data_corrected/' already exists - confirm.
neph_test.to_csv('neph_data_corrected/full_nephelometer_corrected_data.csv')

In [None]:
# Compare the coefficients measured at each calibration ('True') with the coefficients
# interpolated onto the data timestamps ('Interpolated')
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(16, 8))

channel_groups = (('Full', 'Scat'), ('Back', 'BackScat'))
wavelengths = ('635nm', '525nm', '450nm')

for direction, prefix in channel_groups:
    for wavelength in wavelengths:
        ax.plot(calibration_coefficients['Date'],
                calibration_coefficients[f'{prefix}{wavelength} [Mm^-1]'],
                label=f'True {direction} {wavelength}')

for direction, prefix in channel_groups:
    for wavelength in wavelengths:
        ax.plot(neph_test['Date'],
                neph_test[f'{prefix}{wavelength} Coeff'],
                label=f'Interpolated {direction} {wavelength}')

ax.legend(loc='lower left')
plt.show()

In [None]:
# Put the corrected measurements on a gap-free 5-minute grid between their first and
# last timestamp; grid points without a record become all-NaN rows
start_date = neph_zero_cal_data['Date'].iloc[0]
end_date = neph_zero_cal_data['Date'].iloc[-1]
neph_zero_cal_data = (
    neph_zero_cal_data.set_index('Date')
    .reindex(pd.date_range(start=pd.to_datetime(start_date, dayfirst=True),
                           end=pd.to_datetime(end_date, dayfirst=True),
                           freq='5min', inclusive='both'))
    .reset_index(names='Date')
)
#neph_zero_cal_30min = resample_dataframe(neph_zero_cal_data, resample_interval='30 min', date_column='Date')


# Same regular 5-minute grid for the manual zero / calibration rows
start_date = neph_zero_cal['Date'].iloc[0]
end_date = neph_zero_cal['Date'].iloc[-1]
neph_zero_cal = (
    neph_zero_cal.set_index('Date')
    .reindex(pd.date_range(start=pd.to_datetime(start_date, dayfirst=True),
                           end=pd.to_datetime(end_date, dayfirst=True),
                           freq='5min', inclusive='both'))
    .reset_index(names='Date')
)

2016-01-01 00:00:00
2024-09-05 16:05:00


Data correction
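Nephelometer truncation correction: C = a + b·Å, where Å is the scattering Ångström exponent of the wavelength pair assigned to each channel: 450 nm (blue) uses 450/525 (blue/green), 525 nm (green) uses 450/635 (blue/red), and 635 nm (red) uses 525/635 (green/red), as in the paper (T. Müller 2011).\
For the Aurora with no cut: blue a = 1.455, b = -0.189; green a = 1.434, b = -0.176; red a = 1.403, b = -0.156.\
Tsca is measured at 450 nm (blue), 525 nm (green) and 635 nm (red); the reading order is 1 = red, 2 = green, 3 = blue.\
sca: 450 nm blue / 635 nm red\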
 
 a_b=1.455, b_b=-0.189\
 a_g=1.434, b_g=-0.176\
 a_r=1.403, b_r=-0.156

In [None]:
### Truncation Correction WIP ###
blue_light = '450nm'
green_light = '525nm'
red_light = '635nm'
# Truncation-correction constants (C = a + b * SAE) for blue, green, and red
a_b = 1.455
b_b = -0.189
a_g = 1.434
b_g = -0.176
a_r = 1.403
b_r = -0.156


def _total_scatter_column(frame, wavelength):
    """
    Return the name of the single total-scatter column of `frame` for `wavelength`.

    NOTE(review): assumes back-scatter columns carry 'Back' in their name, as the
    'BackScat...' columns used elsewhere in this notebook do - confirm for this frame.

    Raises:
    - KeyError: If zero or several columns qualify (no guessing between candidates).
    """
    matches = [col for col in frame.columns
               if wavelength in str(col) and 'Back' not in str(col)]
    if len(matches) != 1:
        raise KeyError(f"Expected exactly one total-scatter column containing '{wavelength}', found {matches}")
    return matches[0]


def _angstrom_exponent(scat_short, scat_long, wl_short, wl_long):
    """Element-wise scattering Angstrom exponent between two wavelengths (nm)."""
    return -np.log(scat_short / scat_long) / np.log(wl_short / wl_long)


# Select the channels by column name: indexing the DataFrame as frame[k, 2]
# is a (tuple) column-label lookup and raises KeyError.
scat_blue = np.asarray(neph_zero_cal_data[_total_scatter_column(neph_zero_cal_data, blue_light)], dtype=float)
scat_green = np.asarray(neph_zero_cal_data[_total_scatter_column(neph_zero_cal_data, green_light)], dtype=float)
scat_red = np.asarray(neph_zero_cal_data[_total_scatter_column(neph_zero_cal_data, red_light)], dtype=float)

# Angstrom exponent per channel: blue uses blue/green, green uses blue/red,
# red uses green/red. Missing or non-positive scattering yields non-finite values.
SAE_bg = _angstrom_exponent(scat_blue, scat_green, 450, 525)
SAE_br = _angstrom_exponent(scat_blue, scat_red, 450, 635)
SAE_gr = _angstrom_exponent(scat_green, scat_red, 525, 635)

# Correction factors C = a + b * SAE
c_bg = a_b + b_b * SAE_bg
c_br = a_g + b_g * SAE_br
c_gr = a_r + b_r * SAE_gr

# Allocated once (not once per column) so results are not wiped again.
# Columns 0, 1, 2 hold corrected blue, green, red; columns 3-5 stay zero
# (presumably reserved for the back-scatter channels - TODO confirm).
scat_cor = np.zeros((len(neph_zero_cal_data), 6))
scat_cor[:, 0] = scat_blue * c_bg
scat_cor[:, 1] = scat_green * c_br
scat_cor[:, 2] = scat_red * c_gr

In [3]:
# Reload the corrected nephelometer export and parse its 'Date' column into datetimes.
corrected_path = 'neph_data_corrected/full_nephelometer_corrected_data.csv'
neph_test = pd.read_csv(corrected_path).assign(Date=lambda frame: pd.to_datetime(frame['Date']))

In [16]:
final_values = ['Final Scat450nm [Mm^-1]', 'Final Scat525nm [Mm^-1]', 'Final Scat635nm [Mm^-1]', 'Final BackScat450nm [Mm^-1]', 'Final BackScat525nm [Mm^-1]', 'Final BackScat635nm [Mm^-1]']
df_slice = neph_test[neph_test['Date'] >= pd.to_datetime('2022/01/01 00:00')]

# Write one CSV per parameter with the columns x (timestamp), y (value) and
# Flag (initialised to 1.0 for every row), on a fresh 0..n-1 index.
for value in final_values:
    new_df = (
        df_slice[['Date', value]]
        .reset_index(drop=True)
        .rename(columns={'Date': 'x', value: 'y'})
        .assign(Flag=np.ones(len(df_slice)))
    )
    new_df.to_csv(f'neph_data_corrected/neph_data_2022_2024/{value}.csv')

In [None]:
def create_time_series_plot(data, date_column, value_columns, save_fig=False, save_name=None,
                            title='Nephelometer Measurements Jan 2022 - Sep 2024',
                            ylabel=r'Scattering [Mm$^{-1}$]',
                            x_start='2022/01/01 00:00',
                            y_limits=(-2, 50),
                            show_legend=True):
    """
    Draw a static matplotlib time series with one line per value column.

    Parameters:
    - data (pd.DataFrame): Frame holding the date column and the value columns. It is not modified.
    - date_column (str): Name of the column with the timestamps (parsed with pd.to_datetime).
    - value_columns (list): Columns to plot. Each legend label is the column name with its
      first 10 and last 8 characters removed (e.g. 'Final Scat450nm [Mm^-1]' -> '450nm').
    - save_fig (bool): Save the figure to `save_name` before showing it.
    - save_name (str): Output file name; required when `save_fig` is True.
    - title (str): Figure title.
    - ylabel (str): Y-axis label.
    - x_start: Left x-axis limit (anything pd.to_datetime accepts). The right limit is the
      last timestamp in `data`.
    - y_limits (tuple): Fixed (lower, upper) y-axis limits.
    - show_legend (bool): Draw the 'Wavelengths' legend.

    Returns:
    - module: matplotlib.pyplot, as existing callers expect.

    Raises:
    - ValueError: If `save_fig` is True but `save_name` is None.
    """
    if save_fig and save_name is None:
        raise ValueError("save_name must be given when save_fig is True.")

    # Parse into a local Series rather than assigning back into `data`: callers pass
    # filtered slices, and writing into those mutates them and can trigger
    # pandas' SettingWithCopyWarning.
    dates = pd.to_datetime(data[date_column])

    # Create the plot
    plt.figure(figsize=(10, 6))

    # Add a line plot for each value column
    for value_column in value_columns:
        plt.plot(dates, data[value_column], label=value_column[10:-8])  # Strip the prefix/suffix from the label

    # Fixed axis ranges
    plt.xlim(pd.to_datetime(x_start), dates.iloc[-1])
    plt.ylim(*y_limits)

    # Add title and labels
    plt.title(title)
    plt.xlabel(f'{date_column} [YYYY-mm]')
    plt.ylabel(ylabel)

    # Format the x-axis to show dates properly
    plt.xticks(rotation=45)

    if show_legend:
        plt.legend(title='Wavelengths', loc='upper left')

    plt.grid(True)

    # Save before show(): showing first can leave an empty figure to save
    plt.tight_layout()
    if save_fig:
        plt.savefig(save_name)
    plt.show()

    return plt

# Example usage
if __name__ == "__main__":
    # Static (matplotlib) plot of the final total scattering from 2022 onwards, saved as PDF
    neph_recent = neph_test[neph_test['Date'] >= pd.to_datetime('2022/01/01 00:00')]
    total_scatter_columns = ['Final Scat450nm [Mm^-1]', 'Final Scat525nm [Mm^-1]', 'Final Scat635nm [Mm^-1]']
    neph_plot = create_time_series_plot(
        neph_recent,
        'Date',
        total_scatter_columns,
        save_fig=True,
        save_name='neph_measurements_jan_2022_sep_2024.pdf',
    )

In [None]:
import matplotlib as mpl
# Let the Agg renderer split long paths into chunks of 10000 vertices.
# NOTE(review): presumably needed so the dense series plotted below renders - confirm.
mpl.rcParams['agg.path.chunksize'] = 10000  # matplotlib's default of 0 disables path chunking

def create_time_series_plot(data, date_column, value_columns, save_fig=False, save_name=None,
                            title='Aethalometer Measurements Jan 2022 - Sep 2024',
                            ylabel=r'BC [ng/m$^3$]',
                            x_start='2022/01/01 00:00',
                            y_limits=(-300, 400),
                            show_legend=False):
    """
    Draw a static matplotlib time series with one line per value column.

    Parameters:
    - data (pd.DataFrame): Frame holding the date column and the value columns. It is not modified.
    - date_column (str): Name of the column with the timestamps (parsed with pd.to_datetime).
    - value_columns (list): Columns to plot. Each line's label is the column name with its
      first 10 and last 8 characters removed.
    - save_fig (bool): Save the figure to `save_name` before showing it.
    - save_name (str): Output file name; required when `save_fig` is True.
    - title (str): Figure title.
    - ylabel (str): Y-axis label.
    - x_start: Left x-axis limit (anything pd.to_datetime accepts). The right limit is the
      last timestamp in `data`.
    - y_limits (tuple): Fixed (lower, upper) y-axis limits.
    - show_legend (bool): Draw the 'Wavelengths' legend (off by default for this plot).

    Returns:
    - module: matplotlib.pyplot, as existing callers expect.

    Raises:
    - ValueError: If `save_fig` is True but `save_name` is None.
    """
    if save_fig and save_name is None:
        raise ValueError("save_name must be given when save_fig is True.")

    # Parse into a local Series rather than assigning back into `data`: callers pass
    # filtered slices, and writing into those mutates them and can trigger
    # pandas' SettingWithCopyWarning.
    dates = pd.to_datetime(data[date_column])

    # Create the plot
    plt.figure(figsize=(10, 6))

    # Add a line plot for each value column
    for value_column in value_columns:
        plt.plot(dates, data[value_column], label=value_column[10:-8])  # Strip the prefix/suffix from the label

    # Fixed axis ranges
    plt.xlim(pd.to_datetime(x_start), dates.iloc[-1])
    plt.ylim(*y_limits)

    # Add title and labels
    plt.title(title)
    plt.xlabel(f'{date_column} [YYYY-mm]')
    plt.ylabel(ylabel)

    # Format the x-axis to show dates properly
    plt.xticks(rotation=45)

    if show_legend:
        plt.legend(title='Wavelengths', loc='upper left')

    plt.grid(True)

    # Save before show(): showing first can leave an empty figure to save
    plt.tight_layout()
    if save_fig:
        plt.savefig(save_name)
    plt.show()

    return plt

# Example usage
if __name__ == "__main__":
    # Create the static (matplotlib) time series plot and save it as PDF.
    # NOTE(review): the variable is named "5_min" but the interval passed to
    # resample_dataframe is '1min' (and the PDF name says 1min) - confirm which is intended.
    aeth_BC_5_min = resample_dataframe(aeth_BC_final, resample_interval='1min', date_column='Date')
    aeth_plot = create_time_series_plot(aeth_BC_5_min[aeth_BC_5_min['Date'] >= pd.to_datetime('2022/01/01 00:00')], 
                                        'Date', 
                                        ['BC_final'],
                                        save_fig=True,
                                        save_name='aeth_measurements_jan_2022_sep_2024_1min.pdf')

In [20]:
# Copy of neph_test on a regular 5-minute grid from its first to its last
# timestamp; grid points without a measurement become all-NaN rows.
# NOTE(review): rows whose timestamp is not exactly on the 5-minute grid are
# dropped by the reindex - confirm the data are 5-minute aligned.
grid_5min = pd.date_range(pd.to_datetime(neph_test['Date'].iloc[0]),
                          pd.to_datetime(neph_test['Date'].iloc[-1]),
                          freq='5min')

# reindex returns a new frame instead of working in place, so its result has to
# be kept. The new index has no name, hence names='Date' when turning it back
# into a column. set_index also returns a new frame, so neph_test is untouched.
neph_nan = (
    neph_test
    .set_index('Date')
    .reindex(grid_5min)
    .reset_index(names='Date')
)

In [None]:
def create_time_series_plot(data, date_column, value_columns):
    """
    Draw an interactive Plotly time series with one line per value column.

    Parameters:
    - data (pd.DataFrame): Frame holding the date column and the value columns. It is not modified.
    - date_column (str): Name of the column with the timestamps (parsed with pd.to_datetime).
    - value_columns (list): Columns to plot. Each trace name is the column name with its
      first 10 and last 8 characters removed (e.g. 'Final Scat450nm [Mm^-1]' -> '450nm').

    Returns:
    - go.Figure: The figure, after it has been shown.
    """
    # Parse into a local Series rather than assigning back into `data`: callers pass
    # filtered slices, and writing into those mutates them and can trigger
    # pandas' SettingWithCopyWarning.
    dates = pd.to_datetime(data[date_column])

    # Create the plot
    fig = go.Figure()

    # Add a line trace per column and track the overall value range
    min_y = []
    max_y = []
    for value_column in value_columns:
        fig.add_trace(go.Scatter(x=dates, y=data[value_column], mode='lines', name=value_column[10:-8]))
        min_y.append(data[value_column].min())
        max_y.append(data[value_column].max())

    # Pad the y-range outward by 5% of each bound's magnitude. Multiplying by
    # 0.95 / 1.05 only widens the range for positive bounds; for a negative
    # bound it moves inward and clips the data.
    lowest, highest = min(min_y), max(max_y)
    y_min = lowest - 0.05 * abs(lowest)
    y_max = highest + 0.05 * abs(highest)
    fig.update_yaxes(range=[y_min, y_max])

    # Fixed start of the period of interest up to the last timestamp in the data
    fig.update_xaxes(range=[pd.to_datetime('2022/01/01 00:00'), dates.iloc[-1]])

    # Add title and layout adjustments.
    # The exponent uses <sup> markup: Plotly does not reliably typeset a LaTeX
    # fragment embedded in ordinary text, and needs MathJax for LaTeX at all.
    fig.update_layout(title='Nephelometer Measurements Jan 2022 - Sep 2024',
                      template='plotly_white',
                      xaxis_title=date_column,
                      yaxis_title='Scattering [Mm<sup>-1</sup>]',
                      hovermode='x unified')

    # Show the plot
    fig.show()
    return fig

# Example usage
if __name__ == "__main__":
    # Interactive plot of neph_nan's final total scattering from 2022 onwards
    neph_nan_recent = neph_nan[neph_nan['Date'] >= pd.to_datetime('2022/01/01 00:00')]
    neph_plot = create_time_series_plot(
        neph_nan_recent,
        'Date',
        ['Final Scat450nm [Mm^-1]', 'Final Scat525nm [Mm^-1]', 'Final Scat635nm [Mm^-1]'],
    )

Apply a threshold to find data with possible errors. The threshold is set to a scattering coefficient of 100 Mm^-1.\
Flag the carry-over from zero and span measurements with the relevant flags.

### Flag types
#### Flag 1
(Value = 1)
Good measurement, use as wanted.

#### Flag 2
Compromised measurement, use with caution, measurement might be perturbed by:
- Flag 2 A (Value = 2)
    - (Hyper-) Local source
- Flag 2 B (Value = 3)
    - Bad Zero or calibration

#### Flag 3
(Value = 4)
Bad measurement, do not use!

#### Flag 5
Span and zero + carry over, do not use!




In [None]:
def find_indices_above_threshold(df, date_column, value_column, threshold,
                                 max_gap='3 hour', min_duration='3 hour'):
    """
    Find sustained episodes in which a value stays at or above a threshold.

    Rows with ``df[value_column] >= threshold`` are merged into one episode while
    consecutive qualifying rows are less than `max_gap` apart. An episode is
    reported only if the time between its first and last row is at least
    `min_duration`.

    Parameters:
    - df (pd.DataFrame): Input data. It is not modified.
    - date_column (str): Column with the timestamps (parsed with pd.to_datetime).
    - value_column (str): Column compared against the threshold.
    - threshold: Lower bound (inclusive) a value must reach to qualify.
    - max_gap: Gap between qualifying rows (anything pd.Timedelta accepts) at or
      above which a new episode starts.
    - min_duration: Minimum first-to-last span of a reported episode.

    Returns:
    - list: (first_label, last_label) tuples of `df` index labels, one per episode.
      These are labels, not positions; they coincide only for a default RangeIndex.

    Raises:
    - ValueError: If no value reaches the threshold.
    """
    # Work on a parsed copy of the dates so the caller's frame keeps its dtype
    timestamps = pd.to_datetime(df[date_column])
    max_gap = pd.Timedelta(max_gap)
    min_duration = pd.Timedelta(min_duration)

    # Timestamps of the rows at or above the threshold
    hits = timestamps[df[value_column] >= threshold]

    if hits.empty:
        raise ValueError('No values found above threshold.')

    # A new episode starts wherever the gap to the previous hit reaches max_gap
    # (the first diff is NaT, which compares False and so stays in episode 0)
    episode_ids = (hits.diff() >= max_gap).cumsum()

    index_tuples = []
    for _, episode in hits.groupby(episode_ids):
        if episode.iloc[-1] - episode.iloc[0] >= min_duration:
            index_tuples.append((episode.index[0], episode.index[-1]))

    return index_tuples
# Example usage
if __name__ == "__main__":

    # Define the threshold
    threshold = 20

    # Find sustained episodes at or above the threshold (the duration criteria
    # are set inside find_indices_above_threshold)
    indices = find_indices_above_threshold(neph_data.drop('Type', axis=1), 'Date', 'Scat525nm', threshold)

    # Display results: the first timestamp of each episode and the timestamp of
    # the row following its last sample.
    # NOTE(review): idx holds index labels but is used positionally here; this is
    # only valid while neph_data has a default RangeIndex - confirm.
    last_position = len(neph_data) - 1
    for idx in indices:
        # Clamp so an episode ending on the final row does not index past the end
        end_position = min(idx[1] + 1, last_position)
        print(neph_data['Date'].iloc[idx[0]], neph_data['Date'].iloc[end_position], '\n')

In [15]:
# Scratch check: dict.get returns None for a missing key, so `b` mixes None
# with a real value and the all-None test prints False.
a = {'b1': 12, 'b4': 43, 'c': 534}
b = tuple(a.get(key) for key in ('b2', 'b4'))
print(all(ele is None for ele in b))

False


In [None]:
import dash
from dash import dcc, html, Input, Output
import plotly.graph_objects as go

# Create the Dash app
app = dash.Dash(__name__)

# Create a simple Plotly figure
fig = go.Figure(data=[go.Scatter(x=[1, 2, 3, 4], y=[1, 4, 9, 16])])

# Define the layout of the Dash app
app.layout = html.Div([
    # Graph component to display the plot
    dcc.Graph(id='graph', figure=fig),

    # Text element to show live axis ranges.
    # Dash style dictionaries take camelCased CSS property names
    # (fontSize, marginTop) rather than the hyphenated CSS spelling.
    html.Div(id='axis-ranges', style={'fontSize': '20px', 'marginTop': '20px'})
])

# Define a callback to update the live axis ranges
@app.callback(
    Output('axis-ranges', 'children'),
    Input('graph', 'relayoutData')  # Listen to relayoutData property
)
def update_axis_ranges(relayout_data):
    """
    Describe the axis ranges carried by the graph's relayoutData.

    Parameters:
    - relayout_data (dict | None): The value of the graph's relayoutData property.

    Returns:
    - str: A message with the x and/or y range when both bounds of that axis are
      present, or a hint text when they are not.
    """
    hint = "Zoom or pan the graph to see axis ranges."
    if not relayout_data:
        return hint

    # Each axis is a (lower, upper) pair; a missing bound is None
    x_range = tuple(relayout_data.get(f'xaxis.range[{bound}]', None) for bound in (0, 1))
    y_range = tuple(relayout_data.get(f'yaxis.range[{bound}]', None) for bound in (0, 1))

    x_set = all(ele is not None for ele in x_range)
    x_unset = all(ele is None for ele in x_range)
    y_set = all(ele is not None for ele in y_range)
    y_unset = all(ele is None for ele in y_range)

    # Axes in autorange carry no 'range' entries, so each combination gets its own message
    if x_set and y_set:
        return f"X-axis range: {x_range} | Y-axis range: {y_range}"
    if x_set and y_unset:
        return f"Y-axis is in autorange or unchanged. X-axis range: {x_range}"
    if x_unset and y_set:
        return f"X-axis is in autorange or unchanged. Y-axis range: {y_range}"
    if x_unset and y_unset:
        return "Axes are in autorange mode. Zoom or pan to change ranges."

    # Only one bound of an axis present: fall back to the hint
    return hint

# Run the app (debug mode on).
# NOTE(review): newer Dash releases expose this entry point as app.run(...) -
# confirm that run_server is still available in the installed Dash version.
if __name__ == '__main__':
    app.run_server(debug=True)

In [None]:
    # Filter data based on the visible ranges
    if all(ele is not None for ele in x_range) and all(ele is not None for ele in y_range):
        filtered_df = df[(df['x'] >= x_range[0]) & (df['x'] <= x_range[1]) &
                         (df['y'] >= y_range[0]) & (df['y'] <= y_range[1])]

    elif all(ele is None for ele in x_range) and all(ele is not None for ele in y_range):
        filtered_df = df[(df['y'] >= y_range[0]) & (df['y'] <= y_range[1])]

    elif all(ele is not None for ele in x_range) and all(ele is None for ele in y_range):
        # Only the x-range is known in this branch, so the bounds must come from
        # x_range (y_range holds None values here)
        filtered_df = df[(df['x'] >= x_range[0]) & (df['x'] <= x_range[1])]

    else:
        # No complete range available: keep the data unfiltered so filtered_df
        # is always defined
        filtered_df = df

In [5]:
# Scratch check of the autorange branching for a given relayoutData payload.
relayout_data = {'selections': []}

# Key-presence flags; a None payload is treated as having no keys
relayout_keys = [] if relayout_data is None else list(relayout_data)
has_autorange = any('autorange' in key for key in relayout_keys)
has_xaxis = any('xaxis' in key for key in relayout_keys)
has_yaxis = any('yaxis' in key for key in relayout_keys)

if relayout_data is None or has_autorange:
    if relayout_data is None or (has_xaxis and has_yaxis):
        print('both')
    elif has_xaxis and not has_yaxis:
        print('y')
    elif not has_xaxis and has_yaxis:
        print('x')
    else:
        print('fuck dig')
else:
    print('fafafasf')

fafafasf


In [None]:
# Scratch check: prints True when no key of the payload mentions 'xaxis'.
relayout_data = {'yaxis.autorange': True, 'yaxis.showspikes': False}
print(not any('xaxis' in axrng for axrng in relayout_data))

True


In [12]:
# Sample frame: a sine curve on 100 evenly spaced x values plus a column of
# unseeded standard-normal noise, written to bob.csv (index included).
x_grid = np.linspace(0, 10, 100)
data = pd.DataFrame({
    'x': x_grid,
    'y': np.sin(x_grid),
    'value': np.random.randn(100),
})
df = pd.DataFrame(data)

df.to_csv('bob.csv')

In [None]:
import pandas as pd

# One-row placeholder frame whose x, y and Flag cells are all NaN
data = {column: [np.nan] for column in ('x', 'y', 'Flag')}

df = pd.DataFrame(data)

cols = df.columns

In [38]:
a, b, c = ['Flag'], ['x'], ['y']

# Order the requested names by their position in df.columns, then build the
# name/id records for the table columns
ordered_names = sorted(a + b + c, key=df.columns.get_loc)
all_columns = [{'name': col, 'id': col} for col in ordered_names]

print(all_columns)

[{'name': 'x', 'id': 'x'}, {'name': 'y', 'id': 'y'}, {'name': 'Flag', 'id': 'Flag'}]
