In [1]:
import requests
import json
import pandas as pd
#from tqdm import tqdm  
from datetime import date,timedelta, datetime
import numpy as np
from sqlalchemy.orm import Session
from sqlalchemy.engine import reflection
from sqlalchemy import inspect
from sqlalchemy import create_engine, Column, Integer, String, TIMESTAMP, FLOAT, MetaData, Table, text, DateTime, Float
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
from sqlalchemy.exc import IntegrityError, SQLAlchemyError, InterfaceError
from sqlalchemy.ext.declarative import declarative_base
from data_loading import load_data_from_db, preprocess_sensor_data
from os import path
import pandas as pd
from utils import preprocess_sensor_data
from sqlalchemy.sql import select
from sqlalchemy import Table, MetaData
from sqlalchemy import inspect
from sqlalchemy import create_engine
from model import get_session
import matplotlib.pyplot as plt
from pathlib import Path
from missing_values_analysis import calculate_missing_readings

In [2]:
def get_db_connection(db_user, db_pass, db_ip, db_port, db_name):
    try:
        connection_url = f'leanxcale://{db_user}:{db_pass}@{db_ip}:{db_port}/{db_name}?autocommit=False&parallel=True?txn_mode=NO_CONFLICTS_NO_LOGGING'
        eng = create_engine(connection_url)
        return eng
    except Exception as e:
        st.error(f"Error connecting to the database: {e}")
        return None


def get_table_names(db_connection):
    try:
        if db_connection is None:
            raise ValueError("No database connection available.")
        return inspect(db_connection).get_table_names()
    except Exception as e:
        st.error(f"Error fetching table names: {e}")
        return []


def load_data_from_db(table_name, engine):
    metadata = MetaData(bind=engine)
    table = Table(table_name, metadata, autoload=True)
    query = select([table])
    with engine.connect() as connection:
        result = connection.execute(query)
        df = pd.DataFrame(result.fetchall(), columns=result.keys())
        df.columns = df.columns.str.lower()
        df.set_index('timestamp', inplace=True)
    return df



# Connection
DB_USER = 'app'
DB_PASS = 'app'
DB_IP = '0.0.0.0'
DB_PORT = '1529'
DB_NAME = 'MOH'

In [3]:
engine = get_db_connection(DB_USER, DB_PASS, DB_IP, DB_PORT, DB_NAME)


In [4]:
table_names = get_table_names(engine)
table_names = [col for col in table_names if "hours" in col.lower()]


In [6]:
selected_table = table_names[0]

In [7]:
selected_table

'K3301_HOURS'

In [11]:
readings = load_data_from_db(selected_table, engine)

pk_list: []


In [9]:
readings, sensors = preprocess_sensor_data(readings)


In [17]:
readings.to_csv('readings.csv')

In [12]:
readings.columns

Index(['count_col33vi603_isvalid', 'count_col33vi601_isvalid',
       'count_col33vi602_isvalid', 'count_col33vi604_isvalid',
       'count_col33si501a_isvalid', 'count_col33pi222_isvalid',
       'count_col33pi601_isvalid', 'min_col33vi603', 'max_col33vi603',
       'sum_col33vi603', 'count_col33vi603', 'min_col33vi601',
       'max_col33vi601', 'sum_col33vi601', 'count_col33vi601',
       'min_col33vi602', 'max_col33vi602', 'sum_col33vi602',
       'count_col33vi602', 'min_col33vi604', 'max_col33vi604',
       'sum_col33vi604', 'count_col33vi604', 'min_col33si501a',
       'max_col33si501a', 'sum_col33si501a', 'count_col33si501a',
       'min_col33pi222', 'max_col33pi222', 'sum_col33pi222',
       'count_col33pi222', 'min_col33pi601', 'max_col33pi601',
       'sum_col33pi601', 'count_col33pi601'],
      dtype='object')

In [20]:
import pandas as pd

def preprocess_sensor_data(df):
    # Identify unique sensors
    sensors = list(set(col.split('_')[1] for col in df.columns if col.startswith('sum')))

    # Calculate mean value per sensor
    mean_values_per_sensor = {}
    alarms_per_sensor = {}
    for sensor in sensors:
        sum_col = f'sum_{sensor}'
        count_col = f'count_{sensor}'
        mean_col = sensor  # Just the sensor ID for mean values
        alarm_col = f'count_{sensor}_isvalid'
        alarms_col = f'{sensor}_alarms'  # <sensor_id>_alarms for alarms

        if sum_col in df.columns and count_col in df.columns:
            mean_values_per_sensor[mean_col] = df[sum_col] / df[count_col]

        if alarm_col in df.columns:
            alarms_per_sensor[alarms_col] = df[alarm_col]

    # Convert to DataFrames
    mean_df = pd.DataFrame(mean_values_per_sensor)
    alarms_df = pd.DataFrame(alarms_per_sensor)

    # Combine mean values and alarm counts into a single DataFrame
    result_df = pd.concat([mean_df, alarms_df], axis=1)

    # Extract sensor IDs
    sensors = [s.replace('col', '') for s in sensors]

    return result_df, sensors


In [22]:
preprocess_sensor_data(readings)[1]

['33vi602', '33si501a', '33pi222', '33pi601', '33vi604', '33vi601', '33vi603']

In [22]:
data= readings.copy()

In [31]:
(data['count_col22si101_isvalid']/360).mean()

0.9999533637832755

In [27]:
sensor_columns = [col for col in data.columns if '_isvalid' in col]
print(sensor_columns)
print(expected_readings_per_hour)

invalid_readings = {}
invalid_percentages = {}

for sensor in sensor_columns:
    valid_col = f"{sensor}_isvalid"
    invalid_col = f"{sensor}_invalid"
    percentage_col = f"{sensor}_invalid_percentage"

    data[invalid_col] = data[sensor] - data[valid_col]
    data[percentage_col] = (data[invalid_col] / expected_readings_per_hour) * 100

    invalid_readings[invalid_col] = data[invalid_col]
    invalid_percentages[percentage_col] = data[percentage_col]

invalid_readings_df = pd.DataFrame(invalid_readings)
invalid_percentages_df = pd.DataFrame(invalid_percentages)


['count_col22si101_isvalid', 'count_col22vi01_isvalid', 'count_col22vi04_isvalid', 'count_col22vi06_isvalid', 'count_col22vi08_isvalid', 'count_col22pi69_isvalid', 'count_col22pi70_isvalid', 'count_col22zi10_isvalid', 'count_col22zi09_isvalid', 'count_col22zi11_isvalid']


NameError: name 'expected_readings_per_hour' is not defined

In [37]:
def invalid_readings_df(data, original_freq_sec=10, agg_interval_sec=3600):
    """
    Calculate the number and percentage of invalid readings for each sensor per hour.

    Parameters:
    data (pd.DataFrame): The dataframe containing sensor readings.
    original_freq_sec (int): The frequency of the original data in seconds (default is 10 seconds).
    agg_interval_sec (int): The aggregation interval in seconds (default is 3600 seconds for 1 hour).

    Returns:
    pd.DataFrame: A dataframe with the percentage of invalid readings for each sensor per hour.
    """
    expected_readings_per_hour = agg_interval_sec // original_freq_sec
    sensor_columns = [col for col in data.columns if '_isvalid' in col]
    print(sensor_columns)
    print(expected_readings_per_hour)

    return 1 - data[sensor_columns]/expected_readings_per_hour

In [56]:
readings

Unnamed: 0_level_0,min_col22si101,max_col22si101,sum_col22si101,count_col22si101,count_col22si101_isvalid,min_col22vi01,max_col22vi01,sum_col22vi01,count_col22vi01,count_col22vi01_isvalid,...,count_col22pi69_invalid,count_col22pi69_invalid_percentage,count_col22pi70_invalid,count_col22pi70_invalid_percentage,count_col22zi10_invalid,count_col22zi10_invalid_percentage,count_col22zi09_invalid,count_col22zi09_invalid_percentage,count_col22zi11_invalid,count_col22zi11_invalid_percentage
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-05-15 00:00:00,9501.716797,9545.974609,3.428974e+06,360,360,14.831258,16.351448,5644.447565,360,360,...,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0
2021-05-15 01:00:00,9502.479492,9545.211914,3.428997e+06,360,360,14.727203,15.871805,5468.730871,360,360,...,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0
2021-05-15 02:00:00,9498.109375,9544.543945,3.428868e+06,360,360,14.667853,15.795498,5480.547270,360,360,...,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0
2021-05-15 03:00:00,9497.806641,9550.552734,3.428736e+06,360,360,14.967025,16.177031,5597.863029,360,360,...,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0
2021-05-15 04:00:00,9502.479492,9545.211914,3.428412e+06,360,360,14.879817,16.253338,5619.864493,360,360,...,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-10-22 20:00:00,8647.843750,8760.015625,3.131305e+06,360,360,10.835559,11.751240,4061.961130,360,360,...,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0
2023-10-22 21:00:00,8663.869141,8727.966797,3.131059e+06,360,360,10.759253,11.598626,4016.177087,360,360,...,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0
2023-10-22 22:00:00,8669.209961,8727.966797,3.131107e+06,360,360,10.835559,11.446013,3983.441483,360,360,...,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0
2023-10-22 23:00:00,8663.869141,8722.625000,3.131428e+06,360,360,10.759253,11.598626,4021.671171,360,360,...,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0


In [41]:
df = invalid_readings_df(readings)

['count_col22si101_isvalid', 'count_col22vi01_isvalid', 'count_col22vi04_isvalid', 'count_col22vi06_isvalid', 'count_col22vi08_isvalid', 'count_col22pi69_isvalid', 'count_col22pi70_isvalid', 'count_col22zi10_isvalid', 'count_col22zi09_isvalid', 'count_col22zi11_isvalid']
360


In [54]:
def plot_invalid_over_mean(data, invalid_data, sensor):
    plt.figure(figsize=(14, 7))

    # # Ensure timestamp is datetime
    # if not pd.api.types.is_datetime64_any_dtype(data.index):
    #     data.index = pd.to_datetime(data.index)
    # if not pd.api.types.is_datetime64_any_dtype(invalid_data['timestamp']):
    #     invalid_data['timestamp'] = pd.to_datetime(invalid_data['timestamp'])

    # # Set timestamp as index for invalid_data
    # invalid_data.set_index('timestamp', inplace=True)

    # Plot the mean readings time series
    mean_series = data[f'sum_{sensor}'] / data[f'count_{sensor}']
    plt.plot(mean_series, label='Mean Readings', color='blue')

    # Plot the invalid data points
    invalid_points = invalid_data[invalid_data[f'count_{sensor}_isvalid'] > 0]
    plt.scatter(invalid_points.index, mean_series.loc[invalid_points.index], color='red', label='Invalid Readings')

    plt.title(f'Mean Readings and Invalid Readings for {sensor}')
    plt.xlabel('Time')
    plt.ylabel('Mean Reading')
    plt.legend()
    plt.show()
    st.pyplot(plt)


In [55]:
plot_invalid_over_mean(readings, df, 'col22pi69')

  plt.show()


NameError: name 'st' is not defined

In [23]:
def calculate_invalid_readings(data, original_freq_sec=10, agg_interval_sec=3600):
    """
    Calculate the number and percentage of invalid readings for each sensor per hour.

    Parameters:
    data (pd.DataFrame): The dataframe containing sensor readings.
    original_freq_sec (int): The frequency of the original data in seconds (default is 10 seconds).
    agg_interval_sec (int): The aggregation interval in seconds (default is 3600 seconds for 1 hour).

    Returns:
    pd.DataFrame: A dataframe with the number and percentage of invalid readings for each sensor per hour.
    """
    expected_readings_per_hour = agg_interval_sec // original_freq_sec
    sensor_columns = [col for col in data.columns if 'count_' in col and '_isvalid' not in col]
    print(sensor_columns)
    print(expected_readings_per_hour)

    invalid_readings = {}
    invalid_percentages = {}

    for sensor in sensor_columns:
        valid_col = f"{sensor}_isvalid"
        invalid_col = f"{sensor}_invalid"
        percentage_col = f"{sensor}_invalid_percentage"

        data[invalid_col] = data[sensor] - data[valid_col]
        data[percentage_col] = (data[invalid_col] / expected_readings_per_hour) * 100

        invalid_readings[invalid_col] = data[invalid_col]
        invalid_percentages[percentage_col] = data[percentage_col]

    invalid_readings_df = pd.DataFrame(invalid_readings)
    invalid_percentages_df = pd.DataFrame(invalid_percentages)
    return invalid_readings_df, invalid_percentages_df

In [57]:
# Load the data
file_path = 'readings.csv'
data = pd.read_csv(file_path)

In [24]:
x,y = calculate_invalid_readings(readings, original_freq_sec=10, agg_interval_sec=3600)

['count_col22si101', 'count_col22vi01', 'count_col22vi04', 'count_col22vi06', 'count_col22vi08', 'count_col22pi69', 'count_col22pi70', 'count_col22zi10', 'count_col22zi09', 'count_col22zi11']
360


In [25]:
y.describe()

Unnamed: 0,count_col22si101_invalid_percentage,count_col22vi01_invalid_percentage,count_col22vi04_invalid_percentage,count_col22vi06_invalid_percentage,count_col22vi08_invalid_percentage,count_col22pi69_invalid_percentage,count_col22pi70_invalid_percentage,count_col22zi10_invalid_percentage,count_col22zi09_invalid_percentage,count_col22zi11_invalid_percentage
count,21383.0,21383.0,21383.0,21383.0,21383.0,21383.0,21383.0,21383.0,21383.0,21383.0
mean,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
std,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [23]:

# Load the data
file_path = 'readings.csv'
data = pd.read_csv(file_path)

# Calculate the number of invalid readings for each sensor per hour
# Assuming that invalid readings are the difference between total readings and valid readings
sensor_columns = [col for col in data.columns if 'count_' in col and '_isvalid' not in col]

for sensor in sensor_columns:
    valid_col = f"{sensor}_isvalid"
    invalid_col = f"{sensor}_invalid"
    data[invalid_col] = data[sensor] - data[valid_col]

# Annotate each hour with a feature indicating the quality of the data based on the number of invalid readings
# Here we sum up all invalid readings to get a general quality indicator per hour
data['total_invalid_readings'] = data[[f"{sensor}_invalid" for sensor in sensor_columns]].sum(axis=1)

# Optionally, you can create a categorical label based on the number of invalid readings
# For example, if total_invalid_readings > threshold, label as 'poor', otherwise 'good'
threshold = 10  # Set your own threshold
data['data_quality'] = data['total_invalid_readings'].apply(lambda x: 'poor' if x > threshold else 'good')

# Prepare the dataset for machine learning by dropping unnecessary columns
# Keep timestamp, quality indicator, and/or labels, and sensor features
features = ['timestamp', 'total_invalid_readings', 'data_quality'] + [col for col in data.columns if 'min_' in col or 'max_' in col or 'sum_' in col]
ml_data = data[features]

# Encode categorical labels if needed (e.g., for supervised learning)
ml_data['data_quality'] = ml_data['data_quality'].map({'good': 0, 'poor': 1})

# Save or return the prepared dataset
ml_data.to_csv('prepared_ml_data.csv', index=False)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ml_data['data_quality'] = ml_data['data_quality'].map({'good': 0, 'poor': 1})


In [22]:
!pip install ace_tools


Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
[31mERROR: Could not find a version that satisfies the requirement ace_tools (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for ace_tools[0m[31m
[0m

In [29]:
ml_data.total_invalid_readings.unique()

array([0])

In [27]:
def calculate_total_invalid_per_sensor(data):
    """
    Calculate the total number of invalid readings for each sensor.

    Parameters:
    data (pd.DataFrame): The dataframe containing sensor readings.

    Returns:
    pd.Series: A series with sensor names as index and total invalid readings as values.
    """
    sensor_columns = [col for col in data.columns if 'count_' in col and '_isvalid' not in col]
    print(sensor_columns)
    total_invalid_per_sensor = {}

    for sensor in sensor_columns:
        valid_col = f"{sensor}_isvalid"
        invalid_col = f"{sensor}_invalid"
        data[invalid_col] = data[sensor] - data[valid_col]
        total_invalid_per_sensor[sensor] = data[invalid_col].sum()

    return pd.Series(total_invalid_per_sensor)

# Load the data
file_path = 'readings.csv'
data = pd.read_csv(file_path)

# Calculate the total invalid readings per sensor
total_invalid_readings = calculate_total_invalid_per_sensor(data)
total_invalid_readings


['count_col33vi603', 'count_col33vi601', 'count_col33vi602', 'count_col33vi604', 'count_col33si501a', 'count_col33pi222', 'count_col33pi601']


count_col33vi603     0
count_col33vi601     0
count_col33vi602     0
count_col33vi604     0
count_col33si501a    0
count_col33pi222     0
count_col33pi601     0
dtype: int64

In [38]:
import pandas as pd

def calculate_missing_readings(data, original_freq_sec=10, agg_interval_sec=3600):
    """
    Calculate the percentage of missing readings (NaNs) for each sensor per hour.

    Parameters:
    data (pd.DataFrame): The dataframe containing sensor readings.
    original_freq_sec (int): The frequency of the original data in seconds (default is 10 seconds).
    agg_interval_sec (int): The aggregation interval in seconds (default is 3600 seconds for 1 hour).

    Returns:
    pd.DataFrame: A dataframe with the percentage of missing readings for each sensor per hour.
    """
    expected_readings_per_hour = agg_interval_sec // original_freq_sec
    sensor_columns = [col for col in data.columns if 'count_' in col and '_isvalid' not in col]

    missing_percentages = (expected_readings_per_hour - data[sensor_columns]) / expected_readings_per_hour * 100
    missing_percentages.columns = [f"{sensor}_missing_percentage" for sensor in sensor_columns]

    return missing_percentages


In [107]:
def identify_intervals(timestamps, freq_sec):
    """
    Identify contiguous intervals from a list of timestamps.

    Parameters:
    timestamps (list): List of timestamps where readings are missing.
    freq_sec (int): The frequency of the original data in seconds.

    Returns:
    list: A list of tuples, each representing a start and end of a missing interval.
    """
    if timestamps.empty:
        return []

    intervals = []
    start = timestamps[0]
    end = timestamps[0]

    for i in range(1, len(timestamps)):
        if (timestamps[i] - timestamps[i - 1]).total_seconds() <= freq_sec:
            end = timestamps[i]
        else:
            intervals.append((start, end))
            start = timestamps[i]
            end = timestamps[i]

    intervals.append((start, end))  # Add the last interval
    return intervals

In [108]:
def calculate_missing_readings(data, original_freq_sec=10):
    """
    Calculate the percentage of missing readings (NaNs) for each sensor.

    Parameters:
    data (pd.DataFrame): The dataframe containing sensor readings.
    original_freq_sec (int): The frequency of the original data in seconds (default is 10 seconds).

    Returns:
    pd.DataFrame: A dataframe with the percentage of missing readings for each sensor.
    """
    # Calculate the total number of expected readings
    total_duration_sec = (data.index[-1] - data.index[0]).total_seconds()
    expected_readings = (total_duration_sec // original_freq_sec)
    sensor_columns = [col for col in data.columns if 'count_' in col and '_isvalid' not in col]

    missing_percentages = (expected_readings - data[sensor_columns].sum()) / expected_readings * 100
    missing_percentages = missing_percentages.reset_index()
    missing_percentages.columns = ['sensor', 'missing_percentage']

    missing = (expected_readings - data[sensor_columns].sum()) 
    missing = missing.reset_index()
    missing.columns = ['sensor', 'missing']
    

    missing_intervals = {}
    for sensor in sensor_columns:
        missing_timestamps = data[data[sensor]<360].index
        missing_intervals[sensor] = identify_intervals(missing_timestamps, original_freq_sec)



    return missing, missing_percentages, missing_intervals

In [158]:
def calculate_missing_readings(data, original_freq_sec=10, aggr=360):
    """
    Calculate the percentage of missing readings (NaNs) and periods of missing readings for each sensor.

    Parameters:
    data (pd.DataFrame): The dataframe containing sensor readings.
    original_freq_sec (int): The frequency of the original data in seconds (default is 10 seconds).

    Returns:
    pd.DataFrame: A dataframe with the percentage of missing readings for each sensor.
    dict: A dictionary with the periods of missing readings for each sensor.
    """
    # Calculate the total number of expected readings
    total_duration_sec = (data.index[-1] - data.index[0]).total_seconds()
    expected_readings = total_duration_sec // original_freq_sec

    sensor_columns = [col for col in data.columns if 'count_' in col and '_isvalid' not in col]

    missing_percentages = (expected_readings - data[sensor_columns].sum()) / expected_readings * 100
    missing_percentages = missing_percentages.reset_index()
    missing_percentages.columns = ['sensor', 'missing_percentage']

    missing = (expected_readings - data[sensor_columns].sum()) 
    missing = missing.reset_index()
    missing.columns = ['sensor', 'missing']
    

    missing_intervals = {}
    for sensor in sensor_columns:
        missing_timestamps = data[data[sensor]<aggr].index
        missing_intervals[sensor] = identify_intervals(missing_timestamps, aggr*original_freq_sec)

    return missing, missing_percentages, missing_intervals

def identify_intervals(timestamps, freq_sec):
    """
    Identify contiguous intervals from a list of timestamps.

    Parameters:
    timestamps (list): List of timestamps where readings are missing.
    freq_sec (int): The frequency of the original data in seconds.

    Returns:
    list: A list of tuples, each representing a start and end of a missing interval.
    """
    if timestamps.empty:
        return []

    intervals = []
    start = timestamps[0]
    end = timestamps[0]

    for i in range(1, len(timestamps)):
        if (timestamps[i] - end).total_seconds() <= freq_sec:
            end = timestamps[i]
        else:
            intervals.append((start, end))
            start = timestamps[i]
            end = timestamps[i]

    intervals.append((start, end))  # Add the last interval
    return intervals


In [154]:
missing, missing_periods = calculate_missing_readings(readings)


In [155]:
missing_periods['count_col33vi603']

[(Timestamp('2023-02-15 00:00:00'), Timestamp('2023-03-26 02:00:00')),
 (Timestamp('2023-03-26 04:00:00'), Timestamp('2023-05-14 23:00:00')),
 (Timestamp('2023-10-23 00:00:00'), Timestamp('2023-10-23 00:00:00'))]

In [156]:
missing

Unnamed: 0,sensor,missing_percentage
0,count_col33vi603,8.329423
1,count_col33vi601,8.329423
2,count_col33vi602,8.329423
3,count_col33vi604,8.329423
4,count_col33si501a,8.329423
5,count_col33pi222,8.329423
6,count_col33pi601,8.329423


In [159]:
readings

Unnamed: 0_level_0,count_col33vi603_isvalid,count_col33vi601_isvalid,count_col33vi602_isvalid,count_col33vi604_isvalid,count_col33si501a_isvalid,count_col33pi222_isvalid,count_col33pi601_isvalid,min_col33vi603,max_col33vi603,sum_col33vi603,...,sum_col33si501a,count_col33si501a,min_col33pi222,max_col33pi222,sum_col33pi222,count_col33pi222,min_col33pi601,max_col33pi601,sum_col33pi601,count_col33pi601
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-05-15 00:00:00,0,0,0,0,0,0,0,0.093979,2.950925,855.527964,...,2.777941e+06,360,-0.179779,2.224877,648.390362,360,9.448253,54.812092,6112.817972,360
2021-05-15 01:00:00,0,0,0,0,0,0,0,0.087731,2.980732,856.942858,...,2.768093e+06,360,-0.181610,2.227451,647.480360,360,9.492846,54.082409,6076.850348,360
2021-05-15 02:00:00,0,0,0,0,0,0,0,0.090974,3.052270,883.753914,...,2.775669e+06,360,-0.181610,2.144166,612.114070,360,9.357123,55.913773,6110.726194,360
2021-05-15 03:00:00,0,0,0,0,0,0,0,0.098235,3.042732,875.203113,...,2.769045e+06,360,-0.185639,2.052914,600.417815,360,9.302748,56.185616,6132.398027,360
2021-05-15 04:00:00,0,0,0,0,0,0,0,0.107190,2.973475,865.277831,...,2.769137e+06,360,-0.187715,2.057338,602.522817,360,9.301237,55.227009,6102.456418,360
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-10-22 20:00:00,0,0,0,0,0,0,0,2.432278,2.718428,925.219375,...,3.396931e+06,360,2.179270,2.218265,791.810840,360,9.668272,9.829887,3513.755909,360
2023-10-22 21:00:00,0,0,0,0,0,0,0,2.479970,2.813811,943.342240,...,3.375550e+06,360,2.189394,2.233262,797.471002,360,9.695459,9.828378,3515.986797,360
2023-10-22 22:00:00,0,0,0,0,0,0,0,2.527661,2.861503,961.035867,...,3.366482e+06,360,2.207016,2.238137,798.845922,360,9.718116,9.822335,3517.616538,360
2023-10-22 23:00:00,0,0,0,0,0,0,0,2.527661,2.813811,957.554371,...,3.363108e+06,360,2.211141,2.241511,801.286060,360,9.736241,9.870669,3529.615422,360
