In [1]:
# Print the Python version of the runtime this notebook is executed on.
!python --version

Python 3.7.14


In [None]:
!sudo pip install git+https://github.com/aerosense-ai/aerosense-tools.git@0.3.1

In [3]:
# Colab-only step: authenticate the current user.
# NOTE(review): presumably these credentials are what the BigQuery client relies on - confirm.
from google.colab import auth
auth.authenticate_user()

In [4]:
import datetime as dt

from aerosense_tools.queries import BigQuery
from aerosense_tools import plots

In [5]:
import datetime as dt

from aerosense_tools.queries import BigQuery
from aerosense_tools import plots

In [None]:
# Client whose `get_sensor_data` method is used further down to fetch the sensor samples.
client = BigQuery()

In [7]:
import numpy as np
import pandas as pd
from scipy.interpolate import interp1d

In [91]:
class RawSignal:
    """Raw sensor samples held in a dataframe, together with in-place pre-processing helpers.

    Every method updates ``self.dataframe`` and returns ``None``.
    """

    # Divisors that turn the raw fixed point readings into physical units, keyed by sensor name.
    _SCALE_FACTORS = {
        "barometer": 40.96 * 101325,  # [Atm]
        "barometer_thermometer": 100,  # [Celsius]
    }

    def __init__(self, dataframe, sensor):
        """
        :param pandas.DataFrame dataframe: samples, one column per signal (`pad_gaps` and `to_constant_timestep` need a datetime index)
        :param str sensor: sensor name, selects the conversion applied by `measurement_to_variable`
        """
        self.dataframe = dataframe
        self.sensor = sensor

    def pad_gaps(self, threshold):
        """Checks for missing data. If the gap between two consecutive samples (timedelta) is
        higher than the given threshold, the first sample after the gap is replaced with NaN in
        every column. Linear interpolation that touches this sample yields NaN, thus no values
        are interpolated across the non-sampling time window.

        :param threshold: maximum gap between two samples as a timedelta type
        """
        # diff() is NaT for the first row, which compares False, so the first sample is never blanked.
        follows_gap = self.dataframe.index.to_series().diff() > threshold
        self.dataframe[follows_gap] = np.nan

    def to_constant_timestep(self, time_step):
        """Resample dataframe to the given time step. Linearly interpolates between samples.

        The new time vector starts at the first sample and advances by `time_step` without
        passing the last sample. The index is assumed to be sorted in time. The resampled data
        replaces `self.dataframe`; nothing is returned.

        :param float time_step: timestep in seconds, must be positive
        :raises ValueError: if the timestep is not a positive number
        """
        if not time_step > 0:
            raise ValueError("time_step must be a positive number of seconds.")

        old_index = self.dataframe.index
        new_time_vector = pd.date_range(
            start=old_index[0],
            end=old_index[-1],
            # A Timedelta avoids building a frequency string from a unit alias.
            freq=pd.to_timedelta(time_step, unit="s"),
        )

        # Interpolate on seconds elapsed since the first sample: absolute epoch nanoseconds
        # are too large to be represented exactly once converted to float64.
        old_seconds = (old_index - old_index[0]).total_seconds().to_numpy()
        new_seconds = (new_time_vector - old_index[0]).total_seconds().to_numpy()

        new_dataframe = pd.DataFrame(index=new_time_vector)

        for column in self.dataframe.columns:
            signal = interp1d(old_seconds, self.dataframe[column], assume_sorted=True)
            new_dataframe[column] = signal(new_seconds)

        self.dataframe = new_dataframe

    def filter_outliers(self, window, std_multiplier):
        """A very primitive filter. Blanks data points outside the confidence interval using a rolling median and
        standard deviation.

        A value is kept when it lies within `std_multiplier` rolling standard deviations of the
        rolling median. Every other value is replaced with NaN (rows are not dropped), which
        includes the leading rows whose rolling window is not complete yet.

        :param int window: window (number of samples) for rolling median and standard deviation
        :param float std_multiplier: multiplier to the rolling standard deviation
        """
        rolling = self.dataframe.rolling(window)
        rolling_median = rolling.median()
        half_width = std_multiplier * rolling.std()
        # TODO define filtering rule using rolling df here
        within_interval = (self.dataframe <= rolling_median + half_width) & (
            self.dataframe >= rolling_median - half_width
        )
        self.dataframe = self.dataframe.where(within_interval)

    def measurement_to_variable(self):
        """Transform fixed point values to a physical variable.

        Divides the dataframe in place by the scale factor of the sensor. Data of a sensor
        without a known scale factor is left untouched. Calling this method again applies the
        division again.
        """
        scale_factor = self._SCALE_FACTORS.get(self.sensor)
        if scale_factor is not None:
            self.dataframe /= scale_factor


    

In [10]:
# NOTE(review): `client` is already created in an earlier cell; this cell repeats that step.
client = BigQuery()

In [153]:
# Fetch three minutes of barometer samples from node "2" of the "aventa-turbine-test" installation.
# NOTE(review): `data_limit_applied` is never inspected afterwards; presumably it reports whether
# `row_limit` truncated the result - confirm, and check it before assuming the window is complete.
df, data_limit_applied = client.get_sensor_data(
    installation_reference="aventa-turbine-test",
    node_id="2",
    sensor_type_reference="barometer",
    start=dt.datetime(2022, 7, 21, 10, 39, 00),
    finish=dt.datetime(2022, 7, 21, 10, 42, 00),
    row_limit=10000,
)

In [154]:
# The signal columns are the ones whose name starts with "f"; "datetime" becomes the index.
data_columns = [name for name in df.columns if name.startswith("f")]
signal_df = df[["datetime", *data_columns]].set_index("datetime")
barometer = RawSignal(signal_df, "barometer")

In [155]:
# Wherever two consecutive samples are more than one second apart, blank (NaN) the first
# sample after the pause in every column.
barometer.pad_gaps(dt.timedelta(seconds=1))

In [156]:
# Convert the raw barometer readings to atmospheres, in place.
# NOTE: not idempotent - re-running this cell divides the data by the scale factor again.
barometer.measurement_to_variable()


There seem to be loads of nonsensical values. Let's remove everything above 1.5 atm and below 0.7 atm (an arbitrary choice for now, until I can come up with something better).

In [134]:
# Keep readings between 0.7 and 1.5 atm; anything outside that band is replaced by NaN.
barometer.dataframe = barometer.dataframe.where(
    lambda pressure: (pressure <= 1.5) & (pressure >= 0.7)
)

In [None]:
# Move the datetime index back into a regular column.
# NOTE(review): `pre_processed_df` is reassigned again further down, before it is plotted.
pre_processed_df = barometer.dataframe.reset_index()

Let's plot just around the peak pressure, so that we can look at the data.

In [140]:
# Half-second window used for plotting.
plot_start = dt.datetime(year=2022, month=7, day=21, hour=10, minute=41, second=51, microsecond=150_000)
plot_finish = dt.datetime(year=2022, month=7, day=21, hour=10, minute=41, second=51, microsecond=650_000)
# Boolean mask of the barometer samples that lie strictly inside the window.
time_mask = (barometer.dataframe.index > plot_start) & (barometer.dataframe.index < plot_finish)
pre_processed_df = barometer.dataframe.loc[time_mask].reset_index()

Let's grab the temperature.

In [147]:
# Fetch the barometer's thermometer samples for the plot window only.
df, data_limit_applied = client.get_sensor_data(
    installation_reference="aventa-turbine-test",
    node_id="2",
    sensor_type_reference="barometer_thermometer",
    start=plot_start,
    finish=plot_finish,
    row_limit=10000,
)

# Same preparation as for the barometer: "f*" columns indexed by "datetime", then
# converted from raw fixed point values to degrees Celsius.
data_columns = [name for name in df.columns if name.startswith("f")]
signal_df = df[["datetime", *data_columns]].set_index("datetime")
thermometer = RawSignal(signal_df, "barometer_thermometer")
thermometer.measurement_to_variable()

In [162]:
# Temperatures between 10 and 30 degrees Celsius count as plausible; the rest becomes NaN.
thermo_filter = thermometer.dataframe.le(30) & thermometer.dataframe.ge(10)
thermometer.dataframe = thermometer.dataframe.where(thermo_filter)

We can filter the barometer data with the thermometer filter, as it is more obvious from the temperature when the data is reasonable.

In [163]:
# Restrict the barometer data to the plot window, then keep only the values for which the
# thermometer reading passed the plausibility filter.
# NOTE(review): `where` aligns `thermo_filter` with the barometer frame on index and column
# labels; presumably both sensors share timestamps and column names - confirm, otherwise the
# unmatched barometer values turn into NaN.
baro_thermo_df = barometer.dataframe.loc[time_mask]
pre_processed_df = baro_thermo_df.where(thermo_filter).reset_index()

In [159]:
# NOTE(review): read top to bottom, this replaces the thermo-filtered barometer frame built in
# the previous cell, so the plot below shows thermometer data. The execution counts suggest
# this cell was originally run before that one - confirm which frame is meant to be plotted.
pre_processed_df = thermometer.dataframe.reset_index()

In [None]:
# Display the frame that is passed to the plot in the next cell.
pre_processed_df

In [164]:
# Plot the pre-processed frame with the plotting helper from aerosense_tools.
plots.plot_sensors(pre_processed_df)