# Dependencies and new code

In [None]:
pip install git+https://github.com/aerosense-ai/aerosense-tools.git@0.3.2

In [None]:
# Authenticate the current Colab user.
# NOTE(review): presumably required so the BigQuery client created below can
# access the data - confirm.
from google.colab import auth
auth.authenticate_user()

In [None]:
import datetime as dt

import pandas as pd

from aerosense_tools.queries import BigQuery

In [None]:
# BigQuery client from aerosense-tools; every query below goes through it.
client = BigQuery()

In [None]:
# Show which sensor types and installations the client reports, to pick the
# values used in the queries below.
print(client.get_sensor_types())
print(client.get_installations())

['accelerometer', 'barometer', 'barometer_thermometer', 'battery_voltmeter', 'connection_statistics', 'differential_barometer', 'gyroscope', 'magnetometer', 'microphone']
[{'label': 'aventa-turbine-test (Turbine 0)', 'value': 'aventa-turbine-test'}, {'label': 'my-next-test-installation (Turbine 0)', 'value': 'my-next-test-installation'}, {'label': 'ost-wt-evaluation (Turbine 0)', 'value': 'ost-wt-evaluation'}, {'label': 'ost-wt-tests (Turbine 0)', 'value': 'ost-wt-tests'}, {'label': 'pbl-test (Turbine unknown)', 'value': 'pbl-test'}, {'label': 'test-installation (Turbine 0)', 'value': 'test-installation'}]


In [None]:
# Get the sample timestamps of every sensor type of interest by running one
# SQL query per sensor type.
sensor_types = [
    'barometer',
    'accelerometer',
    'gyroscope',
    'magnetometer',
    'differential_barometer'
    ]

# Query filters, named here so that the node, installation and start of the
# time window can be changed in one place.
NODE_ID = "1"
INSTALLATION_REFERENCE = "pbl-test"
START_DATETIME = "2022-11-15 00:00:00.0"

# Maps sensor type -> result of `client.query` for that sensor type (used
# further down as a DataFrame with a `datetime` column, sorted ascending).
sample_time={}

for sensor in sensor_types:

  # All interpolated values are constants defined in this cell, not user input.
  query_string = f"""
        SELECT datetime
        FROM `greta.sensor_data`
        WHERE sensor_type_reference="{sensor}" and
        node_id="{NODE_ID}" and
        installation_reference = "{INSTALLATION_REFERENCE}" and
        datetime > "{START_DATETIME}"
        ORDER BY datetime ASC
        """

  sample_time[sensor]=client.query(query_string)


In [None]:
# Define a session as any continuous measurement with a gap between
# consecutive samples of no longer than a given threshold.
# NOTE: this could probably also be done directly in SQL.


def split_into_sessions(timestamps, max_gap):
  """Split a series of sample timestamps into measurement sessions.

  A new session starts at the first sample and at every sample that follows
  its predecessor by more than `max_gap`; a session ends at the sample right
  before the next start and at the last sample. Starts and ends are paired
  by position, so the result does not depend on index ordering.

  Args:
    timestamps: pandas Series of sample times in ascending order (the
      queries above use ORDER BY datetime ASC).
    max_gap: largest gap (timedelta) allowed between two consecutive samples
      of the same session.

  Returns:
    A tuple `(sessions, single_points)` of DataFrames, both with the columns
    'start' and 'end'. `sessions` has one row per session of at least two
    samples and a fresh 0..n-1 index. `single_points` holds the isolated
    samples (session start and end at the same sample), indexed like
    `timestamps`; they are not part of `sessions`.
  """
  if timestamps.empty:
    # Nothing was measured: return empty frames instead of failing on iloc[0]
    no_rows = pd.DataFrame({'start': timestamps, 'end': timestamps})
    return no_rows.reset_index(drop=True), no_rows

  is_start = timestamps.diff() > max_gap
  is_start.iloc[0] = True
  # A sample closes a session exactly when the next sample opens one; the
  # last sample always closes the final session.
  is_end = is_start.shift(-1, fill_value=True).astype(bool)
  is_single = is_start & is_end

  single_points = pd.DataFrame(
      {'start': timestamps[is_single], 'end': timestamps[is_single]}
  )
  sessions = pd.DataFrame({
      'start': timestamps[is_start & ~is_single].reset_index(drop=True),
      'end': timestamps[is_end & ~is_single].reset_index(drop=True),
  })
  return sessions, single_points


# Maximum gap of 60 sec, in theory this can be different for different sensors
threshold = dt.timedelta(seconds=60)

# Maps sensor type -> DataFrame of sessions with 'start' and 'end' columns
sensor_sessions={}

for sensor in sensor_types:
  sessions, single_points = split_into_sessions(
      sample_time[sensor]['datetime'], threshold
  )

  # Edge case of a single measurement point: report it and leave it out
  if not single_points.empty:
    print('Warning: Sensor {} has single measuremnt points'.format(sensor))
    print(single_points)

  sensor_sessions[sensor]=sessions

                             start                        end
0       2022-11-16 15:20:01.975915 2022-11-16 15:20:01.975915
644878  2022-11-16 21:55:04.435144 2022-11-16 21:55:04.435144
1010576 2022-11-17 01:53:38.841969 2022-11-17 01:53:38.841969
1855786 2022-11-17 10:15:59.027464 2022-11-17 10:15:59.027464
2044214 2022-11-17 20:15:29.137279 2022-11-17 20:15:29.137279
2044215 2022-11-18 04:17:36.666142 2022-11-18 04:17:36.666142


In [None]:
# Add to every sensor's sessions a closed interval [start, end] and the
# session duration. The intervals are built in one vectorised call, which
# also works when a sensor has no sessions at all.
for sensor in sensor_types:
  sensor_frame = sensor_sessions[sensor]
  sensor_frame['interval'] = pd.arrays.IntervalArray.from_arrays(
      sensor_frame['start'], sensor_frame['end'], closed='both'
  )
  sensor_frame['duration'] = sensor_frame['end'] - sensor_frame['start']


# For every barometer session, flag per other sensor whether at least one of
# that sensor's sessions overlaps it in time. One boolean column per sensor is
# added to the barometer sessions, in the order of the list below.
barometer_sessions = sensor_sessions['barometer']

for sensor in ['differential_barometer','accelerometer', 'gyroscope', 'magnetometer']:
  intervals = pd.arrays.IntervalArray(sensor_sessions[sensor]['interval'])

  barometer_sessions[sensor] = [
      bool(intervals.overlaps(barometer_interval).any())
      for barometer_interval in barometer_sessions['interval']
  ]


In [None]:
# Barometer sessions with the per-sensor overlap flags; the interval column
# is left out of the display because start and end already show it.
sensor_sessions['barometer'].drop(columns=['interval'])

Unnamed: 0,start,end,duration,differential_barometer,accelerometer,gyroscope,magnetometer
0,2022-11-16 15:50:39.857688,2022-11-16 15:59:50.441781,0 days 00:09:10.584093,True,True,True,True
1,2022-11-16 16:10:41.092540,2022-11-16 16:20:41.019697,0 days 00:09:59.927157,True,True,True,True
2,2022-11-16 16:40:41.032332,2022-11-16 16:50:41.022636,0 days 00:09:59.990304,True,True,True,True
3,2022-11-16 17:10:41.093177,2022-11-16 17:20:40.987034,0 days 00:09:59.893857,True,True,True,True
4,2022-11-16 17:40:41.194619,2022-11-16 17:50:41.062035,0 days 00:09:59.867416,True,True,True,True
5,2022-11-16 18:10:41.095954,2022-11-16 18:20:41.076240,0 days 00:09:59.980286,True,True,True,True
6,2022-11-16 18:40:41.253645,2022-11-16 18:50:41.142496,0 days 00:09:59.888851,True,True,True,True
7,2022-11-16 19:10:41.197300,2022-11-16 19:20:41.204387,0 days 00:10:00.007087,True,True,True,True
8,2022-11-16 19:40:41.251614,2022-11-16 19:50:41.227153,0 days 00:09:59.975539,True,True,True,True
9,2022-11-16 20:10:41.311151,2022-11-16 20:11:07.118621,0 days 00:00:25.807470,True,True,True,True


In [None]:
# Sessions of the differential barometer, for comparison with the barometer
# sessions shown above.
sensor_sessions['differential_barometer']

Unnamed: 0,start,end,interval,duration
0,2022-11-16 15:50:40.857658,2022-11-16 16:00:14.882558,"[2022-11-16 15:50:40.857658, 2022-11-16 16:00:...",0 days 00:09:34.024900
1,2022-11-16 16:10:42.192515,2022-11-16 16:20:41.840274,"[2022-11-16 16:10:42.192515, 2022-11-16 16:20:...",0 days 00:09:59.647759
2,2022-11-16 16:40:42.032301,2022-11-16 16:50:41.791242,"[2022-11-16 16:40:42.032301, 2022-11-16 16:50:...",0 days 00:09:59.758941
3,2022-11-16 17:10:42.093146,2022-11-16 17:20:41.707605,"[2022-11-16 17:10:42.093146, 2022-11-16 17:20:...",0 days 00:09:59.614459
4,2022-11-16 17:40:42.194619,2022-11-16 17:50:41.830610,"[2022-11-16 17:40:42.194619, 2022-11-16 17:50:...",0 days 00:09:59.635991
5,2022-11-16 18:10:42.090636,2022-11-16 18:20:41.796811,"[2022-11-16 18:10:42.090636, 2022-11-16 18:20:...",0 days 00:09:59.706175
6,2022-11-16 18:40:42.153639,2022-11-16 18:50:42.151092,"[2022-11-16 18:40:42.153639, 2022-11-16 18:50:...",0 days 00:09:59.997453
7,2022-11-16 19:10:42.197270,2022-11-16 19:20:42.016968,"[2022-11-16 19:10:42.197270, 2022-11-16 19:20:...",0 days 00:09:59.819698
8,2022-11-16 19:40:42.251614,2022-11-16 19:50:41.995759,"[2022-11-16 19:40:42.251614, 2022-11-16 19:50:...",0 days 00:09:59.744145
9,2022-11-16 20:10:42.312128,2022-11-16 20:11:07.124195,"[2022-11-16 20:10:42.312128, 2022-11-16 20:11:...",0 days 00:00:24.812067


In [None]:
# Sample-wise alignment check between barometer and accelerometer samples.

# Index each sensor's samples by their own timestamp; the `datetime` column is
# kept and renamed so both sensors can sit side by side in one frame.
df1 = sample_time['barometer'].set_index(sample_time['barometer']['datetime']).rename(columns = {'datetime':'baros_samples'})
df2 = sample_time['accelerometer'].set_index(sample_time['accelerometer']['datetime']).rename(columns = {'datetime':'accelerometer_samples'})
# Column-wise concat aligns the rows on the timestamp index, so a row holds
# values for both sensors only where their timestamps are identical.
df3=pd.concat([df1, df2], axis=1)
# Move every accelerometer timestamp up by one row, so each row sees the
# accelerometer value of the row that follows it.
df3['accelerometer_samples']=df3['accelerometer_samples'].shift(-1)

# Flag barometer samples whose following row carries an accelerometer
# timestamp less than one second later; the result is aligned back onto df1
# by its timestamp index.
# NOTE(review): this presumably relies on the concatenated index being in time
# order, and only counts an accelerometer sample that sits on the row directly
# after the barometer sample - confirm this is the intended check
# (`pd.merge_asof` with a tolerance might express it more directly).
df1['accelerometer_bool']=df3['accelerometer_samples'] - df3['baros_samples'] < dt.timedelta(seconds=1)