# Telemetry data fetching & processing

## Data Ingestion

In [25]:
#Load API credentials from .env file & initialize global variables 

import os
from dotenv import load_dotenv

# Load secret .env file
load_dotenv()

# Store credentials
CLIENT_ID = os.getenv('CLIENT_ID')
CLIENT_SECRET = os.getenv('CLIENT_SECRET')

# Verify it worked
#if CLIENT_ID is not None and CLIENT_SECRET is not None:
    #print(CLIENT_ID)
    #print(CLIENT_SECRET)

#other global variables
THING_ID = "7db1852a-9709-471f-b049-7505253ceaad" #id of Arduino Thing of interest
SPACE_ID = '6b795d20-54e3-483c-a322-7114527741a1' #id of the shared space needed to access data

### Get data from Arduino IOT


In [26]:
import requests
import pandas as pd
import json

#returns standard header to be included when making http request to API
#stardard header includes the authorization (access token) and x-organization (space id)
#to be used before each API call (tokens expire every 3 minutes i believe)
def getHeader():
      client = requests.request('POST', 'https://api2.arduino.cc/iot/v1/clients/token',
                        headers={'content-type': 'application/x-www-form-urlencoded'},
                        data={'grant_type':'client_credentials',
                              'client_id':"qOiqWXbhwf3TKc2p7y6FU0JPRIkijSvl",
                              'client_secret':"WiU2AkNtZRB3TgrDKNYwpY6onBef3KdpiVadG6hOxH1wcB2CArI4NBTKyqtGLSil",
                              'audience':'https://api2.arduino.cc/iot'})

      access_token = 'Bearer '+ client.json()['access_token']

      #standard header for the http requests
      header = {'authorization':access_token,'x-organization':SPACE_ID}
      
      return header

#returns dictionary of the properties of the thing 
#key: property name, value: property id
def getPropertyIDs() -> {str, str}:
      
      #TO DO: implement a better way of regenerating headers when needed
      header = getHeader()

      #TO DO: add try block around this ?
      url = f"https://api2.arduino.cc/iot/v2/things/{THING_ID}/properties/"
      req = requests.request('GET', url, headers=header)
      
      #these are not numerical and you get an error when trying to fetch their data
      exclude = ['canMessage', 'motorOn', 'gpsCoordinates']

      #build porperty disctionary with property id for all properties that will be queried
      props = {}
      for prop in req.json():
            name = prop['name']
            if name not in exclude:
                  id = prop["id"]
                  props[id]=name
      return props

#get the data for one specific property
#interval: data binning interval in seconds, smallest possible interval is 1s
#limit of 1000 data point returns
def getPropertySeries(thing_id, prop_id, from_t, interval):
      header = getHeader()
      url = f"https://api2.arduino.cc/iot/v2/things/{thing_id}/properties/{prop_id}/timeseries?desc=true&from={from_t}&interval={interval}"
      req = requests.request('GET', url, headers=header)
      return req.json()

#class to hold parameters used when making a request for parameter data from API
#purpose of using a class is to *elegantly* create dictionary representation of requests
#designed for use in getRawBatchQuery
class propertyRequest():
      #if no from date is set, 1000 starting from 0001-01-01T00:00:00Z will be returned 
      def __init__(self,property_id, from_t=None, to_t=None, sort=None, interval=None, series_limit=None):
            self.q = f"property.{property_id}"
            
            if from_t is None: pass
            else: self.from_t = from_t
            
            if to_t is None: pass
            else: self.to_t = to_t

            if sort is None: pass
            else: self.sort = sort
            
            if interval is None: pass
            else: self.interval = interval

            if series_limit is None: pass
            else: self.series_limit = series_limit
            

#return raw data points within time frame from_t to to_t
#for all properties in the list props
#returns at most 1000 data points for each property 
def getRawBatchQuery(props, from_t=None, to_t=None, sort='DESC'):
      header = getHeader()

      #batch_query_raw
      #madatory: q
      #optinal : from, to, sort, series_limit
      url = f"https://api2.arduino.cc/iot/v2/series/batch_query_raw"

      #build list of requests to be made 
      #i.e. get list containing parameter dict for each property 
      reqs = []
      for prop_id in props.keys():
            #dictionary with request parameters
            req = vars(propertyRequest(prop_id, from_t, to_t, sort))
            
            #rename time stamps keys to proper name for query format
            if req.get("from_t"): req["from"] = req.pop("from_t")
            if req.get("to_t"): req["to"] = req.pop("to_t")

            #add it to the list
            reqs.append(req)
      
      #get string of the JSON object representing the query
      query_str = json.dumps({"resp_version": 1,"requests": reqs})
      #convert to byte array because this is the format it must be passed as
      query_bytes = bytearray(query_str, "utf-8")

      #make request
      req = requests.request('POST', url, headers=header, data=query_bytes)
      return req.json()['responses']

#return data points at a given interval in the given time range
#time range is not optional
#this is not the raw data, averages are applied in order to get requested number of data points
#i.e. it returns values at evenly distributed time intervals in the time range given
#i think 300 data points is the max for a series
#this method is slower since it manipulates the values
def getBatchQuery(props, from_t=None, to_t=None, interval=None, series_limit=None):
      #batch_query_raw
      #madatory: q, from, to, series_limit or interval
      #optinal : sort?
      url = f"https://api2.arduino.cc/iot/v2/series/batch_query"

      #build list of requests to be made 
      #i.e. get list containing parameter dict for each property 
      reqs = []
      for prop_id in props.values():
            #dictionary with request parameters
            req = vars(propertyRequest(prop_id, from_t, to_t, interval, series_limit))
            
            #rename time stamps keys to proper name for query format
            if req["from_t"]: req["from"] = req.pop("from_t")
            if req["to_t"]: req["to"] = req.pop("to_t")

            #add it to the list
            reqs.append(req)
      
      #get string of the JSON object representing the query
      query_str = json.dumps({"resp_version": 1,"requests": reqs})
      #convert to byte array because this is the format it must be passed as
      query_bytes = bytearray(query_str, "utf-8")

      #make request
      req = requests.request('POST', url, headers=header, data=query_bytes)
      return req.json()['responses']

#props = getPropertyIDs()
#res = getBatchQuery(props, from_t="2023-09-22T19:20:00.00Z", to_t="2023-09-22T20:00:00.00Z", interval=1)


In [29]:
#fetch data from cloud
from_time = "2023-09-22T00:00:00.00Z" #get data starting from this time, this would be the start time 
props = getPropertyIDs()
output = getRawBatchQuery(props, from_time) #limit of 1000 data points for each property

### Merge returned data into dataframe

In [31]:
#Process returned data into dataframe

print(output)

telemetry_data = pd.DataFrame(columns=['time']) #main df to which all data will be merged

#add data for each proppety to the dataframe
for prop_result in output:
    
    #query has format property.<id>
    prop_id = prop_result['query'].split(".",1)[1]
   
    #get property name 
    prop_name = props[prop_id]

    #get the property data
    #TO DO: add try block around this 
    #load next property data into a dataframe
    temp_df = pd.DataFrame({
        "time":prop_result['times'],
        prop_name : prop_result['values']})

    #convert 'time' column values to dateTime data type
    temp_df['time'] = pd.to_datetime(temp_df['time'], format="ISO8601") 
    #convert data column to numerica data type
    temp_df[prop_name] = pd.to_numeric(temp_df[prop_name])
    #rename the value column to the corresponding parameter
    temp_df = temp_df.rename(columns={"value": prop_name})
    #merge it with the main df, merges on the time value in order and fills missing values with last value 
    telemetry_data = pd.merge_ordered(telemetry_data,temp_df,how='outer', fill_method="ffill")

[{'count_values': 1000, 'from_date': '2023-09-22T00:00:00Z', 'message': '', 'query': 'property.4a8c7f83-7ca2-4878-a48a-756747113d21', 'resp_version': 1, 'series': {'metric': 'property.4a8c7f83-7ca2-4878-a48a-756747113d21'}, 'series_limit': 1000, 'sort': 'DESC', 'status': 'ok', 'times': ['2023-09-22T19:54:25.822Z', '2023-09-22T19:54:26.208Z', '2023-09-22T19:54:26.709Z', '2023-09-22T19:54:27.267Z', '2023-09-22T19:54:27.685Z', '2023-09-22T19:54:28.206Z', '2023-09-22T19:54:28.683Z', '2023-09-22T19:54:29.351Z', '2023-09-22T19:54:29.746Z', '2023-09-22T19:54:30.249Z', '2023-09-22T19:54:30.747Z', '2023-09-22T19:54:31.227Z', '2023-09-22T19:54:31.848Z', '2023-09-22T19:54:32.215Z', '2023-09-22T19:54:32.721Z', '2023-09-22T19:54:33.258Z', '2023-09-22T19:54:33.727Z', '2023-09-22T19:54:34.371Z', '2023-09-22T19:54:34.735Z', '2023-09-22T19:54:35.235Z', '2023-09-22T19:54:35.716Z', '2023-09-22T19:54:36.267Z', '2023-09-22T19:54:36.74Z', '2023-09-22T19:54:37.232Z', '2023-09-22T19:54:37.711Z', '2023-09-22T1

### Apply transformations to data

In [None]:
#data transformation functions

#convert current from code to amp values
#based on calibrated function
#TO DO: get new function
def codeToAmps(x):
    return (0.01082 * x + -20.17682) * 2

#convert voltage from code to volt values
#based on calibrated function
#TO DO: get new function
def codeToVolts(x):
    return 0.02048 * x + -0.53823

#convert meter value to kilometers
def metersToKM(meters):
    return meters / 1000
