# Similarity / Distance between Devices

In [1]:
from aux import *
import pandas as pd

## Devices SDF definitions

### Adapt SDF definition data for analysis

In [6]:
# Load SDF files to compare
# An SDFManager (brought in by `from aux import *`) is pointed at the SDF
# directory and asked for every definition it knows about.
# NOTE(review): `sdfs` is iterated by name and indexed as sdfs[name]['sdfThing']
# in the next cell, so it is presumably a dict keyed by SDF name - confirm in aux.
sdf_manager = SDFManager(path='../iot/sdf/')
sdfs = sdf_manager.get_all_sdfs()

In [7]:
# Turn the SDF files into a single dataframe - Avoid redundant data
#
# The result has one row per (thing, object, property) with the columns listed
# in `columns`. Optional SDF qualities ('description', 'type', 'unit') that are
# absent become None, and a missing 'sdfThing' / 'sdfObject' / 'sdfProperty'
# group is treated as empty instead of raising KeyError.
sdfs_trans = {}  # NOTE(review): never populated in this cell; kept in case another cell reads it
columns = ['thing','thing_desc','obj','obj_desc','prop','prop_desc','prop_type','prop_unit']
rows = []
for name in sdfs :
    # The 'Auxiliary' entry is not flattened into the table
    if name == 'Auxiliary':
        continue
    for sdfThing, thing_dic in sdfs[name].get('sdfThing', {}).items():
        thing_desc = thing_dic.get('description')
        for sdfObject, object_dic in thing_dic.get('sdfObject', {}).items():
            object_desc = object_dic.get('description')
            for sdfProperty, prop_dic in object_dic.get('sdfProperty', {}).items():
                # 'uuid' properties are left out of the table
                if sdfProperty == 'uuid':
                    continue
                prop_desc = prop_dic.get('description')
                prop_type = prop_dic.get('type')
                prop_unit = prop_dic.get('unit')
                rows.append((sdfThing,thing_desc,sdfObject,object_desc,sdfProperty,prop_desc,prop_type,prop_unit))

# Build the frame once from the collected tuples (no row-by-row growth)
sdfs_df = pd.DataFrame(columns=columns,data=rows)

In [8]:
# Air Quality SDF DATAFRAME
# Keep only the 'AirQuality' rows, drop the two leading thing columns
# (thing, thing_desc) and renumber the rows from 0 for display.
(
    sdfs_df
    .loc[sdfs_df['thing'] == 'AirQuality']
    .iloc[:, 2:]
    .reset_index(drop=True)
)

Unnamed: 0,obj,obj_desc,prop,prop_desc,prop_type,prop_unit
0,temperature_sensor,Measures environmental temperature.,temperature,Temperature value,number,Cel
1,humidity_sensor,Measures environmental humidity.,humidity,Humidity value,number,%
2,pressure_sensor,Measures environmental pressure.,pressure,Pressure value,number,Pa
3,air_quality_sensor,Measures pollutants in the air.,pm1,"PM1 (viruses, exhaust gases...) value",number,ug/m3
4,air_quality_sensor,Measures pollutants in the air.,pm25,"PM2.5 (bacteria, spores, pollen, toner dust......",number,ug/m3
5,air_quality_sensor,Measures pollutants in the air.,pm10,"PM10 (pollen, desert dust...) value",number,ug/m3


In [9]:
# Air Quality Modified SDF DATAFRAME
# Keep only the 'AirQualityModified' rows, drop the two leading thing columns
# (thing, thing_desc) and renumber the rows from 0 for display.
(
    sdfs_df
    .loc[sdfs_df['thing'] == 'AirQualityModified']
    .iloc[:, 2:]
    .reset_index(drop=True)
)

Unnamed: 0,obj,obj_desc,prop,prop_desc,prop_type,prop_unit
0,temperature_humidity_sensor,Measures environmental temperature and humidity.,temperature,Temperature value,number,Cel
1,temperature_humidity_sensor,Measures environmental temperature and humidity.,humidity,Humidity value,number,%
2,air_quality_sensor,Measures air pollutants.,pm25,PM2.5 value,number,ug/m3
3,air_quality_sensor,Measures air pollutants.,pm10,PM10 value,number,ug/m3


### Load devices data

In [15]:
# Read devices data samples from CSV
# The following cells filter this frame on its `thing` column; their rendered
# output shows `obj`, `prop` and the sample columns v1..v20 for each property.
devs_data = pd.read_csv('../iot/devs_data.csv')
    

In [17]:
# Air Quality DATA DATAFRAME
# Keep only the 'AirQuality' rows, drop the first column and renumber the
# rows from 0 for display.
(
    devs_data
    .loc[devs_data['thing'] == 'AirQuality']
    .iloc[:, 1:]
    .reset_index(drop=True)
)

Unnamed: 0,obj,prop,v1,v2,v3,v4,v5,v6,v7,v8,...,v11,v12,v13,v14,v15,v16,v17,v18,v19,v20
0,temperature_sensor,temperature,19.69625,19.57349,19.98096,19.92684,19.58714,19.92733,20.19718,20.5725,...,20.17839,20.19816,19.9564,20.38733,19.84675,19.5146,19.9756,20.2573,19.87877,19.93596
1,humidity_sensor,humidity,30.03064,30.00884,29.94637,30.17026,30.03684,29.87614,30.33838,29.96442,...,29.73785,29.97156,30.25484,30.45879,30.09539,29.77266,30.02713,29.84713,29.82852,29.88137
2,pressure_sensor,pressure,100999.99156,100999.91324,101000.09967,100999.8897,101000.3143,101000.34863,100999.84228,101000.21777,...,101000.41121,100999.81936,100999.90709,101000.52012,100999.68828,101000.17992,101000.24414,101000.09274,101000.06615,100999.98942
3,air_quality_sensor,pm1,0.5,1.17668,0.60471,0.90414,1.5,1.5,1.23993,0.5,...,0.5,1.30384,1.5,0.56112,1.39899,1.5,1.5,0.71521,1.26724,1.5
4,air_quality_sensor,pm25,9.39768,9.44513,9.71794,8.88717,8.82636,9.32191,8.61828,9.36381,...,9.68199,9.53998,8.20925,7.93034,8.44177,9.12488,9.32861,8.86119,9.32772,8.62644
5,air_quality_sensor,pm10,17.93462,18.13677,18.53996,17.2831,18.32247,18.70148,17.94691,18.08423,...,17.76473,18.65214,18.70392,17.83664,18.01307,17.60257,17.33011,17.48503,18.60646,18.40956


In [18]:
# Air Quality Modified DATA DATAFRAME
# Keep only the 'AirQualityModified' rows, drop the first column and renumber
# the rows from 0 for display.
(
    devs_data
    .loc[devs_data['thing'] == 'AirQualityModified']
    .iloc[:, 1:]
    .reset_index(drop=True)
)

Unnamed: 0,obj,prop,v1,v2,v3,v4,v5,v6,v7,v8,...,v11,v12,v13,v14,v15,v16,v17,v18,v19,v20
0,temperature_humidity_sensor,temperature,21.03632,20.56681,20.77288,20.9133,20.99129,20.93482,21.43756,20.97543,...,20.7833,21.1331,20.98169,20.70841,20.80229,21.47531,20.95453,21.07506,21.23916,21.14635
1,temperature_humidity_sensor,humidity,29.13339,29.02164,28.84511,29.13723,28.75906,28.93175,29.12306,29.44118,...,28.93624,28.75621,28.94279,29.08294,29.2467,29.0177,29.2698,28.81414,28.83147,28.89171
2,air_quality_sensor,pm25,8.62463,7.98809,8.44229,7.93418,8.36249,8.35374,7.79592,7.89869,...,7.98789,8.11566,8.13059,8.32404,8.49763,7.23781,8.18723,6.81143,8.03292,7.03392
3,air_quality_sensor,pm10,18.56675,19.18404,19.15272,18.3664,19.11824,18.63623,19.33407,18.94702,...,18.86432,19.08097,19.18338,18.97513,19.09211,18.30577,18.6687,19.4158,19.96262,19.37357


### Merge data with SDF definitions into a single DataFrame

In [None]:
# TODO: Merge both information sources (SDF definitions in `sdfs_df` and data samples in `devs_data`) - not implemented yet

### Compare device dataframes

In [41]:
# TODO: Given an unknown device with its SDF definition dataframe,
# compute a distance metric to each of the other SDF definitions