# Producing dataset of 4 & 6 sources of sensors

In [1]:
import os
import numpy as np
import pandas as pd
import torch
import math
import random
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
parent_dir = os.path.abspath(os.path.join(os.path.abspath(os.getcwd()), os.pardir))

## Retrieving accuracy and precision from the network with colocation 

In [3]:
colocation_xytv = pd.read_csv(os.path.join(os.path.join(parent_dir,'session','synth_'+str(1000)+'_colocation','tab'),'dataset.csv'))

In [4]:
colocation_xytv

Unnamed: 0,x,y,truth,sensor_hq,sensor_mq,sensor_lq,agg_truth,pixel_hq,pixel_mq,pixel_lq
0,0.465432,0.892691,1.343322,1.352247,1.302355,2.043883,1.243962,1.329613,1.319657,0.697116
1,0.165826,0.810097,1.118653,1.138284,0.859120,0.450859,1.141355,1.185423,1.271540,1.520892
2,0.056733,0.830376,1.644142,1.615263,1.856093,1.652803,1.595883,1.627686,1.739910,2.081171
3,0.722536,0.529950,0.064479,0.093942,0.103122,0.149790,0.077085,0.130224,0.106049,0.054240
4,0.564269,0.793415,1.773022,1.786646,1.721548,1.418837,1.804475,1.812188,1.709401,1.418179
...,...,...,...,...,...,...,...,...,...,...
995,0.335858,0.430754,0.441513,0.426325,0.450427,0.505225,0.580311,0.613375,0.657225,0.839779
996,0.892133,0.913745,0.872882,0.876233,0.745825,0.948859,0.988130,1.043117,0.910423,0.783324
997,0.970323,0.782899,1.923507,1.878314,2.052697,1.791483,1.808084,1.837442,2.168936,1.968636
998,0.041494,0.697756,1.829618,1.765584,2.016011,1.614518,1.803214,1.861870,1.846983,2.969815


## Accuracy and precision against high-quality sensors

In [5]:
def rmse(pred,target):
    return np.sqrt(np.mean((pred - target)**2))
def bias(pred,target):
    return np.mean(pred - target)
def variance(pred,target):
    b = bias(pred,target)
    return np.mean(np.abs(pred - target - b))**2

In [6]:
rmse_sensors_hq = rmse(colocation_xytv.loc[:,'sensor_hq'],colocation_xytv.loc[:,'sensor_hq'])

In [7]:
variance_sensors_hq = variance(colocation_xytv.loc[:,'sensor_hq'],colocation_xytv.loc[:,'sensor_hq'])

In [8]:
rmse_sensors_mq = rmse(colocation_xytv.loc[:,'sensor_mq'],colocation_xytv.loc[:,'sensor_hq'])

In [9]:
variance_sensors_mq = variance(colocation_xytv.loc[:,'sensor_mq'],colocation_xytv.loc[:,'sensor_hq'])

In [10]:
rmse_sensors_lq = rmse(colocation_xytv.loc[:,'sensor_lq'],colocation_xytv.loc[:,'sensor_hq'])

In [11]:
variance_sensors_lq = variance(colocation_xytv.loc[:,'sensor_lq'],colocation_xytv.loc[:,'sensor_hq'])

In [12]:
rmse_pixels_hq = rmse(colocation_xytv.loc[:,'pixel_hq'],colocation_xytv.loc[:,'sensor_hq'])

In [13]:
variance_pixels_hq = variance(colocation_xytv.loc[:,'pixel_hq'],colocation_xytv.loc[:,'sensor_hq'])

In [14]:
rmse_pixels_mq = rmse(colocation_xytv.loc[:,'pixel_mq'],colocation_xytv.loc[:,'sensor_mq'])

In [15]:
variance_pixels_mq = variance(colocation_xytv.loc[:,'pixel_mq'],colocation_xytv.loc[:,'sensor_mq'])

In [16]:
rmse_pixels_lq = rmse(colocation_xytv.loc[:,'pixel_lq'],colocation_xytv.loc[:,'sensor_hq'])

In [17]:
variance_pixels_lq = variance(colocation_xytv.loc[:,'pixel_lq'],colocation_xytv.loc[:,'sensor_hq'])

In [18]:
ap = pd.DataFrame(data={'RMSE': [rmse_sensors_hq ,rmse_sensors_mq ,rmse_sensors_lq ,rmse_pixels_hq ,rmse_pixels_mq,rmse_pixels_lq],
                        'variance': [variance_sensors_hq,variance_sensors_mq,variance_sensors_lq,variance_pixels_hq,variance_pixels_mq,variance_pixels_lq]})

In [19]:
s = pd.Series(['sensor_hq','sensor_mq','sensor_lq','pixel_hq','pixel_mq','pixel_lq'])

In [20]:
ap = ap.set_index([s])

In [21]:
ap

Unnamed: 0,RMSE,variance
sensor_hq,0.0,0.0
sensor_mq,0.123151,0.007994
sensor_lq,0.329484,0.055298
pixel_hq,0.083938,0.003582
pixel_mq,0.178597,0.017255
pixel_lq,0.343726,0.062667


## Producing dataset of 4 sources of sensors  

In [22]:
def produce_4sources(all_xyv,key,ap,name,parent_dir):
    
    all_xyv.loc[:,"ref"] = all_xyv.loc[:,"sensor_hq"]
    all_xyv.loc[:,"rmse_ref"] = ap.loc['sensor_hq','RMSE']
    all_xyv.loc[:,"variance_ref"] = ap.loc['sensor_hq','variance']
    
    melted = all_xyv.melt(id_vars=["x","y","ref","rmse_ref","variance_ref"],value_vars=key,ignore_index=True)
    melted.loc[melted["variable"]=="sensor_hq", "rmse"] = ap.loc['sensor_hq','RMSE']
    melted.loc[melted["variable"]=="sensor_lq", "rmse"] = ap.loc['sensor_lq','RMSE']
    melted.loc[melted["variable"]=="pixel_hq", "rmse"] = ap.loc['pixel_hq','RMSE']
    melted.loc[melted["variable"]=="pixel_lq", "rmse"] = ap.loc['pixel_lq','RMSE']
    
    melted.loc[melted["variable"]=="sensor_hq", "variance"] = ap.loc['sensor_hq','variance']
    melted.loc[melted["variable"]=="sensor_lq", "variance"] = ap.loc['sensor_lq','variance']
    melted.loc[melted["variable"]=="pixel_hq", "variance"] = ap.loc['pixel_hq','variance']
    melted.loc[melted["variable"]=="pixel_lq", "variance"] = ap.loc['pixel_lq','variance']
    
    newpath = os.path.join(parent_dir,'session',name+'_4s','tab')
    if not os.path.exists(newpath):
        os.makedirs(newpath)
    f = os.path.join(newpath,'dataset.csv')
    melted.to_csv(f,index=False)

In [23]:
def produce_set_4sources(nb_sampling,key,ap,parent_dir):
    
    all_xyv = pd.read_csv(os.path.join(os.path.join(parent_dir,'session','synth_'+str(nb_sampling)+'_X_train','tab'),'dataset.csv'))
    produce_4sources(all_xyv,key,ap,'synth_'+str(nb_sampling)+'_X_train',parent_dir)
    
    all_xyv = pd.read_csv(os.path.join(os.path.join(parent_dir,'session','synth_'+str(nb_sampling)+'_Y_train','tab'),'dataset.csv'))
    produce_4sources(all_xyv,key,ap,'synth_'+str(nb_sampling)+'_Y_train',parent_dir)
    
    all_xyv = pd.read_csv(os.path.join(os.path.join(parent_dir,'session','synth_'+str(nb_sampling)+'_X_valid','tab'),'dataset.csv'))
    produce_4sources(all_xyv,key,ap,'synth_'+str(nb_sampling)+'_X_valid',parent_dir)
    
    all_xyv = pd.read_csv(os.path.join(os.path.join(parent_dir,'session','synth_'+str(nb_sampling)+'_Y_valid','tab'),'dataset.csv'))
    produce_4sources(all_xyv,key,ap,'synth_'+str(nb_sampling)+'_Y_valid',parent_dir)
    
    all_xyv = pd.read_csv(os.path.join(os.path.join(parent_dir,'session','synth_'+str(nb_sampling)+'_X_eval','tab'),'dataset.csv'))
    produce_4sources(all_xyv,key,ap,'synth_'+str(nb_sampling)+'_X_eval',parent_dir)
    
    all_xyv = pd.read_csv(os.path.join(os.path.join(parent_dir,'session','synth_'+str(nb_sampling)+'_Y_eval','tab'),'dataset.csv'))
    produce_4sources(all_xyv,key,ap,'synth_'+str(nb_sampling)+'_Y_eval',parent_dir)
    

In [24]:
key = ['sensor_hq','sensor_lq','pixel_hq','pixel_lq']
produce_set_4sources(1000,key,ap,parent_dir)

In [25]:
all_xyv = pd.read_csv(os.path.join(os.path.join(parent_dir,'session','synth_all_'+str(6400),'tab'),'dataset.csv'))
produce_4sources(all_xyv,key,ap,'synth_all_'+str(6400),parent_dir)

## Producing dataset of 6 sources of sensors  

In [26]:
def produce_6sources(all_xyv,key,ap,name,parent_dir):
    
    all_xyv.loc[:,"ref"] = all_xyv.loc[:,"sensor_hq"]
    all_xyv.loc[:,"rmse_ref"] = ap.loc['sensor_hq','RMSE']
    all_xyv.loc[:,"variance_ref"] = ap.loc['sensor_hq','variance']
    
    melted = all_xyv.melt(id_vars=["x","y","ref","rmse_ref","variance_ref"],value_vars=key,ignore_index=True)
    melted.loc[melted["variable"]=="sensor_hq", "rmse"] = ap.loc['sensor_hq','RMSE']
    melted.loc[melted["variable"]=="sensor_mq", "rmse"] = ap.loc['sensor_mq','RMSE']
    melted.loc[melted["variable"]=="sensor_lq", "rmse"] = ap.loc['sensor_lq','RMSE']
    melted.loc[melted["variable"]=="pixel_hq", "rmse"] = ap.loc['pixel_hq','RMSE']
    melted.loc[melted["variable"]=="pixel_mq", "rmse"] = ap.loc['pixel_mq','RMSE']
    melted.loc[melted["variable"]=="pixel_lq", "rmse"] = ap.loc['pixel_lq','RMSE']
    
    melted.loc[melted["variable"]=="sensor_hq", "variance"] = ap.loc['sensor_hq','variance']
    melted.loc[melted["variable"]=="sensor_mq", "variance"] = ap.loc['sensor_mq','variance']
    melted.loc[melted["variable"]=="sensor_lq", "variance"] = ap.loc['sensor_lq','variance']
    melted.loc[melted["variable"]=="pixel_hq", "variance"] = ap.loc['pixel_hq','variance']
    melted.loc[melted["variable"]=="pixel_mq", "variance"] = ap.loc['pixel_mq','variance']
    melted.loc[melted["variable"]=="pixel_lq", "variance"] = ap.loc['pixel_lq','variance']
    
    newpath = os.path.join(parent_dir,'session',name+'_6s','tab')
    if not os.path.exists(newpath):
        os.makedirs(newpath)
    f = os.path.join(newpath,'dataset.csv')
    melted.to_csv(f,index=False)

In [27]:
def produce_set_6sources(nb_sampling,key,ap,parent_dir):
    
    all_xyv = pd.read_csv(os.path.join(os.path.join(parent_dir,'session','synth_'+str(nb_sampling)+'_X_train','tab'),'dataset.csv'))
    produce_6sources(all_xyv,key,ap,'synth_'+str(nb_sampling)+'_X_train',parent_dir)
    
    all_xyv = pd.read_csv(os.path.join(os.path.join(parent_dir,'session','synth_'+str(nb_sampling)+'_Y_train','tab'),'dataset.csv'))
    produce_6sources(all_xyv,key,ap,'synth_'+str(nb_sampling)+'_Y_train',parent_dir)
    
    all_xyv = pd.read_csv(os.path.join(os.path.join(parent_dir,'session','synth_'+str(nb_sampling)+'_X_valid','tab'),'dataset.csv'))
    produce_6sources(all_xyv,key,ap,'synth_'+str(nb_sampling)+'_X_valid',parent_dir)
    
    all_xyv = pd.read_csv(os.path.join(os.path.join(parent_dir,'session','synth_'+str(nb_sampling)+'_Y_valid','tab'),'dataset.csv'))
    produce_6sources(all_xyv,key,ap,'synth_'+str(nb_sampling)+'_Y_valid',parent_dir)
    
    all_xyv = pd.read_csv(os.path.join(os.path.join(parent_dir,'session','synth_'+str(nb_sampling)+'_X_eval','tab'),'dataset.csv'))
    produce_6sources(all_xyv,key,ap,'synth_'+str(nb_sampling)+'_X_eval',parent_dir)
    
    all_xyv = pd.read_csv(os.path.join(os.path.join(parent_dir,'session','synth_'+str(nb_sampling)+'_Y_eval','tab'),'dataset.csv'))
    produce_6sources(all_xyv,key,ap,'synth_'+str(nb_sampling)+'_Y_eval',parent_dir)
    

In [28]:
key = ['sensor_hq','sensor_mq','sensor_lq','pixel_hq','pixel_mq','pixel_lq']
produce_set_6sources(1000,key,ap,parent_dir)

In [29]:
all_xyv = pd.read_csv(os.path.join(os.path.join(parent_dir,'session','synth_all_'+str(6400),'tab'),'dataset.csv'))
produce_6sources(all_xyv,key,ap,'synth_all_'+str(6400),parent_dir)