# Producing dataset of 4 & 6 sources of sensors

In [1]:
import os
import numpy as np
import pandas as pd
import torch
import math
import random
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec

In [2]:
parent_dir = os.path.abspath(os.path.join(os.path.abspath(os.getcwd()), os.pardir))

## Retrieving accuracy and precision from the network with colocation 

In [3]:
colocation_xytv = pd.read_csv(os.path.join(os.path.join(parent_dir,'session','synth_'+str(1000)+'_colocation','tab'),'dataset.csv'))

In [4]:
colocation_xytv

Unnamed: 0,x,y,truth,sensor_hq,sensor_mq,sensor_lq,agg_truth,pixel_hq,pixel_mq,pixel_lq
0,0.658373,0.835299,1.035633,1.037240,1.080460,1.319427,1.093916,1.127492,1.049841,1.531538
1,0.167042,0.629502,0.776232,0.805247,0.685393,1.082418,0.691692,0.738987,0.640979,0.900058
2,0.557678,0.918025,1.116958,1.133778,1.061428,1.500772,1.067781,1.156635,1.034316,0.647430
3,0.474924,0.894371,1.354865,1.375943,1.391476,1.629584,1.329232,1.390496,1.406361,1.951204
4,0.969739,0.182687,1.795820,1.802798,1.359185,2.614918,1.656136,1.697991,1.429999,1.587076
...,...,...,...,...,...,...,...,...,...,...
995,0.462176,0.879585,1.415784,1.401587,1.109184,1.370176,1.243962,1.255739,1.398957,1.507946
996,0.257352,0.696635,0.893856,0.899156,1.010148,0.905981,0.867768,0.905118,1.077857,1.114976
997,0.986004,0.130502,1.526831,1.542854,1.270754,1.483116,1.525781,1.582301,1.569167,1.631478
998,0.549039,0.224442,1.882460,1.891232,1.839731,2.700714,1.875260,1.901088,1.678164,1.509848


## Accuracy and precision against high-quality sensors

In [5]:
def rmse(pred,target):
    return np.sqrt(np.mean((pred - target)**2))
def bias(pred,target):
    return np.mean(pred - target)
def variance(pred,target):
    b = bias(pred,target)
    return np.mean(np.abs(pred - target - b))**2

In [6]:
rmse_sensors_hq = rmse(colocation_xytv.loc[:,'sensor_hq'],colocation_xytv.loc[:,'sensor_hq'])

In [7]:
variance_sensors_hq = variance(colocation_xytv.loc[:,'sensor_hq'],colocation_xytv.loc[:,'sensor_hq'])

In [8]:
rmse_sensors_mq = rmse(colocation_xytv.loc[:,'sensor_mq'],colocation_xytv.loc[:,'sensor_hq'])

In [9]:
variance_sensors_mq = variance(colocation_xytv.loc[:,'sensor_mq'],colocation_xytv.loc[:,'sensor_hq'])

In [10]:
rmse_sensors_lq = rmse(colocation_xytv.loc[:,'sensor_lq'],colocation_xytv.loc[:,'sensor_hq'])

In [11]:
variance_sensors_lq = variance(colocation_xytv.loc[:,'sensor_lq'],colocation_xytv.loc[:,'sensor_hq'])

In [12]:
rmse_pixels_hq = rmse(colocation_xytv.loc[:,'pixel_hq'],colocation_xytv.loc[:,'sensor_hq'])

In [13]:
variance_pixels_hq = variance(colocation_xytv.loc[:,'pixel_hq'],colocation_xytv.loc[:,'sensor_hq'])

In [14]:
rmse_pixels_mq = rmse(colocation_xytv.loc[:,'pixel_mq'],colocation_xytv.loc[:,'sensor_mq'])

In [15]:
variance_pixels_mq = variance(colocation_xytv.loc[:,'pixel_mq'],colocation_xytv.loc[:,'sensor_mq'])

In [16]:
rmse_pixels_lq = rmse(colocation_xytv.loc[:,'pixel_lq'],colocation_xytv.loc[:,'sensor_hq'])

In [17]:
variance_pixels_lq = variance(colocation_xytv.loc[:,'pixel_lq'],colocation_xytv.loc[:,'sensor_hq'])

In [18]:
ap = pd.DataFrame(data={'RMSE': [rmse_sensors_hq ,rmse_sensors_mq ,rmse_sensors_lq ,rmse_pixels_hq ,rmse_pixels_mq,rmse_pixels_lq],
                        'variance': [variance_sensors_hq,variance_sensors_mq,variance_sensors_lq,variance_pixels_hq,variance_pixels_mq,variance_pixels_lq]})

In [19]:
s = pd.Series(['sensor_hq','sensor_mq','sensor_lq','pixel_hq','pixel_mq','pixel_lq'])

In [20]:
ap = ap.set_index([s])

In [21]:
ap

Unnamed: 0,RMSE,variance
sensor_hq,0.0,0.0
sensor_mq,0.119047,0.007231
sensor_lq,0.335685,0.057084
pixel_hq,0.08255,0.003558
pixel_mq,0.171811,0.015894
pixel_lq,0.352288,0.065278


## Producing dataset of 4 sources of sensors  

In [22]:
def produce_4sources(all_xyv,key,ap,name,parent_dir):
    
    all_xyv.loc[:,"ref"] = all_xyv.loc[:,"sensor_hq"]
    all_xyv.loc[:,"rmse_ref"] = ap.loc['sensor_hq','RMSE']
    all_xyv.loc[:,"variance_ref"] = ap.loc['sensor_hq','variance']
    
    melted = all_xyv.melt(id_vars=["x","y","ref","rmse_ref","variance_ref"],value_vars=key,ignore_index=True)
    melted.loc[melted["variable"]=="sensor_hq", "rmse"] = ap.loc['sensor_hq','RMSE']
    melted.loc[melted["variable"]=="sensor_lq", "rmse"] = ap.loc['sensor_lq','RMSE']
    melted.loc[melted["variable"]=="pixel_hq", "rmse"] = ap.loc['pixel_hq','RMSE']
    melted.loc[melted["variable"]=="pixel_lq", "rmse"] = ap.loc['pixel_lq','RMSE']
    
    melted.loc[melted["variable"]=="sensor_hq", "variance"] = ap.loc['sensor_hq','variance']
    melted.loc[melted["variable"]=="sensor_lq", "variance"] = ap.loc['sensor_lq','variance']
    melted.loc[melted["variable"]=="pixel_hq", "variance"] = ap.loc['pixel_hq','variance']
    melted.loc[melted["variable"]=="pixel_lq", "variance"] = ap.loc['pixel_lq','variance']
    
    newpath = os.path.join(parent_dir,'session',name+'_4s','tab')
    if not os.path.exists(newpath):
        os.makedirs(newpath)
    f = os.path.join(newpath,'dataset.csv')
    melted.to_csv(f,index=False)

In [23]:
def produce_set_4sources(nb_sampling,key,ap,parent_dir):
    
    all_xyv = pd.read_csv(os.path.join(os.path.join(parent_dir,'session','synth_'+str(nb_sampling)+'_X_train','tab'),'dataset.csv'))
    produce_4sources(all_xyv,key,ap,'synth_'+str(nb_sampling)+'_X_train',parent_dir)
    
    all_xyv = pd.read_csv(os.path.join(os.path.join(parent_dir,'session','synth_'+str(nb_sampling)+'_Y_train','tab'),'dataset.csv'))
    produce_4sources(all_xyv,key,ap,'synth_'+str(nb_sampling)+'_Y_train',parent_dir)
    
    all_xyv = pd.read_csv(os.path.join(os.path.join(parent_dir,'session','synth_'+str(nb_sampling)+'_X_valid','tab'),'dataset.csv'))
    produce_4sources(all_xyv,key,ap,'synth_'+str(nb_sampling)+'_X_valid',parent_dir)
    
    all_xyv = pd.read_csv(os.path.join(os.path.join(parent_dir,'session','synth_'+str(nb_sampling)+'_Y_valid','tab'),'dataset.csv'))
    produce_4sources(all_xyv,key,ap,'synth_'+str(nb_sampling)+'_Y_valid',parent_dir)
    
    all_xyv = pd.read_csv(os.path.join(os.path.join(parent_dir,'session','synth_'+str(nb_sampling)+'_X_eval','tab'),'dataset.csv'))
    produce_4sources(all_xyv,key,ap,'synth_'+str(nb_sampling)+'_X_eval',parent_dir)
    
    all_xyv = pd.read_csv(os.path.join(os.path.join(parent_dir,'session','synth_'+str(nb_sampling)+'_Y_eval','tab'),'dataset.csv'))
    produce_4sources(all_xyv,key,ap,'synth_'+str(nb_sampling)+'_Y_eval',parent_dir)
    

In [24]:
key = ['sensor_hq','sensor_lq','pixel_hq','pixel_lq']
produce_set_4sources(1000,key,ap,parent_dir)

In [25]:
all_xyv = pd.read_csv(os.path.join(os.path.join(parent_dir,'session','synth_all_'+str(6400),'tab'),'dataset.csv'))
produce_4sources(all_xyv,key,ap,'synth_all_'+str(6400),parent_dir)

## Producing dataset of 6 sources of sensors  

In [26]:
def produce_6sources(all_xyv,key,ap,name,parent_dir):
    
    all_xyv.loc[:,"ref"] = all_xyv.loc[:,"sensor_hq"]
    all_xyv.loc[:,"rmse_ref"] = ap.loc['sensor_hq','RMSE']
    all_xyv.loc[:,"variance_ref"] = ap.loc['sensor_hq','variance']
    
    melted = all_xyv.melt(id_vars=["x","y","ref","rmse_ref","variance_ref"],value_vars=key,ignore_index=True)
    melted.loc[melted["variable"]=="sensor_hq", "rmse"] = ap.loc['sensor_hq','RMSE']
    melted.loc[melted["variable"]=="sensor_mq", "rmse"] = ap.loc['sensor_mq','RMSE']
    melted.loc[melted["variable"]=="sensor_lq", "rmse"] = ap.loc['sensor_lq','RMSE']
    melted.loc[melted["variable"]=="pixel_hq", "rmse"] = ap.loc['pixel_hq','RMSE']
    melted.loc[melted["variable"]=="pixel_mq", "rmse"] = ap.loc['pixel_mq','RMSE']
    melted.loc[melted["variable"]=="pixel_lq", "rmse"] = ap.loc['pixel_lq','RMSE']
    
    melted.loc[melted["variable"]=="sensor_hq", "variance"] = ap.loc['sensor_hq','variance']
    melted.loc[melted["variable"]=="sensor_mq", "variance"] = ap.loc['sensor_mq','variance']
    melted.loc[melted["variable"]=="sensor_lq", "variance"] = ap.loc['sensor_lq','variance']
    melted.loc[melted["variable"]=="pixel_hq", "variance"] = ap.loc['pixel_hq','variance']
    melted.loc[melted["variable"]=="pixel_mq", "variance"] = ap.loc['pixel_mq','variance']
    melted.loc[melted["variable"]=="pixel_lq", "variance"] = ap.loc['pixel_lq','variance']
    
    newpath = os.path.join(parent_dir,'session',name+'_6s','tab')
    if not os.path.exists(newpath):
        os.makedirs(newpath)
    f = os.path.join(newpath,'dataset.csv')
    melted.to_csv(f,index=False)

In [27]:
def produce_set_6sources(nb_sampling,key,ap,parent_dir):
    
    all_xyv = pd.read_csv(os.path.join(os.path.join(parent_dir,'session','synth_'+str(nb_sampling)+'_X_train','tab'),'dataset.csv'))
    produce_6sources(all_xyv,key,ap,'synth_'+str(nb_sampling)+'_X_train',parent_dir)
    
    all_xyv = pd.read_csv(os.path.join(os.path.join(parent_dir,'session','synth_'+str(nb_sampling)+'_Y_train','tab'),'dataset.csv'))
    produce_6sources(all_xyv,key,ap,'synth_'+str(nb_sampling)+'_Y_train',parent_dir)
    
    all_xyv = pd.read_csv(os.path.join(os.path.join(parent_dir,'session','synth_'+str(nb_sampling)+'_X_valid','tab'),'dataset.csv'))
    produce_6sources(all_xyv,key,ap,'synth_'+str(nb_sampling)+'_X_valid',parent_dir)
    
    all_xyv = pd.read_csv(os.path.join(os.path.join(parent_dir,'session','synth_'+str(nb_sampling)+'_Y_valid','tab'),'dataset.csv'))
    produce_6sources(all_xyv,key,ap,'synth_'+str(nb_sampling)+'_Y_valid',parent_dir)
    
    all_xyv = pd.read_csv(os.path.join(os.path.join(parent_dir,'session','synth_'+str(nb_sampling)+'_X_eval','tab'),'dataset.csv'))
    produce_6sources(all_xyv,key,ap,'synth_'+str(nb_sampling)+'_X_eval',parent_dir)
    
    all_xyv = pd.read_csv(os.path.join(os.path.join(parent_dir,'session','synth_'+str(nb_sampling)+'_Y_eval','tab'),'dataset.csv'))
    produce_6sources(all_xyv,key,ap,'synth_'+str(nb_sampling)+'_Y_eval',parent_dir)
    

In [28]:
key = ['sensor_hq','sensor_mq','sensor_lq','pixel_hq','pixel_mq','pixel_lq']
produce_set_6sources(1000,key,ap,parent_dir)

In [29]:
all_xyv = pd.read_csv(os.path.join(os.path.join(parent_dir,'session','synth_all_'+str(6400),'tab'),'dataset.csv'))
produce_6sources(all_xyv,key,ap,'synth_all_'+str(6400),parent_dir)