In [11]:
import pandas as pd
import numpy as np
import sys
import matplotlib.pyplot as plt
import scipy.io as sio
from scipy import stats
import seaborn as sns

# Render matplotlib figures inline in the notebook output.
# `run_line_magic` replaces the deprecated `InteractiveShell.magic` helper.
get_ipython().run_line_magic('matplotlib', 'inline')

## Load metrolab data

In [2]:
# Read the processed static metrolab measurements used for testing.
# The first line of the file is skipped and no header row is parsed, so the
# column labels are assigned explicitly right after loading.
metrolab_csv = "../../Data/cmag_data/lego_metrolab_processed/processed_data_static_from_Pos1_to_Pos8_2999samples_max35Amp.csv"
metrolab = pd.read_csv(metrolab_csv, skiprows=1, header=None)

# 3 position columns, 8 "I" columns (presumably the coil currents, cf.
# "max35Amp" in the file name -- confirm), and 3 field-component columns.
position_cols = ["x", "y", "z"]
current_cols = ["I1", "I2", "I3", "I4", "I5", "I6", "I7", "I8"]
field_cols = ["Bx", "By", "Bz"]
metrolab.columns = position_cols + current_cols + field_cols

print("Total data shape is ", metrolab.shape)
metrolab.head()

Total data shape is  (214992, 14)


Unnamed: 0,x,y,z,I1,I2,I3,I4,I5,I6,I7,I8,Bx,By,Bz
0,-102.52,-29.6,28.2,20.794,-25.054,9.5788,-10.744,-8.3818,-7.8729,-2.317,23.666,-67.79,3.0321,-1.6435
1,-102.52,-93.6,28.2,20.794,-25.054,9.5788,-10.744,-8.3818,-7.8729,-2.317,23.666,-66.519,2.222,-6.7333
2,-25.48,-101.6,28.2,20.794,-25.054,9.5788,-10.744,-8.3818,-7.8729,-2.317,23.666,-75.868,-3.5986,-9.551
3,-25.48,-37.6,28.2,20.794,-25.054,9.5788,-10.744,-8.3818,-7.8729,-2.317,23.666,-75.815,4.3741,6.1569
4,-59.9,-66.98,66.6,20.794,-25.054,9.5788,-10.744,-8.3818,-7.8729,-2.317,23.666,-70.934,6.5159,-2.2175


## Missing data (removed)

In [3]:
# Collect every metrolab row that has at least one missing entry.
has_missing = metrolab.isna().any(axis="columns")
nan_rows = metrolab.loc[has_missing]

print("{} rows contain nan value".format(nan_rows.shape[0]))
nan_rows

18 rows contain nan value


Unnamed: 0,x,y,z,I1,I2,I3,I4,I5,I6,I7,I8,Bx,By,Bz
112194,25.48,95.6,28.2,-13.35,7.511,-0.5929,-17.047,21.577,22.594,11.308,-18.542,,,
112195,25.48,31.6,28.2,-13.35,7.511,-0.5929,-17.047,21.577,22.594,11.308,-18.542,,,
112196,102.52,23.6,28.2,-13.35,7.511,-0.5929,-17.047,21.577,22.594,11.308,-18.542,,,
112197,102.52,87.6,28.2,-13.35,7.511,-0.5929,-17.047,21.577,22.594,11.308,-18.542,,,
112198,68.1,58.22,66.6,-13.35,7.511,-0.5929,-17.047,21.577,22.594,11.308,-18.542,,,
112199,25.48,95.6,95.1,-13.35,7.511,-0.5929,-17.047,21.577,22.594,11.308,-18.542,,,
112200,25.48,31.6,95.1,-13.35,7.511,-0.5929,-17.047,21.577,22.594,11.308,-18.542,,,
112201,102.52,23.6,95.1,-13.35,7.511,-0.5929,-17.047,21.577,22.594,11.308,-18.542,,,
112202,102.52,87.6,95.1,-13.35,7.511,-0.5929,-17.047,21.577,22.594,11.308,-18.542,,,
158976,-102.52,-29.6,28.2,6.0697,5.1737,-3.6407,8.7283,-9.2057,0.5971,-6.6388,7.5047,,,


In [4]:
# Discard every sample with a missing value, then sanity-check the row count:
# the loaded table has 214992 rows, 18 of which contain NaNs.
expected_rows = 214992 - 18
metrolab = metrolab.dropna(axis=0, how="any")
assert len(metrolab) == expected_rows, "dimension mismatch"

print("metrolab dataset has {} samples ready for testing.".format(len(metrolab)))

metrolab dataset has 214974 samples ready for testing.


In [5]:
# Checkpoint: write the NaN-free metrolab table to disk (row index omitted).
nan_free_csv = "metrolab_remove_nan.csv"
metrolab.to_csv(nan_free_csv, index=False)

In [6]:
# Per-column summary statistics (count, mean, std, min, quartiles, max).
metrolab.describe()

Unnamed: 0,x,y,z,I1,I2,I3,I4,I5,I6,I7,I8,Bx,By,Bz
count,214974.0,214974.0,214974.0,214974.0,214974.0,214974.0,214974.0,214974.0,214974.0,214974.0,214974.0,214974.0,214974.0,214974.0
mean,0.455556,-3.153333,124.605225,-0.161421,-0.543056,-0.01311,0.254383,-0.153578,-0.364767,0.253618,-0.296891,0.758652,-0.106323,1.347145
std,73.597624,69.594648,69.934131,11.739934,11.742063,11.794816,11.96677,11.808545,11.85735,12.034072,11.664312,35.148218,31.053232,26.375037
min,-102.52,-101.6,28.2,-34.27,-34.39,-32.888,-32.785,-33.638,-34.782,-34.465,-34.263,-179.6,-160.25,-165.31
25%,-59.9,-66.98,66.6,-6.958,-7.4886,-6.7046,-6.7158,-7.2282,-7.3906,-6.4505,-6.93,-18.05475,-16.062,-10.21675
50%,0.0,-3.0,153.0,-0.02555,-0.56315,-0.11795,0.11445,-0.15365,-0.38745,0.21,-0.2373,0.671305,-0.197625,1.2548
75%,68.1,58.22,191.4,6.531,6.0522,7.0098,7.4158,6.902,6.3798,7.3045,6.4085,19.50075,15.72575,12.699
max,102.52,95.6,219.9,34.808,33.658,34.277,33.812,33.156,32.9,33.722,32.656,178.73,159.03,165.67


In [7]:
# Convert metrolab's units to match the sensor grid:
#   x, y, z    : millimetres -> metres
#   Bx, By, Bz : millitesla  -> tesla
# NOTE: the columns are rescaled in place on `metrolab`, so running this cell
# more than once per kernel session divides by 1000 again.
position_cols = ['x', 'y', 'z']     # sensor location coordinates
field_cols = ['Bx', 'By', 'Bz']     # field strength components
for unit_cols in (position_cols, field_cols):
    metrolab[unit_cols] = metrolab[unit_cols] / 1000
metrolab.head()

Unnamed: 0,x,y,z,I1,I2,I3,I4,I5,I6,I7,I8,Bx,By,Bz
0,-0.10252,-0.0296,0.0282,20.794,-25.054,9.5788,-10.744,-8.3818,-7.8729,-2.317,23.666,-0.06779,0.003032,-0.001643
1,-0.10252,-0.0936,0.0282,20.794,-25.054,9.5788,-10.744,-8.3818,-7.8729,-2.317,23.666,-0.066519,0.002222,-0.006733
2,-0.02548,-0.1016,0.0282,20.794,-25.054,9.5788,-10.744,-8.3818,-7.8729,-2.317,23.666,-0.075868,-0.003599,-0.009551
3,-0.02548,-0.0376,0.0282,20.794,-25.054,9.5788,-10.744,-8.3818,-7.8729,-2.317,23.666,-0.075815,0.004374,0.006157
4,-0.0599,-0.06698,0.0666,20.794,-25.054,9.5788,-10.744,-8.3818,-7.8729,-2.317,23.666,-0.070934,0.006516,-0.002217


In [8]:
# Per-column summary statistics (count, mean, std, min, quartiles, max).
metrolab.describe()

Unnamed: 0,x,y,z,I1,I2,I3,I4,I5,I6,I7,I8,Bx,By,Bz
count,214974.0,214974.0,214974.0,214974.0,214974.0,214974.0,214974.0,214974.0,214974.0,214974.0,214974.0,214974.0,214974.0,214974.0
mean,0.000456,-0.003153,0.124605,-0.161421,-0.543056,-0.01311,0.254383,-0.153578,-0.364767,0.253618,-0.296891,0.000759,-0.000106,0.001347
std,0.073598,0.069595,0.069934,11.739934,11.742063,11.794816,11.96677,11.808545,11.85735,12.034072,11.664312,0.035148,0.031053,0.026375
min,-0.10252,-0.1016,0.0282,-34.27,-34.39,-32.888,-32.785,-33.638,-34.782,-34.465,-34.263,-0.1796,-0.16025,-0.16531
25%,-0.0599,-0.06698,0.0666,-6.958,-7.4886,-6.7046,-6.7158,-7.2282,-7.3906,-6.4505,-6.93,-0.018055,-0.016062,-0.010217
50%,0.0,-0.003,0.153,-0.02555,-0.56315,-0.11795,0.11445,-0.15365,-0.38745,0.21,-0.2373,0.000671,-0.000198,0.001255
75%,0.0681,0.05822,0.1914,6.531,6.0522,7.0098,7.4158,6.902,6.3798,7.3045,6.4085,0.019501,0.015726,0.012699
max,0.10252,0.0956,0.2199,34.808,33.658,34.277,33.812,33.156,32.9,33.722,32.656,0.17873,0.15903,0.16567


In [9]:
# Persist the current metrolab table to disk (row index omitted).
unified_units_csv = "metrolab_unified_units.csv"
metrolab.to_csv(unified_units_csv, index=False)