# Investigation of geophysical sensor data to inform priors

Since we don't have a really great idea of what constitutes a good set of priors for real data, here I try my best to sort out what is going on using what I hope will be simple, but robust, assumptions.

In [None]:
import GPy
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import pyproj
from functions import *
%matplotlib inline

## Noise and length scale characteristics for gravity and magnetism

We've been running with some set of priors for gravity and magnetism, but in all fairness we have no idea what those should be.  We know they're both linear sensors that integrate over rock properties, with a 3-D sensitivity profile that gets broader with depth.  So by fitting a GP to them, we get some idea of the noise, and a lower limit on the relevant length scale.  Since they're on a grid, we could also consider the autocorrelation.

This isn't really meant to be a Bayesian analysis; rather, it should give us an order-of-magnitude estimate of the noise, using a model that's flexible enough to respond to changes but that insists on smoothness, so we can pick off the delta-function (white-noise) component of the covariance.

# Data dictionaries

In [None]:
# Window-size parameters handed to get_data_that_is_in_square_around_centre in
# the cells below. Presumably decimal degrees, since they are multiplied by
# degree_conversion to obtain the *_metres variants — TODO confirm.
length_grav = 0.05
length_mag = 0.015

# Centre of the region of interest, in latitude / longitude.
lat_centre = -24.85
lon_centre = 116.1

# Coordinate reference systems used for conversions:
#   EPSG:4283  - GDA94 geographic (lon/lat)
#   EPSG:28350 - GDA94 / MGA zone 50 (eastings/northings in metres)
#   EPSG:4203  - AGD84 geographic (lon/lat)
gda94 = pyproj.Proj(init='epsg:4283')
mgaz50 = pyproj.Proj(init='epsg:28350')
agd84 = pyproj.Proj(init='epsg:4203')
# Disabled alternative; note the authority prefix is misspelt ('eps' instead of 'epsg').
#agd84 = pyproj.Proj(init='eps:20350')

# Approximate number of metres per degree, used to express the window sizes in
# metres for the datasets stored in projected coordinates.
degree_conversion = 111111
length_grav_metres = length_grav * degree_conversion
length_mag_metres = length_mag * degree_conversion

# The same centre point expressed in MGA zone 50 and in AGD84.
# pyproj.transform takes and returns coordinates in (x, y) = (lon, lat) order.
eastings_centre, northings_centre = pyproj.transform(gda94, mgaz50, lon_centre, lat_centre)
agd_x_centre, agd_y_centre = pyproj.transform(gda94, agd84, lon_centre, lat_centre)

In [None]:
# Disabled experiment, kept as a bare string so that it does not execute: it
# would derive length_grav_metres from a projected latitude offset instead of
# the fixed degree_conversion factor.
"""y2 = (lat_centre + length_grav)
_, y2_boundary = pyproj.transform(gda94, mgaz50, lon_centre, y2)
print(y2_boundary)
print(northings_centre)
print(y2_boundary - northings_centre)
length_grav_metres = y2_boundary - northings_centre"""

In [None]:
# Dataset 1: one gravity file and one magnetic file. Each entry names the file
# and the columns holding latitude, longitude and the measured value.
dict_data_set1 = dict(
    dir_data='/Users/davidkohn/dev/obsidian/data/dataset1',
    grav=dict(
        fname='gravity_400m_Gascoyne.txt',
        key_lat='Latitude',
        key_lon='Longitude',
        key_y='grid_code',
    ),
    mag=dict(
        fname='mag_TMI_gascoyne.txt',
        key_lat='Latitude',
        key_lon='Longitude',
        key_y='grid_code',
    ),
)

# Dataset 2: XYZ exports (north and south gravity, one magnetic file). The
# coordinate columns are named 'X' and 'Y'.
dict_data_set2 = dict(
    dir_data='/Users/davidkohn/dev/obsidian/data/dataset2',
    grav_north=dict(
        fname='Gascoyne_North_Bouguer_gravity_400m_XYZ.txt',
        key_lat='Y',
        key_lon='X',
        key_y='GASCOYNE_NORTH_1',
    ),
    grav_south=dict(
        fname='Gascoyne_South_Bouguer_gravity_500m_XYZ.txt',
        key_lat='Y',
        key_lon='X',
        key_y='GASCOYNE_SOUTH_1',
    ),
    mag=dict(
        fname='Bangemall_mag_125m_XYZ.txt',
        key_lat='Y',
        key_lon='X',
        key_y='MAG_PD',
    ),
)

# Dataset 3: gravity line data (xlsx, north and south) and two magnetic .asc
# files. The gravity entries additionally carry a 'key_z' elevation column.
dict_data_set3 = dict(
    dir_data='/Users/davidkohn/dev/obsidian/data/dataset3',
    grav_north=dict(
        fname='Gascoyne_North_2010_gravity_line_data_all.xlsx',
        key_lat='COORDINATE LATITUDE GDA94 (DECIMAL DEGREES)',
        key_lon='COORDINATE LONGITUDE GDA94 (DECIMAL DEGREES)',
        key_z='GROUND LEVEL ELEVATION (M)',
        key_y='COMPLETE SPHERICAL CAP BOUGUER ANOMALY 2.67 t/m^3 (GU)',
    ),
    grav_south=dict(
        fname='Gascoyne_South_2010_gravity_line_data_all.xlsx',
        key_lat='COORDINATE LATITUDE GDA94 (DECIMAL DEGREES)',
        key_lon='COORDINATE LONGITUDE GDA94 (DECIMAL DEGREES)',
        key_z='GROUND LEVEL ELEVATION (M)',
        key_y='COMPLETE SPHERICAL CAP BOUGUER ANOMALY 2.67 t/m^3 (GU)',
    ),
    mag1=dict(
        fname='bangemall_1.asc',
        key_lat='latitude',
        key_lon='longitude',
        key_y='mag corrected',
    ),
    mag2=dict(
        fname='bangemall_2.asc',
        key_lat='latitude',
        key_lon='longitude',
        key_y='mag corrected',
    ),
)

# Dataset 4: the northern gravity line data (xlsx) and a cropped magnetic CSV
# whose coordinate columns are named Easting_m / Northing_m.
dict_data_set4 = dict(
    dir_data='/Users/davidkohn/dev/obsidian/data/dataset4',
    grav=dict(
        fname='Gascoyne_North_2010_gravity_line_data_all.xlsx',
        key_lat='COORDINATE LATITUDE GDA94 (DECIMAL DEGREES)',
        key_lon='COORDINATE LONGITUDE GDA94 (DECIMAL DEGREES)',
        key_z='GROUND LEVEL ELEVATION (M)',
        key_y='COMPLETE SPHERICAL CAP BOUGUER ANOMALY 2.67 t/m^3 (GU)',
    ),
    mag=dict(
        fname='bangemall_1_cropped_WGS84.csv',
        key_lat='Northing_m',
        key_lon='Easting_m',
        key_y='mag_corrected_nT',
    ),
)

# Dataset1: grav

In [None]:
# Dataset 1 gravity: resolve the file path and column keys, load the table,
# keep the points in the square around (lon_centre, lat_centre) and hand them
# to run_gp. The helpers are not defined in this notebook (presumably they come
# from `functions` — TODO confirm).
dict_data_set = dict_data_set1
sub_key = 'grav'
length = length_grav

fpath_data, key_x_coord, key_y_coord, key_y = get_vars(dict_data_set, sub_key)
data = get_data(fpath_data)

# Centre and window size are both given in lon/lat here.
data_centred = get_data_that_is_in_square_around_centre(
    data,
    [lon_centre, lat_centre],
    [key_x_coord, key_y_coord],
    [length, length]
)

# NOTE(review): unlike the dataset-2 cells, the target column is not demeaned
# before fitting — confirm this is intended.
X, Y, model = run_gp(data_centred, key_x_coord, key_y_coord, key_y)

In [None]:
# Plot the fitted model and save the current figure.
# NOTE(review): absolute, user-specific output path; it will not exist on other machines.
f = model.plot()
plt.savefig('/Users/davidkohn/Desktop/dataset1-grav.png')

In [None]:
# Show the largest and then the smallest y-coordinate of the selected window.
for extreme in (data_centred[key_y_coord].max(), data_centred[key_y_coord].min()):
    print(extreme)

# Dataset1: mag

In [None]:
# Dataset 1 magnetics: same pipeline as the dataset-1 gravity cell, using the
# 'mag' entry and the magnetic window size.
dict_data_set = dict_data_set1
sub_key = 'mag'
length = length_mag

fpath_data, key_x_coord, key_y_coord, key_y = get_vars(dict_data_set, sub_key)
data = get_data(fpath_data)

# Centre and window size are both given in lon/lat here.
data_centred = get_data_that_is_in_square_around_centre(
    data,
    [lon_centre, lat_centre],
    [key_x_coord, key_y_coord],
    [length, length]
)

# NOTE(review): the target column is not demeaned before fitting, unlike the
# dataset-2 cells — confirm this is intended.
X, Y, model = run_gp(data_centred, key_x_coord, key_y_coord, key_y)

In [None]:
# Plot the fitted model and save the current figure.
# NOTE(review): absolute, user-specific output path; it will not exist on other machines.
f = model.plot()
plt.savefig('/Users/davidkohn/Desktop/dataset1-mag.png')

# Dataset2: grav_north

In [None]:
# Dataset 2, northern gravity: select the square around the centre in projected
# (MGA zone 50) coordinates, demean the target, convert the coordinates to
# lon/lat and fit the GP on the converted coordinates.
dict_data_set = dict_data_set2
sub_key = 'grav_north'
length = length_grav_metres

fpath_data, key_x_coord, key_y_coord, key_y = get_vars(dict_data_set, sub_key)
data = get_data(fpath_data)

# .copy() so the column assignments below act on an independent frame rather
# than on a possible slice of `data` (avoids pandas' SettingWithCopyWarning).
data_centred = get_data_that_is_in_square_around_centre(
    data,
    [eastings_centre, northings_centre],
    [key_x_coord, key_y_coord],
    [length, length]
).copy()

data_centred[key_y] = data_centred[key_y] - data_centred[key_y].mean()

new_key_x_coord = 'lon'
new_key_y_coord = 'lat'

# Convert every point in one vectorised call and assign the results
# positionally. The previous row-by-row loop wrote through
# data_centred.loc[idx, ...] with idx taken from enumerate(); on a filtered
# frame whose index is not 0..n-1 that targets the wrong rows or appends new,
# mostly-NaN ones.
lon, lat = pyproj.transform(
    mgaz50, gda94,
    data_centred[key_x_coord].values,
    data_centred[key_y_coord].values
)
data_centred[new_key_x_coord] = lon
data_centred[new_key_y_coord] = lat

X, Y, model = run_gp(data_centred, new_key_x_coord, new_key_y_coord, key_y)

In [None]:
# Extent of the selected window along the y-coordinate column.
print(data_centred[key_y_coord].max())
print(data_centred[key_y_coord].min())

# The (max x, max y) and (min x, min y) corners converted from MGA zone 50 to
# GDA94; pyproj.transform returns them in (lon, lat) order.
print(pyproj.transform(mgaz50, gda94, data_centred[key_x_coord].max(), data_centred[key_y_coord].max()))
print(pyproj.transform(mgaz50, gda94, data_centred[key_x_coord].min(), data_centred[key_y_coord].min()))

In [None]:
# NOTE(review): centre_data is not defined in this notebook (presumably it comes
# from `functions`). It is given the unfiltered `data` together with the
# 'lon'/'lat' column names, which the preceding cell only added to data_centred
# — confirm that `data` actually has these columns.
data_new = centre_data(data, length, 'lon', 'lat', lat_centre, lon_centre)

In [None]:
# Plot the fitted model and save the current figure.
# NOTE(review): absolute, user-specific output path; it will not exist on other machines.
f = model.plot()
plt.savefig('/Users/davidkohn/Desktop/dataset2-grav-north.png')

# Dataset2: mag

In [None]:
# Dataset 2 magnetics: select the square around the centre in projected
# (MGA zone 50) coordinates, demean the target, convert the coordinates to
# lon/lat and fit the GP on the converted coordinates.
dict_data_set = dict_data_set2
sub_key = 'mag'
length = 1500  # window size in metres, hard-coded for this dataset

fpath_data, key_x_coord, key_y_coord, key_y = get_vars(dict_data_set, sub_key)
data = get_data(fpath_data)

# .copy() so the column assignments below act on an independent frame rather
# than on a possible slice of `data` (avoids pandas' SettingWithCopyWarning).
data_centred = get_data_that_is_in_square_around_centre(
    data,
    [eastings_centre, northings_centre],
    [key_x_coord, key_y_coord],
    [length, length]
).copy()

# Force a float dtype before demeaning.
data_centred[key_y] = data_centred[key_y].astype('float64')
data_centred[key_y] = data_centred[key_y] - data_centred[key_y].mean()

new_key_x_coord = 'lon'
new_key_y_coord = 'lat'

# Convert every point in one vectorised call and assign the results
# positionally. The previous row-by-row loop wrote through
# data_centred.loc[idx, ...] with idx taken from enumerate(); on a filtered
# frame whose index is not 0..n-1 that targets the wrong rows or appends new,
# mostly-NaN ones.
lon, lat = pyproj.transform(
    mgaz50, gda94,
    data_centred[key_x_coord].values,
    data_centred[key_y_coord].values
)
data_centred[new_key_x_coord] = lon
data_centred[new_key_y_coord] = lat

X, Y, model = run_gp(data_centred, new_key_x_coord, new_key_y_coord, key_y)

In [None]:
# Plot the fitted model and save the current figure.
# NOTE(review): absolute, user-specific output path; it will not exist on other machines.
f = model.plot()
plt.savefig('/Users/davidkohn/Desktop/dataset2-mag.png')

# Dataset3: grav_north

In [None]:
# Dataset 3, northern gravity line data: resolve the path and column keys and
# load the spreadsheet (get_data is told the file type explicitly).
dict_data_set = dict_data_set3
sub_key = 'grav_north'
length = 0.10  # window size; presumably decimal degrees like length_grav — TODO confirm

fpath_data, key_x_coord, key_y_coord, key_y = get_vars(dict_data_set, sub_key)
data = get_data(fpath_data, ftype = 'xlsx')

In [None]:
# Build an export table from the value and the two coordinate columns. The
# second reset_index() turns the fresh 0..n-1 index into a leading column,
# which becomes 'FID'; the remaining columns take the same
# grid_code / Latitude / Longitude names that dict_data_set1 declares.
# Assumes get_vars returns the latitude column as key_y_coord and the longitude
# column as key_x_coord — TODO confirm.
data_out = data.loc[:, [key_y, key_y_coord, key_x_coord]].reset_index(drop = True).reset_index()
data_out.columns = ['FID', 'grid_code', 'Latitude', 'Longitude']

In [None]:
# Write the export table as CSV with a header row and without the index.
# The relative path means the file lands in the current working directory.
data_out.to_csv('Gascoyne_North_2010_gravity_line_data_all.csv', header = True, index = False)

In [None]:
# Dataset 3, northern gravity: reload the spreadsheet (repeating the setup of
# the load cell above), select the window around the lon/lat centre, demean the
# target and fit the GP.
dict_data_set = dict_data_set3
sub_key = 'grav_north'
length = 0.10

fpath_data, key_x_coord, key_y_coord, key_y = get_vars(dict_data_set, sub_key)
data = get_data(fpath_data, ftype = 'xlsx')

data_centred = get_data_that_is_in_square_around_centre(
    data,
    [lon_centre, lat_centre],
    [key_x_coord, key_y_coord],
    [length, length]
)

# NOTE(review): if the helper returns a slice of `data`, this assignment can
# trigger pandas' SettingWithCopyWarning — confirm it returns a copy.
data_centred[key_y] = data_centred[key_y] - data_centred[key_y].mean()

X, Y, model = run_gp(data_centred, key_x_coord, key_y_coord, key_y)

# Dataset3: mag1

In [None]:
# Column names for the header-less dataset-3 magnetic .asc files, grouped by
# theme. The labels are kept exactly as they were (including the 'dirunal'
# spelling) because they become DataFrame column names.
headers = (
    # flight-line bookkeeping
    ['line', 'flight', 'direction', 'date', 'fiducial', 'time', 'recovery status']
    # position
    + ['easting', 'northing', 'longitude', 'latitude']
    # magnetics
    + ['mag raw', 'igrf', 'dirunal', 'mag corrected']
    # radiometrics, raw
    + ['total count raw', 'potassium raw', 'uranium raw', 'thorium raw', 'cosmic']
    # radiometrics, corrected
    + ['total count corrected', 'potassium corrected', 'uranium corrected', 'thorium corrected']
    # altitude
    + ['radar altimeter', 'barometric altimeter', 'gps height']
    # trailing column
    + ['end']
)

In [None]:
# Dataset 3, first magnetic file: resolve the path and column keys, then read
# the whitespace-delimited, header-less file and label its columns.
dict_data_set = dict_data_set3
sub_key = 'mag1'
length = length_mag
#length = 2000

fpath_data, key_x_coord, key_y_coord, key_y = get_vars(dict_data_set, sub_key)
# Raw string for the regex separator: '\s' is not a valid Python string escape
# and a plain '\s+' emits an invalid-escape warning on recent Python versions.
data = pd.read_csv(fpath_data, sep = r'\s+', header = None)
# Direct assignment raises if the file's column count does not match `headers`.
data.columns = headers

In [None]:
# Convert the coordinate columns with a single vectorised pyproj call instead
# of one call per row. pyproj.transform returns (x, y), so despite the column
# name every tuple stored in 'LatLong' is ordered (lon, lat).
# data_new is an alias of data, so the new column is added to data as well.
# NOTE(review): for the 'mag1' entry the key columns are 'longitude' and
# 'latitude', yet they are treated here as MGA zone 50 coordinates — confirm
# the intended source columns / CRS.
data_new = data
lon, lat = pyproj.transform(
    mgaz50, gda94,
    data_new[key_x_coord].values,
    data_new[key_y_coord].values
)
data_new['LatLong'] = list(zip(lon, lat))

In [None]:
# Extract the latitude from the 'LatLong' tuples. pyproj.transform returns
# (x, y) = (lon, lat), so latitude is element 1. The previous row[0] indexed the
# row itself and never looked inside 'LatLong'.
data_new['lat'] = data_new['LatLong'].apply(lambda lon_lat: lon_lat[1])

In [None]:
# Extract the longitude from the 'LatLong' tuples. pyproj.transform returns
# (x, y) = (lon, lat), so longitude is element 0. The previous row[1] indexed
# the row itself and never looked inside 'LatLong'.
data_new['lon'] = data_new['LatLong'].apply(lambda lon_lat: lon_lat[0])

In [None]:
# Re-establish the dataset-3 'mag1' settings and column keys (same values as
# in the load cell above) without reloading the data.
dict_data_set = dict_data_set3
sub_key = 'mag1'
length = length_mag
#length = 2000

fpath_data, key_x_coord, key_y_coord, key_y = get_vars(dict_data_set, sub_key)

In [None]:
# Load a previously exported CSV.
# NOTE(review): data_out is reassigned from data_new in the next cell, so this
# load has no lasting effect when the notebook is run top to bottom; the path
# is also absolute and user-specific.
data_out = pd.read_csv('/Users/davidkohn/dev/obsidian/data/dataset3/bangemall_1.csv')

In [None]:
# Build the export table from the value column and the file's own coordinate
# columns (not the derived 'lat'/'lon'). The second reset_index() turns the
# fresh 0..n-1 index into the leading 'FID' column.
# Assumes get_vars returns the latitude column as key_y_coord and the longitude
# column as key_x_coord — TODO confirm.
data_out = data_new.loc[:, [key_y, key_y_coord, key_x_coord]].reset_index(drop = True).reset_index()
data_out.columns = ['FID', 'grid_code', 'Latitude', 'Longitude']

In [None]:
# Write the export table as CSV with a header row and without the index.
# NOTE(review): the relative path writes into the current working directory,
# whereas the read cell above loads bangemall_1.csv from the dataset3 folder.
data_out.to_csv('bangemall_1.csv', index = False, header = True)

In [None]:
# Dataset 3 magnetics: select the window around the AGD84 centre, demean the
# target and fit an ARD GP on the file's own coordinate columns.
# .copy() so the column assignments below act on an independent frame rather
# than on a possible slice of `data` (avoids pandas' SettingWithCopyWarning).
data_centred = get_data_that_is_in_square_around_centre(
    data,
    [agd_x_centre, agd_y_centre],
    [key_x_coord, key_y_coord],
    [length, length]
).copy()

data_centred[key_y] = data_centred[key_y] - data_centred[key_y].mean()

new_key_x_coord = 'lon'
new_key_y_coord = 'lat'

# Convert every point in one vectorised call and assign the results
# positionally. The previous row-by-row loop wrote through
# data_centred.loc[idx, ...] with idx taken from enumerate(); on a filtered
# frame whose index is not 0..n-1 that targets the wrong rows or appends new,
# mostly-NaN ones, which would then be passed on to run_gp.
# NOTE(review): the key columns are treated as MGA zone 50 coordinates although
# the window is selected around an AGD84 lon/lat centre, and run_gp below does
# not use the 'lon'/'lat' columns — confirm whether this conversion is needed.
lon, lat = pyproj.transform(
    mgaz50, gda94,
    data_centred[key_x_coord].values,
    data_centred[key_y_coord].values
)
data_centred[new_key_x_coord] = lon
data_centred[new_key_y_coord] = lat

X, Y, model = run_gp(data_centred, key_x_coord, key_y_coord, key_y, ARD = True)

In [None]:
# Plot the fitted model and save the current figure.
# NOTE(review): absolute, user-specific output path; it will not exist on other machines.
f = model.plot()
plt.savefig('/Users/davidkohn/Desktop/dataset3-mag-length-20km-ard.png')

In [None]:
# Inspect the length-scale parameter(s) of the model's Mat32 kernel component.
print(model.Mat32.lengthscale)

In [None]:
# Positions spanning the x-coordinate range of the window, spaced by the
# length-scale entry at index 1. The array is only displayed, not stored.
np.arange(
    data_centred[key_x_coord].min(),
    data_centred[key_x_coord].max(),
    step = model.Mat32.lengthscale[1]
)

In [None]:
# Number of distinct x-coordinate values in the selected window.
data_centred[key_x_coord].unique().shape

In [None]:
# Rows and columns of the selected window.
data_centred.shape

In [None]:
# Keep the length-scale entry at index 1 for the comparison cells below.
# NOTE(review): `l` is easy to misread as the digit 1; consider a descriptive name.
l = model.Mat32.lengthscale[1]

In [None]:
# Smallest x-coordinate in the selected window.
data_centred[key_x_coord].min()

In [None]:
# Print the x-extent of the window (max, min, their difference) followed by the
# stored length-scale, so the two can be compared.
x_column = data_centred[key_x_coord]
x_max, x_min = x_column.max(), x_column.min()
print(x_max)
print(x_min)
diff = x_max - x_min
print(diff)
print(l)

In [None]:
# Window extent along x expressed in units of the length-scale.
diff / l

# Dataset 4

## grav

In [None]:
# Dataset 4 gravity: resolve the path and column keys and load the spreadsheet.
dict_data_set = dict_data_set4
sub_key = 'grav'
length = 0.10  # window size; presumably decimal degrees like length_grav — TODO confirm

fpath_data, key_x_coord, key_y_coord, key_y = get_vars(dict_data_set, sub_key)
data = get_data(fpath_data, ftype = 'xlsx')

In [None]:
# Build the export table from the value, the two coordinate columns and the
# elevation column named by this entry's 'key_z'. The second reset_index()
# turns the fresh 0..n-1 index into the leading 'FID' column.
# Assumes get_vars returns the latitude column as key_y_coord and the longitude
# column as key_x_coord — TODO confirm.
data_out = data.loc[
    :, [key_y, key_y_coord, key_x_coord, dict_data_set[sub_key]['key_z']]
].reset_index(drop = True).reset_index()
data_out.columns = ['FID', 'grid_code', 'Latitude', 'Longitude', 'Z']

In [None]:
# Destination for the CSV export, inside the dataset-4 data directory.
fname = 'Gascoyne_North_2010_gravity_line_data_all.csv'
fpath = os.path.join(dict_data_set4['dir_data'], fname)

In [None]:
# Write the export table as CSV with a header row and without the index.
data_out.to_csv(fpath, header = True, index = False)

In [None]:
# Dataset 4 gravity: select the window around the AGD84 centre, demean the
# target and fit an ARD GP on the file's own coordinate columns.
# .copy() so the column assignments below act on an independent frame rather
# than on a possible slice of `data` (avoids pandas' SettingWithCopyWarning).
data_centred = get_data_that_is_in_square_around_centre(
    data,
    [agd_x_centre, agd_y_centre],
    [key_x_coord, key_y_coord],
    [length, length]
).copy()

data_centred[key_y] = data_centred[key_y] - data_centred[key_y].mean()

new_key_x_coord = 'lon'
new_key_y_coord = 'lat'

# Convert every point in one vectorised call and assign the results
# positionally. The previous row-by-row loop wrote through
# data_centred.loc[idx, ...] with idx taken from enumerate(); on a filtered
# frame whose index is not 0..n-1 that targets the wrong rows or appends new,
# mostly-NaN ones, which would then be passed on to run_gp.
# NOTE(review): the key columns are treated as MGA zone 50 coordinates although
# the window is selected around an AGD84 lon/lat centre, and run_gp below does
# not use the 'lon'/'lat' columns — confirm whether this conversion is needed.
lon, lat = pyproj.transform(
    mgaz50, gda94,
    data_centred[key_x_coord].values,
    data_centred[key_y_coord].values
)
data_centred[new_key_x_coord] = lon
data_centred[new_key_y_coord] = lat

X, Y, model = run_gp(data_centred, key_x_coord, key_y_coord, key_y, ARD = True)

In [None]:
# Plot the fitted model and save the current figure.
# NOTE(review): absolute, user-specific output path; it also spells the folder
# 'desktop' in lower case, unlike the 'Desktop' used by the earlier plot cells.
model.plot()
plt.savefig('/Users/davidkohn/desktop/dataset4-gp-grav.png')

## mag

In [None]:
# Dataset 4 magnetics: resolve the path and column keys and read the CSV.
dict_data_set = dict_data_set4
sub_key = 'mag'
length = length_mag
#length = 2000

fpath_data, key_x_coord, key_y_coord, key_y = get_vars(dict_data_set, sub_key)
data = pd.read_csv(fpath_data)

In [None]:
# Convert the projected coordinate columns to GDA94 with a single vectorised
# pyproj call instead of one call per row. pyproj.transform returns (x, y), so
# despite the column name every tuple stored in 'LatLong' is ordered (lon, lat).
# data_new is an alias of data, so the new column is added to data as well.
data_new = data
lon, lat = pyproj.transform(
    mgaz50, gda94,
    data_new[key_x_coord].values,
    data_new[key_y_coord].values
)
data_new['LatLong'] = list(zip(lon, lat))

In [None]:
# NOTE(review): the 'LatLong' tuples come from pyproj.transform and are ordered
# (lon, lat), so element 0 stored here under 'lat' is actually the longitude.
# The export cell below compensates by renaming 'lat' to 'Longitude', so the
# exported table is labelled correctly; change both together or not at all.
data_new['lat'] = data_new.apply(
    lambda row:  row['LatLong'][0], 
    axis = 1
)

In [None]:
# NOTE(review): the 'LatLong' tuples come from pyproj.transform and are ordered
# (lon, lat), so element 1 stored here under 'lon' is actually the latitude.
# The export cell below compensates by renaming 'lon' to 'Latitude', so the
# exported table is labelled correctly; change both together or not at all.
data_new['lon'] = data_new.apply(
    lambda row:  row['LatLong'][1], 
    axis = 1
)

In [None]:
# Build the export table. The second reset_index() turns the fresh 0..n-1 index
# into the leading 'FID' column.
# NOTE(review): 'lon' is renamed to 'Latitude' and 'lat' to 'Longitude'. This
# looks inverted but undoes the swap made when 'lat'/'lon' were extracted from
# the (lon, lat) tuples in 'LatLong', so the exported labels are correct.
data_out = data_new.loc[:, [key_y, 'lon', 'lat']].reset_index(drop = True).reset_index()
data_out.columns = ['FID', 'grid_code', 'Latitude', 'Longitude']

In [None]:
# Destination for the CSV export, inside the dataset-4 data directory.
fname = 'bangemall_1.csv'
fpath = os.path.join(dict_data_set4['dir_data'], fname)

In [None]:
# Write the export table as CSV with a header row and without the index.
data_out.to_csv(fpath, header = True, index = False)

In [None]:
# Dataset 4 magnetics: work on the exported table (Latitude / Longitude /
# grid_code columns), select the window around the AGD84 centre, demean the
# target and fit an ARD GP on the Longitude / Latitude columns.
key_y_coord = 'Latitude'
key_x_coord = 'Longitude'
key_y = 'grid_code'

# .copy() so the column assignments below act on an independent frame rather
# than on a possible slice of data_out (avoids pandas' SettingWithCopyWarning).
data_centred = get_data_that_is_in_square_around_centre(
    data_out,
    [agd_x_centre, agd_y_centre],
    [key_x_coord, key_y_coord],
    [length, length]
).copy()

data_centred[key_y] = data_centred[key_y] - data_centred[key_y].mean()

new_key_x_coord = 'lon'
new_key_y_coord = 'lat'

# Convert every point in one vectorised call and assign the results
# positionally. The previous row-by-row loop wrote through
# data_centred.loc[idx, ...] with idx taken from enumerate(); on a filtered
# frame whose index is not 0..n-1 that targets the wrong rows or appends new,
# mostly-NaN ones, which would then be passed on to run_gp.
# NOTE(review): the Longitude / Latitude columns are treated as MGA zone 50
# coordinates here, and run_gp below does not use the resulting 'lon'/'lat'
# columns — confirm whether this conversion is needed.
lon, lat = pyproj.transform(
    mgaz50, gda94,
    data_centred[key_x_coord].values,
    data_centred[key_y_coord].values
)
data_centred[new_key_x_coord] = lon
data_centred[new_key_y_coord] = lat

X, Y, model = run_gp(data_centred, key_x_coord, key_y_coord, key_y, ARD = True)

In [None]:
# Plot the fitted model and save the current figure.
# NOTE(review): absolute, user-specific output path; it also spells the folder
# 'desktop' in lower case, unlike the 'Desktop' used by the earlier plot cells.
model.plot()
plt.savefig('/Users/davidkohn/desktop/dataset4-gp-mag.png')