## Libraries

In [None]:
# data
import numpy as np
import pandas as pd
import xarray as xr

# plotting
import holoviews as hv
from holoviews import opts
import matplotlib.pyplot as plt

# allow "direct" plotting pandas and xarray just in case
import hvplot.pandas  # noqa
import hvplot.xarray  # noqa
pd.options.plotting.backend = 'holoviews'

# setup plotting libs
hv.extension('bokeh', 'matplotlib')
%matplotlib inline

# not necessary but why not
from pandas_profiling import ProfileReport

# repeatability
np.random.seed(123)

# models
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

from tensorflow.python.client import device_lib 
print(device_lib.list_local_devices())

## Read and show raw data


In [None]:
from os import path
# Read the albedo map and the four element maps; the CSV files have no header row.
raw_data = {
    name: pd.read_csv('./Data/' + filename, sep=',', header=None)
    for name, filename in (
        ('albedo', 'Albedo.csv'),
        ('a', 'Element_A_Map.csv'),
        ('b', 'Element_B_Map.csv'),
        ('c', 'Element_C_Map.csv'),
        ('d', 'Element_D_Map.csv'),
    )
}
# Name the row index 'y' and the column index 'x', then print a summary of each frame.
for frame in raw_data.values():
    frame.index.name = 'y'
    frame.columns.name = 'x'
    frame.info()

In [None]:
# Combine the per-map frames into one dataset with a variable per map.
data = xr.Dataset(data_vars=raw_data)
data

In [None]:
from functools import partial

# Image factory: hv.Image with its key dimensions preset to ('x', 'y');
# any other arguments are forwarded to hv.Image unchanged.
plot_image = partial(hv.Image, kdims=['x', 'y'])

def layout_vars(foo):
    """Return a plotter that draws every variable of a dataset side by side.

    Parameters
    ----------
    foo : callable
        Called as ``foo(ds[name])`` for each name yielded by iterating ``ds``;
        its result becomes one panel of the layout.

    Returns
    -------
    callable
        ``ds -> hv.NdLayout`` keyed by variable name (key dimension
        ``'variable'``) and arranged in two columns.
    """
    def layout(ds):
        panels = {}
        for name in ds:
            panels[name] = foo(ds[name])
        return hv.NdLayout(panels, kdims=['variable']).cols(2)

    return layout

# One image per variable; each panel is sized to one pixel per grid cell.
(
    layout_vars(plot_image)(data)
    .opts(opts.Image(width=len(data.x), height=len(data.y), colorbar=True))
)

In [None]:
def grid_diag_vars(pair, solo):
    """Return a plotter that fills a column-by-column grid from a tidy table.

    Parameters
    ----------
    pair : callable
        Called as ``pair(tidy, [a, b])`` for every ordered pair of distinct
        columns ``a`` and ``b``.
    solo : callable
        Called as ``solo(tidy, a)`` for the diagonal cells, where both
        columns are the same.

    Returns
    -------
    callable
        ``tidy -> hv.GridSpace`` keyed by ``(a, b)`` over ``tidy.columns``.
    """
    def build(tidy):
        cells = {}
        for a in tidy.columns:
            for b in tidy.columns:
                if a != b:
                    cells[(a, b)] = pair(tidy, [a, b])
                else:
                    cells[(a, b)] = solo(tidy, a)
        return hv.GridSpace(cells)

    return build

def map_datasets(foo, kdims=None):
    """Return a plotter that renders several datasets into one collated HoloMap.

    Parameters
    ----------
    foo : callable
        Called as ``foo(ds)`` for each value of the mapping being plotted.
    kdims : optional
        Passed straight through to ``hv.HoloMap`` as its key dimension(s).

    Returns
    -------
    callable
        ``datasets -> hv.HoloMap(...).collate()`` where ``datasets`` is a
        mapping of key to dataset.
    """
    def build(datasets):
        rendered = {}
        for key, dataset in datasets.items():
            rendered[key] = foo(dataset)
        return hv.HoloMap(rendered, kdims=kdims).collate()

    return build

def my_hex(ds):
    """Draw a pair grid of a tidy table: hex-bin plots off the diagonal,
    20-bin histograms on it.

    Parameters
    ----------
    ds : table-like
        Must expose ``.columns`` and column access via ``ds[name].values``.

    Returns
    -------
    The grid produced by ``grid_diag_vars`` with HexTiles and Overlay
    options applied.
    """
    def hex_cell(frame, kdims):
        # Off-diagonal cell: joint distribution of the two columns.
        return hv.Overlay([hv.HexTiles(frame, kdims)])

    def hist_cell(frame, column):
        # Diagonal cell: distribution of a single column.
        binned = np.histogram(frame[column].values, bins=20)
        return hv.Overlay([hv.Histogram(binned)])

    grid = grid_diag_vars(hex_cell, hist_cell)(ds)
    return grid.opts(
        opts.HexTiles(colorbar=True, logz=True, gridsize=20),
        opts.Overlay(width=180, height=180),
    )


# Pairwise view of all raw variables, one row per grid cell.
my_hex(data.to_dataframe())


In [None]:
def layout_datasets(foo, kdims=None):
    """Return a plotter that places one panel per dataset in an NdLayout.

    Parameters
    ----------
    foo : callable
        Called as ``foo(ds)`` for each value of the mapping being plotted.
    kdims : optional
        Passed straight through to ``hv.NdLayout`` as its key dimension(s).

    Returns
    -------
    callable
        ``datasets -> hv.NdLayout`` where ``datasets`` is a mapping of key
        to dataset.
    """
    def build(datasets):
        panels = {}
        for key, dataset in datasets.items():
            panels[key] = foo(dataset)
        return hv.NdLayout(panels, kdims=kdims)

    return build
    

def map_variables(foo):
    """Return a plotter that turns each variable of a dataset into a HoloMap frame.

    Parameters
    ----------
    foo : callable
        Called as ``foo(ds[name])`` for each name yielded by iterating ``ds``.

    Returns
    -------
    callable
        ``ds -> hv.HoloMap`` keyed by variable name, with key dimension
        ``'variables'``.
    """
    def build(ds):
        frames = {}
        for name in ds:
            frames[name] = foo(ds[name])
        return hv.HoloMap(frames, kdims='variables')

    return build

In [None]:
import tensorflow as tf
# Load the previously saved Keras model from ./unet.tf; later cells use it for prediction.
# NOTE(review): './unet.tf' is presumably a saved-model path produced by a separate
# training notebook — confirm it is shipped alongside this notebook.
model_u = tf.keras.models.load_model('./unet.tf')

In [None]:
from tensorflow.keras.utils import plot_model
# Draw the architecture of the loaded model, annotated with tensor shapes,
# using a left-to-right ('LR') layout at 64 dpi.
plot_model(model_u, show_shapes=True, rankdir='LR', dpi=64)

In [None]:
# Number of times each prepared sample is repeated along the batch axis.
BATCH = 1


def prep_u(ds, pad=0):
    """Build the model input ``X`` and target ``Y`` from a map dataset.

    Both arrays are divided by 100 (later cells multiply predictions by 100
    to undo this) and given a leading batch axis of length ``BATCH``.

    Parameters
    ----------
    ds : dataset-like
        Must expose ``ds.albedo`` and the variables ``'a'``, ``'b'``, ``'c'``
        and ``'d'`` (accessed as ``ds[list('abcd')]``).
    pad : int, default 0
        Cells added on each side of the albedo input. Only ``X`` is padded;
        ``Y`` keeps the unpadded size.

    Returns
    -------
    tuple of numpy.ndarray
        ``X`` is the transposed albedo with a trailing channel axis of
        length 1. ``Y`` is the transposed stack of the four element maps,
        with the elements on the last axis.
    """
    X = ds.albedo.transpose().data
    # NOTE(review): after the transpose, axis 0 is presumably x and axis 1 is y,
    # so 'wrap' is applied along y and 'edge' along x — confirm this is the
    # intended orientation before using pad > 0.
    X = np.pad(X, [(0,), (pad,)], 'wrap')
    X = np.pad(X, [(pad,), (0,)], 'edge')[None, ..., None]
    # Divide out of place: an in-place `/=` raises a casting error when the
    # CSV was parsed as integers, and would write into whatever buffer the
    # array happens to share.
    X = X / 100
    Y = ds[list('abcd')].to_array().transpose().data[None, ...]
    Y = Y / 100
    return X.repeat(BATCH, axis=0), Y.repeat(BATCH, axis=0)
# Sanity check: shapes of the input and target arrays built from the full map.
print([arr.shape for arr in prep_u(data)])

In [None]:
# Work on a copy so the element maps can be replaced by predictions
# while `data` keeps the original values.
u_data = data.copy()
X, _ = prep_u(data)
# Keep the first item of the predicted batch and undo the /100 scaling from prep_u.
pred_u = model_u.predict(X)[0] * 100
print(pred_u.shape)
for channel, element in enumerate('abcd'):
    # prep_u transposed the maps, so flip each channel back before storing it as (y, x).
    u_data[element] = (('y', 'x'), pred_u[..., channel].transpose())

In [None]:
# For each element: density of the original (albedo, element) pairs as hex tiles,
# overlaid with a fresh random sample of 1000 predicted points.
panels = []
for element in 'abcd':
    observed = hv.HexTiles(data.to_dataframe(), ['albedo', element])
    predicted = hv.Points(u_data.to_dataframe().sample(1000), ['albedo', element])
    panels.append(observed * predicted)

hv.Layout(panels).opts(
    opts.HexTiles(colorbar=True, logz=True, gridsize=20),
    opts.Points(color='white'),
)

In [None]:
# Compare the original maps with the U-Net output, variable by variable,
# with 'model' as the key dimension that switches between the two.
(
    map_datasets(layout_vars(plot_image), 'model')(
        {'original': data, 'unet': u_data}
    )
    .opts(opts.Image(width=len(data.x), height=len(data.y), colorbar=True))
)

# Deliverables

* Google Colab Jupyter Notebook showing your solution along with the final model score. More details regarding the format of the notebook can be found in the sample Google Colab notebook provided for this challenge.  
* A txt file for each element containing your predictions on the test data. Format should be: x_coordinate, y_coordinate, predicted_value. Put name of element in file. An example is provided.
* The final trained model including the model architecture and the trained weights (For example: HDF5 file, .pb file, .pt, .sav file, etc.). You are free to choose Machine Learning Framework of your choice.
* Example submissions can be found at https://drive.google.com/drive/folders/1EsqNLc5DzCsaJuvSTYF85gMS5PTVell4?usp=sharing

## Save results

In [None]:
# Half-open test window: vx_min <= x < vx_max and vy_min <= y < vy_max.
# x_max and y_max are not used in this cell; they are kept because other cells may read them.
vx_min, vx_max, vy_min, vy_max, x_max, y_max = [300, 430, 140, 270, 720, 360]
# Mask everything outside the window, then drop the rows and columns that are
# entirely empty so only the window remains. The source is `u_data`, so the
# a-d variables here hold the U-Net predictions.
# `how` is passed by keyword: xarray has deprecated passing it positionally,
# and the keyword form works on every version.
test_data = u_data.where(
    np.logical_and(
        np.logical_and(data.x >= vx_min, data.x < vx_max),
        np.logical_and(data.y >= vy_min, data.y < vy_max)
    )
).dropna('x', how='all').dropna('y', how='all')
test_data

In [None]:
# Flatten the test window into a table, turning the index levels into ordinary columns.
out_df = (
    test_data
    .to_dataframe()
    .reset_index()
)
out_df

In [None]:
# One headerless file per element with rows of x, y, predicted value.
for e in 'abcd':
    columns = ['x', 'y', e]
    out_df[columns].to_csv(f'./element_{e.upper()}_predictions.txt', index=False, header=False)

In [None]:
# Per-element MSE of the U-Net, in the original (x100) units, for each dataset.
# NOTE(review): `test_data` is cut from `u_data`, whose a-d variables were
# overwritten with U-Net predictions, so the 'test' rows compare a prediction
# on the cropped window against the prediction from the full map rather than
# against ground truth — confirm this is the intended score.
score_rows = []
for ds_k, ds in [('test', test_data), ('all', data)]:
    # Prepare the arrays and run the model once per dataset instead of once
    # per element; only the first batch item is scored.
    ds_X, ds_Y = prep_u(ds)
    truth = ds_Y[0] * 100
    predicted = model_u.predict(ds_X)[0] * 100
    for i, el in enumerate('abcd'):
        score_rows.append({
            'score': mean_squared_error(truth[..., i], predicted[..., i]),
            'dataset': ds_k,
            'element': el,
            'model': 'unet'
        })
u_score = pd.DataFrame(score_rows)
u_score

### Sum of MSE on all and test data:

In [None]:
# Sum only the numeric score; without the column selection, newer pandas also
# "sums" the string columns 'element' and 'model' by concatenating them.
u_score.groupby('dataset')[['score']].sum()