### Import Libraries

In [142]:
import numpy as np
import xarray as xr
import h5py 
import random

from sklearn import tree
import graphviz

import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import cartopy


### Define Plotting Methods

In [2]:
def addSubplot(data, variable = None, lons = None, lats = None, time = 0, index = 0, size_x = 1, size_y = 1):
	if lons is None or lats is None:
		try:
			lons = data.variables['lon']
			lats = data.variables['lat']
		except Exception:
			print("Longitude/Lattide variables not found!")
			return
	extent = [lons.min(), lons.max(), lats.min(), lats.max()]
	ax = plt.subplot(size_x,size_y,index, projection=ccrs.PlateCarree())
	ax.set_extent(extent, crs=ccrs.PlateCarree())
	ax.coastlines(resolution='50m')
	ax.add_feature(cartopy.feature.LAKES, edgecolor='black')
	ax.add_feature(cartopy.feature.RIVERS)	
	if variable == None:
		ax.contourf(lons, lats, data ,cmap = "jet")
	else:
		ax.contourf(lons, lats, data[variable][time],cmap = "jet")

        
        
def plotData(data, lons, lats):
	plt.figure(figsize=(12, 4))
	extent = [lons.min(), lons.max(), lats.min(), lats.max()]
	ax = plt.axes(projection=ccrs.PlateCarree())
	ax.set_extent(extent)
	ax.gridlines()
	ax.coastlines(resolution='50m')
	ax.add_feature(cartopy.feature.OCEAN)
	ax.add_feature(cartopy.feature.LAND, edgecolor='black')
	ax.add_feature(cartopy.feature.LAKES, edgecolor='black')
	ax.add_feature(cartopy.feature.RIVERS)	
	ax.contourf(lons, lats, data, cmap = "jet")
	plt.show()

##### Read trainig-data

In [5]:
data_directory = "../data/"
filename_labels = "nwcsaf_msevi-nawdex-20160925.nc"
filename_data = "msevi-nawdex-20160925.nc"
filename_mask = "region_masks_for_msevi_nawdex.h5"

sat_data = xr.open_dataset(data_directory+filename_data)
label_data = xr.open_dataset(data_directory+filename_labels)
mask_data = h5py.File(data_directory+filename_mask, 'r')

lons = sat_data['lon']
lats = sat_data['lat']




##### Convert mask to xr-Dataset

In [6]:
dims = ['rows', 'cols']
coords = {'lat': sat_data.coords['lat'], 'lon':sat_data.coords['lon']}

mask_ds = xr.Dataset()
for key in mask_data.keys():
	if key == "_source":
		continue
	m = xr.DataArray([row for row in mask_data[key]], dims = dims, coords = coords, name = key + "_mask")
	mask_ds[key + "_mask"] = m

    

##### Select Data Points for learning

In [163]:
n = 100 
# number of points from the mapped dataset choose for learning 
#(each point is sampled 24x, once for every timeframe)

# extract all indices that lie inside mapped area
mapped_indeces = np.where(mask_ds["mediterranean_mask"])
# create a selection of 'n' index-pairs
selection = random.sample(list(zip(mapped_indeces[0],mapped_indeces[1])),n)
ind_x, ind_y = zip(*selection)


(array([316, 316, 316, ..., 714, 714, 714]), array([1904, 1905, 1906, ..., 2169, 2170, 2171]))


##### Extract trainig samples and corresponding labels

In [144]:
training_data = []
for variable in sat_data.variables:
     # only use relevant channels
     if "bt" in variable:
        # single channel, 24*n values
        entry = np.array(sat_data[variable])[:,ind_x,ind_y].flatten() 
        training_data.append(entry)
        
# reshape from (variables, samples) --> (samples, variables) 
training_data = np.array(training_data)        
sp = training_data.shape
training_data = training_data.flatten().reshape(sp[1],sp[0], order='F')


labels = np.array(label_data["CT"])[:,ind_x,ind_y].flatten()

##### Train classifier with selected data

In [145]:
cl = tree.DecisionTreeClassifier()
cl.fit(training_data, labels)

DecisionTreeClassifier()

##### Read test data

In [175]:
filename_testlabels = "nwcsaf_msevi-nawdex-20160920.nc"
filename_testdata = "msevi-nawdex-20160920.nc"
hour = 0

raw_test_data = xr.open_dataset(data_directory+filename_testdata)
raw_test_label = xr.open_dataset(data_directory+filename_testlabels)


### read test data, apply region-mapping, convert into form usable by classifier
test_data = []
for variable in raw_test_data.variables:
     if "bt" in variable:
        masked_channel = raw_test_data[variable][hour].where(mask_ds["mediterranean_mask"])
        test_data.append(np.array(masked_channel).flatten())
test_data = np.array(test_data)
sp = test_data.shape
test_data = test_data.flatten().reshape(sp[1],sp[0], order='F')

test_label = raw_test_label["CT"][hour].where(mask_ds["mediterranean_mask"])
test_label = np.array(test_label).flatten()



In [189]:
nan_label = np.isnan(test_label)
nan_data = []np.any()

(2787104, 8)

In [155]:
cl.predict(test_data)

ValueError: Input contains NaN, infinity or a value too large for dtype('float32').

### test stuff and such

In [99]:
training_data.shape

(8, 2400)

In [102]:
labels

array([ 2.,  2., 16., ..., 12.,  2.,  8.], dtype=float32)

In [123]:
a = np.array([[1,2,3,4,5],[9,8,9,7,9], [11,11,11,11,11]])
a.flatten().reshape((5,3), order='F')

array([[ 1,  9, 11],
       [ 2,  8, 11],
       [ 3,  9, 11],
       [ 4,  7, 11],
       [ 5,  9, 11]])