In [5]:
import pandas as pd
import numpy as np
import dask.array as da
from dask_image.ndfilters import generic_filter as d_gf
from scipy.stats import skew

import matplotlib.pyplot as plt

from helper import fast as fs
from helper import index as slowHelpers

In [6]:
# Directory holding the per-zone input rasters (the *.npy files read by load_raw).
features_path = "../../../features/features/"
# Directory where the per-zone feature tables (zone_<n>.pickle) are read from and written to.
output_path ="../../../features/"

In [13]:
import datetime    
import pickle


# Same values as the configuration cell earlier in the notebook, re-assigned in this cell.
# features_path: directory of the per-zone input rasters (*.npy) read by load_raw.
features_path = "../../../features/features/"
# output_path: directory of the per-zone feature pickles (zone_<n>.pickle).
output_path ="../../../features/"

# (statistic name, reducer, sizes) for the "mass produced" neighbourhood
# statistics.  The order of the entries is the order in which the columns are
# appended to a freshly built data frame.  Each size is handed to
# fs.create_circular_mask (presumably a radius in cells -- confirm in helper.fast).
_FOCAL_STATISTICS = (
    ("mean", np.nanmean, (2, 3, 4, 6)),
    ("median", np.nanmedian, (2, 4, 6)),
    ("min", np.amin, (2, 4, 6)),
    ("max", np.amax, (2, 4, 6)),
    ("std", np.nanstd, (2, 4, 6)),
    ("skew", skew, (2,)),
)


def _focal_column_names(prefix, skip=()):
    """Return the ``<prefix>_<statistic>_<size>`` column names in generation order.

    Statistics whose name is listed in ``skip`` are left out.
    """
    return [
        f"{prefix}_{stat_name}_{size}"
        for stat_name, _reducer, sizes in _FOCAL_STATISTICS
        if stat_name not in skip
        for size in sizes
    ]


def _add_focal_statistics(data_frame, prefix, raster, chunk_size):
    """Add every missing ``<prefix>_<statistic>_<size>`` column to ``data_frame`` in place.

    Each column is ``dask_image``'s ``generic_filter`` applied to ``raster``
    (split into ``chunk_size`` dask chunks) with the statistic's reducer and a
    footprint from ``fs.create_circular_mask(size)``, computed with the
    ``'processes'`` scheduler and flattened.  Columns that already exist (for
    example because they came from the cached pickle) are not recomputed.
    """
    for stat_name, reducer, sizes in _FOCAL_STATISTICS:
        for size in sizes:
            column = f"{prefix}_{stat_name}_{size}"
            if column in data_frame.columns:
                continue
            filtered = d_gf(da.from_array(raster, chunks=chunk_size), reducer,
                            footprint=fs.create_circular_mask(size))
            data_frame[column] = filtered.compute(scheduler='processes').reshape(-1)


def load_raw(zone_number, chunk_size=(800, 800)):
    """Build the per-cell feature table for one zone, reusing any cached pickle.

    The rasters ``Labels3m``, ``HPMF``, ``SkyViewFactor``, ``Impoundment``,
    ``Slope`` and ``DEM`` of the zone are loaded from ``features_path``.  If
    ``{output_path}zone_{zone_number}.pickle`` can be opened it is used as the
    starting data frame (and gets a ``DEM_raw`` column); otherwise a new frame
    is created from the flattened label and raw rasters.  Every feature column
    that is not yet present is then computed and added, so an interrupted or
    older run is completed rather than redone.

    Parameters
    ----------
    zone_number : value interpolated into the raster and pickle file names.
    chunk_size : dask chunk shape used for the neighbourhood filters.

    Returns
    -------
    pandas.DataFrame with one row per raster cell (rasters are flattened with
    ``reshape(-1)``) and one column per feature.

    Raises
    ------
    Whatever ``np.load`` raises when a raster file is missing; errors other
    than ``OSError`` while reading the cache are propagated as well.

    Notes
    -----
    The misspelled ``impundment`` column names are kept on purpose: they are
    the keys already stored in existing pickles and expected by ``set_order``.
    """
    labels = np.load(f"{features_path}/Labels3m_{zone_number}.npy")
    hpmf = np.load(f"{features_path}/HPMF_{zone_number}.npy")
    skyview = np.load(f"{features_path}/SkyViewFactor_{zone_number}.npy")
    impundment = np.load(f"{features_path}/Impoundment_{zone_number}.npy")
    slope = np.load(f"{features_path}/Slope_{zone_number}.npy")
    DEM = np.load(f"{features_path}/DEM_{zone_number}.npy")

    try:
        # SECURITY: unpickling executes arbitrary code; only load pickles that
        # this notebook wrote itself.
        with open(f"{output_path}zone_{zone_number}.pickle", "rb") as cache_file:
            data_frame = pickle.load(cache_file)
    except OSError:
        # No readable cache for this zone: start from the raw rasters.
        data_frame = pd.DataFrame({"label_3m":       labels.reshape(-1),
                                   "hpmf_raw":       hpmf.reshape(-1),
                                   "skyview_raw":    skyview.reshape(-1),
                                   "impundment_raw": impundment.reshape(-1),
                                   "slope_raw":      slope.reshape(-1)
                                   })
    else:
        # Pickles saved through the earlier set_order carry 'hpmf_max_6' twice;
        # keep only the first occurrence of any repeated column name.
        if data_frame.columns.has_duplicates:
            data_frame = data_frame.loc[:, ~data_frame.columns.duplicated()].copy()
        data_frame["DEM_raw"] = DEM.reshape(-1)

    # Impoundment stream amplification, shared by all "*_no_streams" features.
    streamAmp = slowHelpers.streamAmplification(impundment)

    print("starting-DEM:        ", datetime.datetime.now())
    # DEM
    # Manually made
    if "DEM_ditch_detection" not in data_frame.columns:
        data_frame["DEM_ditch_detection"] = slowHelpers.DEMDitchDetection(DEM).reshape(-1)
    if "DEM_ditch_detection_no_streams" not in data_frame.columns:
        # The stored column is flat; restore the shape of the raster it came
        # from (previously hard-coded as (2997, 2620)).
        data_frame["DEM_ditch_detection_no_streams"] = slowHelpers.impoundmentDEMStreamRemoval(
            data_frame["DEM_ditch_detection"].values.reshape(DEM.shape), streamAmp).reshape(-1)
    # NOTE: the first argument above should come from the feature created just before it.

    print("starting-skyview:    ", datetime.datetime.now())

    # Skyview
    # Manually made
    if "conic_mean" not in data_frame.columns:
        data_frame["conic_mean"] = fs.conicMean(skyview, 5, 0.975).reshape(-1)
    if "skyview_non_ditch" not in data_frame.columns:
        data_frame["skyview_non_ditch"] = fs.skyViewNonDitchAmplification(skyview).reshape(-1)
    if "skyview_gabor" not in data_frame.columns:
        data_frame["skyview_gabor"] = fs.skyViewGabor(skyview).reshape(-1)
    if "conic_mean_no_streams" not in data_frame.columns:
        data_frame["conic_mean_no_streams"] = slowHelpers.skyViewHPMFGaborStreamRemoval(
            data_frame["conic_mean"].values.reshape(skyview.shape), streamAmp).reshape(-1)
    if "skyview_gabor_no_streams" not in data_frame.columns:
        data_frame["skyview_gabor_no_streams"] = slowHelpers.skyViewHPMFGaborStreamRemoval(
            data_frame["skyview_gabor"].values.reshape(skyview.shape), streamAmp).reshape(-1)
    # NOTE: the first argument above should come from the feature created just before it.

    # Mass produced
    _add_focal_statistics(data_frame, "skyview", skyview, chunk_size)

    print("starting-impoundment:", datetime.datetime.now())

    # Impoundment
    # Manually made
    if "impundment_amplified" not in data_frame.columns:
        data_frame["impundment_amplified"] = fs.impoundmentAmplification(impundment).reshape(-1)
    if "impoundment_amplified_no_streams" not in data_frame.columns:
        data_frame["impoundment_amplified_no_streams"] = slowHelpers.impoundmentDEMStreamRemoval(
            data_frame["impundment_amplified"].values.reshape(impundment.shape), streamAmp).reshape(-1)
    # NOTE: the first argument above should come from the feature created just before it.

    # Mass produced
    _add_focal_statistics(data_frame, "impundment", impundment, chunk_size)

    print("starting-hpmf:       ", datetime.datetime.now())
    # Hpmf
    # Manually made
    if "hpmf_filter" not in data_frame.columns:
        data_frame["hpmf_filter"] = fs.hpmfFilter(hpmf).reshape(-1)
    if "hpmf_gabor" not in data_frame.columns:
        data_frame["hpmf_gabor"] = fs.skyViewGabor(hpmf).reshape(-1)
    if "hpmf_gabor_no_streams" not in data_frame.columns:
        data_frame["hpmf_gabor_no_streams"] = slowHelpers.skyViewHPMFGaborStreamRemoval(
            data_frame["hpmf_gabor"].values.reshape(hpmf.shape), streamAmp).reshape(-1)
    if "hpmf_filter_no_streams" not in data_frame.columns:
        # Unlike its siblings this one re-runs the filter instead of reading
        # the stored "hpmf_filter" column; kept as in the original.
        data_frame["hpmf_filter_no_streams"] = slowHelpers.skyViewHPMFGaborStreamRemoval(
            fs.hpmfFilter(hpmf).compute(scheduler='processes'), streamAmp).reshape(-1)

    # Mass produced
    _add_focal_statistics(data_frame, "hpmf", hpmf, chunk_size)

    print("starting-slope:      ", datetime.datetime.now())
    # Slope
    # Manually made
    if "slope_non_ditch" not in data_frame.columns:
        data_frame["slope_non_ditch"] = fs.slopeNonDitchAmplification(slope).reshape(-1)

    # Mass produced.  This now includes "slope_max_6", which used to be guarded
    # by a check on "hpmf_max_6" and was therefore never generated.
    _add_focal_statistics(data_frame, "slope", slope, chunk_size)

    return data_frame


def set_order(dataframe):
    """Return ``dataframe`` restricted to the feature columns, in a fixed order.

    The order is: label and raw rasters, DEM features, then for skyview,
    impoundment, hpmf and slope the hand-made features followed by the
    mean/median/min/max/std neighbourhood statistics.  Columns not named here
    (``DEM_raw`` and the ``*_skew_2`` statistics) are dropped, as before.

    Raises ``KeyError`` if any expected column is missing from ``dataframe``.
    """
    dropped_statistics = ("skew",)
    ordered_columns = [
        'label_3m',
        'hpmf_raw',
        'skyview_raw',
        'impundment_raw',
        'slope_raw',
        'DEM_ditch_detection',
        'DEM_ditch_detection_no_streams',
        'conic_mean',
        'skyview_non_ditch',
        'skyview_gabor',
        'conic_mean_no_streams',
        'skyview_gabor_no_streams',
    ]
    ordered_columns += _focal_column_names('skyview', skip=dropped_statistics)
    ordered_columns += [
        'impundment_amplified',
        'impoundment_amplified_no_streams',
    ]
    ordered_columns += _focal_column_names('impundment', skip=dropped_statistics)
    ordered_columns += [
        'hpmf_filter',
        'hpmf_gabor',
        'hpmf_gabor_no_streams',
        'hpmf_filter_no_streams',
    ]
    ordered_columns += _focal_column_names('hpmf', skip=dropped_statistics)
    ordered_columns += ['slope_non_ditch']
    # Yields 'slope_max_6' where the list used to repeat 'hpmf_max_6'.
    ordered_columns += _focal_column_names('slope', skip=dropped_statistics)
    return dataframe[ordered_columns]

In [14]:
%%time
# Zone 1: generate every missing feature column (starting from the cached
# pickle when one exists), put the columns in set_order's order and write the
# result back to the zone's pickle.
zone_with_features = set_order(load_raw(1, chunk_size=(900, 875)))
zone_with_features.to_pickle(f"{output_path}zone_1.pickle")

starting-DEM: 2019-04-17 17:12:24.952044
starting-skyview: 2019-04-17 17:12:24.952544
starting-impoundment: 2019-04-17 17:15:23.638389
starting-hpmf: 2019-04-17 17:18:25.762480
starting-slope: 2019-04-17 17:21:26.505497
CPU times: user 3min 1s, sys: 1min 23s, total: 4min 25s
Wall time: 13min 42s


In [16]:
%%time
# Zone 2: generate every missing feature column (starting from the cached
# pickle when one exists), put the columns in set_order's order and write the
# result back to the zone's pickle.
zone_with_features = set_order(load_raw(2, chunk_size=(900, 875)))
zone_with_features.to_pickle(f"{output_path}zone_2.pickle")

starting-DEM: 2019-04-17 17:27:02.557300
starting-skyview: 2019-04-17 17:27:02.557462
starting-impoundment: 2019-04-17 17:30:11.555352
starting-hpmf: 2019-04-17 17:33:16.204204
starting-slope: 2019-04-17 17:36:27.323132
CPU times: user 2min 59s, sys: 1min 39s, total: 4min 38s
Wall time: 14min 4s


In [17]:
%%time
# Zone 3: generate every missing feature column (starting from the cached
# pickle when one exists), put the columns in set_order's order and write the
# result back to the zone's pickle.
zone_with_features = set_order(load_raw(3, chunk_size=(900, 875)))
zone_with_features.to_pickle(f"{output_path}zone_3.pickle")

starting-DEM: 2019-04-17 17:41:14.808052
starting-skyview: 2019-04-17 17:41:14.808197
starting-impoundment: 2019-04-17 17:44:20.977283
starting-hpmf: 2019-04-17 17:47:29.045736
starting-slope: 2019-04-17 17:50:35.129400
CPU times: user 2min 57s, sys: 1min 39s, total: 4min 36s
Wall time: 14min 8s


In [18]:
%%time
# Zone 4: generate every missing feature column (starting from the cached
# pickle when one exists), put the columns in set_order's order and write the
# result back to the zone's pickle.
zone_with_features = set_order(load_raw(4, chunk_size=(900, 875)))
zone_with_features.to_pickle(f"{output_path}zone_4.pickle")

starting-DEM: 2019-04-17 17:55:22.677541
starting-skyview: 2019-04-17 17:55:22.677685
starting-impoundment: 2019-04-17 17:58:29.505347
starting-hpmf: 2019-04-17 18:01:34.218186
starting-slope: 2019-04-17 18:04:38.578061
CPU times: user 3min, sys: 1min 39s, total: 4min 39s
Wall time: 14min 9s


In [19]:
%%time
# Zone 5: generate every missing feature column (starting from the cached
# pickle when one exists), put the columns in set_order's order and write the
# result back to the zone's pickle.
zone_with_features = set_order(load_raw(5, chunk_size=(900, 875)))
zone_with_features.to_pickle(f"{output_path}zone_5.pickle")

starting-DEM: 2019-04-17 18:09:37.481308
starting-skyview: 2019-04-17 18:09:37.481619
starting-impoundment: 2019-04-17 18:12:39.980739
starting-hpmf: 2019-04-17 18:15:43.931996
starting-slope: 2019-04-17 18:18:49.931544
CPU times: user 3min 3s, sys: 1min 39s, total: 4min 43s
Wall time: 14min 13s


In [20]:
%%time
# Zone 6: generate every missing feature column (starting from the cached
# pickle when one exists), put the columns in set_order's order and write the
# result back to the zone's pickle.
zone_with_features = set_order(load_raw(6, chunk_size=(900, 875)))
zone_with_features.to_pickle(f"{output_path}zone_6.pickle")

starting-DEM: 2019-04-17 18:23:47.093151
starting-skyview: 2019-04-17 18:23:47.093648
starting-impoundment: 2019-04-17 18:26:52.869664
starting-hpmf: 2019-04-17 18:29:58.671928
starting-slope: 2019-04-17 18:33:03.772903
CPU times: user 2min 59s, sys: 1min 38s, total: 4min 37s
Wall time: 14min 5s


In [21]:
%%time
# Zone 7: generate every missing feature column (starting from the cached
# pickle when one exists), put the columns in set_order's order and write the
# result back to the zone's pickle.
zone_with_features = set_order(load_raw(7, chunk_size=(900, 875)))
zone_with_features.to_pickle(f"{output_path}zone_7.pickle")

starting-DEM: 2019-04-17 18:37:51.534515
starting-skyview: 2019-04-17 18:37:51.535016
starting-impoundment: 2019-04-17 18:40:55.803009
starting-hpmf: 2019-04-17 18:44:00.477388
starting-slope: 2019-04-17 18:47:15.901240
CPU times: user 2min 59s, sys: 1min 39s, total: 4min 38s
Wall time: 14min 12s


In [None]:
%%time
# Zone 8: generate every missing feature column and pickle the frame exactly as
# load_raw returns it.
# NOTE(review): most other zone cells pass the frame through set_order before
# saving; this one does not -- confirm that is intentional.
load_raw(8, chunk_size=(900, 875)).to_pickle(f"{output_path}zone_8.pickle")
# Clear the notebook-level reference (presumably to release the frame's memory).
zone_with_features = None

  morphed /= maxVal


starting-DEM: 2019-04-16 16:10:59.236884


In [23]:
%%time
# Zone 9: generate every missing feature column (starting from the cached
# pickle when one exists), put the columns in set_order's order and write the
# result back to the zone's pickle.
zone_with_features = set_order(load_raw(9, chunk_size=(900, 875)))
zone_with_features.to_pickle(f"{output_path}zone_9.pickle")

starting-DEM: 2019-04-17 19:57:58.910450
starting-skyview: 2019-04-17 19:57:58.910662
starting-impoundment: 2019-04-17 20:03:14.311256
starting-hpmf: 2019-04-17 20:06:22.220674
starting-slope: 2019-04-17 20:09:34.124697
CPU times: user 3min 19s, sys: 1min 50s, total: 5min 10s
Wall time: 16min 44s


In [None]:
%%time
# Zone 10: generate every missing feature column and pickle the frame exactly
# as load_raw returns it.
# NOTE(review): most other zone cells pass the frame through set_order before
# saving; this one does not -- confirm that is intentional.
load_raw(10, chunk_size=(900, 875)).to_pickle(f"{output_path}zone_10.pickle")
# Clear the notebook-level reference (presumably to release the frame's memory).
zone_with_features = None

In [None]:
%%time
# Zone 11: generate every missing feature column and pickle the frame exactly
# as load_raw returns it.
# NOTE(review): most other zone cells pass the frame through set_order before
# saving; this one does not -- confirm that is intentional.
load_raw(11, chunk_size=(900, 875)).to_pickle(f"{output_path}zone_11.pickle")
# Clear the notebook-level reference (presumably to release the frame's memory).
zone_with_features = None

In [44]:
# Display zone_3 without its "label_raw" column (drop returns a new frame; zone_3 is unchanged).
# NOTE(review): zone_3 is not assigned in any cell shown in this notebook, and
# load_raw names the label column "label_3m" -- this cell likely relies on
# state from an earlier session; confirm before re-running.
zone_3.drop(columns=["label_raw"])

Unnamed: 0,label_raw,hpmf_raw,skyview_raw,impundment_raw,slope_raw,conic_mean,skyview_non_ditch,skyview_mean_2,skyview_mean_3,skyview_mean_4,...,slope_non_ditch,slope_mean_2,slope_mean_3,slope_mean_4,slope_mean_6,slope_median_2,slope_median_4,slope_median_6,slope_std_6,slope_skew_2
0,0,-1130113270,0.991990,984612864,5,0.991990,1.0,0.993655,0.994245,0.994486,...,0,3,3,2,3,4,3,3,1,1
1,0,-1138501878,0.993762,1028243456,2,0.993762,1.0,0.994124,0.994262,0.993851,...,0,3,3,3,3,3,3,3,1,1
2,0,-1138501878,0.996549,1018445824,1,0.996549,1.0,0.995359,0.993899,0.993388,...,0,1,2,3,3,1,3,3,2,1
3,0,0,0.997279,1011154944,1,0.997279,1.0,0.995311,0.993862,0.992810,...,0,1,2,3,4,1,3,4,2,0
4,0,0,0.996672,1009876992,1,0.996672,1.0,0.994203,0.993247,0.990370,...,0,2,2,3,4,2,3,4,3,1
5,0,-1138501878,0.993235,0,4,0.993235,1.0,0.993486,0.989415,0.987011,...,0,3,4,4,4,3,4,4,3,2
6,0,-1130113270,0.994499,1016332288,3,0.994499,1.0,0.985664,0.982955,0.984831,...,0,5,6,5,4,4,4,4,3,3
7,0,-1113336054,0.975987,1020084224,10,0.975987,1.0,0.975306,0.980128,0.983125,...,0,8,7,6,4,10,5,4,3,3
8,0,-1102263091,0.950124,1048224768,11,0.950124,1.0,0.970972,0.978185,0.983491,...,0,9,7,6,4,11,6,4,3,3
9,0,-1104947446,0.979405,1047005184,13,0.979405,1.0,0.969861,0.976403,0.983010,...,0,8,7,6,4,10,7,4,3,4
