In [None]:
%load_ext autoreload
import numpy as np
import seaborn as sns
import os
import GallenModel as ClassificationModelsimple
import geopandas as gpd
import tensorflow as tf
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold

# Cross Validation Routine

In [None]:
def trainmodel(model, train_ds, val_ds, epochs=100,
               checkpoint_path='TrainedWeightsCrossVal', class_weight=None):
    """Fit ``model`` and checkpoint the epoch with the highest ``val_auc``.

    Parameters
    ----------
    model : compiled Keras model exposing ``fit``.
        NOTE(review): the checkpoint monitors ``val_auc``, so the model is
        presumably compiled with a metric named ``auc`` - confirm in GallenModel.
    train_ds : training data passed straight to ``model.fit``. The callers in
        this notebook pass an already batched ``tf.data.Dataset``.
    val_ds : validation data passed as ``validation_data``.
    epochs : number of training epochs (default 100, the previous constant).
    checkpoint_path : where the best full model (not only weights) is saved.
    class_weight : mapping class index -> loss weight. Defaults to
        ``{0: 1, 1: 5}``, the weighting that used to be hard-coded.

    Returns
    -------
    The ``History`` object returned by ``model.fit``.
    """
    if class_weight is None:
        class_weight = {0: 1, 1: 5}

    model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
        checkpoint_path,
        monitor="val_auc",
        verbose=0,
        save_best_only=True,
        save_weights_only=False,
        mode="max",
        save_freq="epoch",
        options=None
    )
    # `batch_size` is deliberately not forwarded: the datasets are batched when
    # they are built, and Keras documents that `batch_size` must not be given
    # for dataset inputs.
    hist = model.fit(
        train_ds,
        epochs=epochs,
        validation_data=val_ds,
        verbose=1,
        callbacks=[model_checkpoint_callback],
        class_weight=class_weight,
    )
    return hist


In [None]:
#Spatial cross validation

def df_to_dataset(dataframe, shuffle=True, batch_size=32):
  """Convert a DataFrame with a 'Landslide' column into a batched tf.data.Dataset.

  Args:
    dataframe: table holding the feature columns plus the 'Landslide' label
      column. It is copied first, so the caller's frame is left untouched.
    shuffle: when True, shuffle with a buffer covering every row.
    batch_size: rows per batch; also used as the prefetch buffer size.

  Returns:
    A dataset yielding ``(features, labels)`` where ``features`` maps each
    feature column name to a column vector of shape ``(batch, 1)``.
  """
  df = dataframe.copy()
  labels = df.pop('Landslide')
  # Build the features from the copy *after* the label was popped. Iterating the
  # original frame would put 'Landslide' itself into the feature dict.
  features = {key: value.to_numpy()[:, tf.newaxis] for key, value in df.items()}
  ds = tf.data.Dataset.from_tensor_slices((features, labels))
  if shuffle:
    ds = ds.shuffle(buffer_size=len(dataframe))
  ds = ds.batch(batch_size)
  ds = ds.prefetch(batch_size)
  return ds

def get_category_encoding_layer(name, dataset, dtype, max_tokens=None):
  """Build an encoder for one categorical feature, fitted on `dataset`.

  Args:
    name: key of the feature inside each element's feature dict.
    dataset: dataset of ``(features, label)`` pairs used to learn the vocabulary.
    dtype: 'string' selects a StringLookup; any other value an IntegerLookup.
    max_tokens: forwarded to the lookup layer.

  Returns:
    A callable that maps a feature tensor through the lookup layer and then
    through a CategoryEncoding layer sized to the learned vocabulary.
  """
  # Pick the lookup layer that turns raw values into integer indices.
  lookup_cls = layers.StringLookup if dtype == 'string' else layers.IntegerLookup
  index = lookup_cls(max_tokens=max_tokens)

  # Learn the vocabulary from this feature alone.
  index.adapt(dataset.map(lambda x, y: x[name]))

  # The encoder width equals the vocabulary size reported by the lookup layer.
  encoder = layers.CategoryEncoding(num_tokens=index.vocabulary_size())

  def encode(feature):
    # Closure keeps both layers alive so they can be reused in the model.
    return encoder(index(feature))

  return encode

def get_normalization_layer(name, dataset):
  """Return a Normalization layer adapted to one feature of `dataset`.

  Args:
    name: key of the feature inside each element's feature dict.
    dataset: dataset of ``(features, label)`` pairs to learn statistics from.

  Returns:
    The adapted ``layers.Normalization(axis=None)`` instance.
  """
  def select_feature(features, label):
    # Drop the label and every other feature.
    return features[name]

  normalizer = layers.Normalization(axis=None)
  normalizer.adapt(dataset.map(select_feature))
  return normalizer

# Columns handed to df_to_dataset for every fold: the model features plus the label.
_CV_COLUMNS = ['Est_m', 'Nrt_m', 'HC_m', 'VC_m', 'Slp_m', 'Prc_m', 'NDVI_m', 'PGA_Usgs',
               'Sand_m', 'Silt_m', 'Clay_m', 'Bdod_m', 'GLG', 'Landslide']


def _build_fold_inputs(train_ds):
    """Create the Keras inputs and their preprocessing outputs, adapted on `train_ds`.

    Reads the module-level `numerical_cols` and `categorical_cols` lists.
    Returns `(all_inputs, encoded_features)` in the same column order.
    """
    all_inputs = []
    encoded_features = []

    for header in numerical_cols:
        numeric_col = tf.keras.Input(shape=(1,), name=header)
        normalization_layer = get_normalization_layer(header, train_ds)
        all_inputs.append(numeric_col)
        encoded_features.append(normalization_layer(numeric_col))

    for header in categorical_cols:
        categorical_col = tf.keras.Input(shape=(1,), name=header, dtype='string')
        encoding_layer = get_category_encoding_layer(name=header,
                                                     dataset=train_ds,
                                                     dtype='string',
                                                     max_tokens=9)
        all_inputs.append(categorical_col)
        encoded_features.append(encoding_layer(categorical_col))

    return all_inputs, encoded_features


def _run_fold(train_df, test_df, preds_path, truths_path):
    """Train on one fold, reload the best checkpoint and save predictions and truths.

    NOTE(review): the held-out fold is also passed to trainmodel as validation
    data, and trainmodel keeps the checkpoint with the best `val_auc`. The saved
    predictions therefore come from a model selected on the fold it is scored on.
    """
    print(f"Number of train set{len(train_df)} and number of test set {len(test_df)}")

    exai_ds = df_to_dataset(train_df[_CV_COLUMNS])
    # The test fold is not shuffled so predictions stay aligned with `y_test`.
    val_ds = df_to_dataset(test_df[_CV_COLUMNS], shuffle=False)
    y_test = test_df['Landslide'].to_numpy()

    all_inputs, encoded_features = _build_fold_inputs(exai_ds)

    clfmdl = ClassificationModelsimple.LandslideModel()
    clfmdl.getclassificationModel(all_inputs=all_inputs, encoded_features=encoded_features)
    clfmdl.getOptimizer()
    clfmdl.compileModel()

    trainmodel(clfmdl.model, exai_ds, val_ds)
    # Predict with the checkpointed model rather than the last-epoch weights.
    model = tf.keras.models.load_model("TrainedWeightsCrossVal/")
    preds = model.predict(val_ds)
    np.save(preds_path, preds)
    np.save(truths_path, y_test)

    # Release the graph and layer state before the next fold is built.
    del model, clfmdl
    tf.keras.backend.clear_session()


def spatialCrossVal(dfc=None, fold_ids=range(1, 11)):
    """Leave-one-region-out cross validation over the values of the `id` column.

    Args:
        dfc: frame to cross-validate. Defaults to the module-level `df`, which
            is what the original zero-argument call used.
        fold_ids: `id` values to hold out, one fold each (default 1..10).

    Writes `crossval_resultsGallen/SpPredsrv1_<id>.npy` and
    `crossval_resultsGallen/SpTruthsrv1_<id>.npy` for every fold.
    """
    if dfc is None:
        dfc = df
    for i in fold_ids:
        print(i)
        _run_fold(dfc[dfc['id'] != i],
                  dfc[dfc['id'] == i],
                  f'crossval_resultsGallen/SpPredsrv1_{i}.npy',
                  f'crossval_resultsGallen/SpTruthsrv1_{i}.npy')


def randomCrossVal(dfc):
    """Shuffled 10-fold cross validation (random_state=42) over the rows of `dfc`.

    The split is computed on `dfc` itself, so the positional indices always
    match the frame they are applied to with `.iloc`.

    Writes `crossval_resultsGallen/RvPredsrv2_<k>.npy` and
    `crossval_resultsGallen/RvTruthsrv2_<k>.npy` for k = 0..9.
    """
    kf = KFold(n_splits=10, random_state=42, shuffle=True)
    for i, (train_index, test_index) in enumerate(kf.split(dfc)):
        print(i)
        _run_fold(dfc.iloc[train_index],
                  dfc.iloc[test_index],
                  f'crossval_resultsGallen/RvPredsrv2_{i}.npy',
                  f'crossval_resultsGallen/RvTruthsrv2_{i}.npy')

In [None]:
# Feature groups; spatialCrossVal() reads these two lists as module-level globals.
categorical_cols = ['GLG']
numerical_cols=['Est_m', 'Nrt_m', 'HC_m', 'VC_m', 'Slp_m', 'Prc_m', 'NDVI_m', 'PGA_Usgs', 'Sand_m', 'Silt_m', 'Clay_m', 'Bdod_m']
# Load the dataset and keep only rows whose Slp_m is greater than 10.0.
# spatialCrossVal() also reads `df` as a global.
df=gpd.read_file('Data/NepalEqUSGSGallen.gpkg')
df = df[df.Slp_m>10.0]
# Run the spatial (hold out one `id` at a time) cross validation.
spatialCrossVal()

In [None]:
# Feature groups; randomCrossVal() reads these two lists as module-level globals.
categorical_cols = ['GLG']
numerical_cols=['Est_m', 'Nrt_m', 'HC_m', 'VC_m', 'Slp_m', 'Prc_m', 'NDVI_m', 'PGA_Usgs', 'Sand_m', 'Silt_m', 'Clay_m', 'Bdod_m']
# Reload the dataset and keep only rows whose Slp_m is greater than 10.0
# (same file and filter as the spatial cross-validation cell).
df=gpd.read_file('Data/NepalEqUSGSGallen.gpkg')
df = df[df.Slp_m>10.0]
# Run the shuffled 10-fold cross validation on the filtered frame.
randomCrossVal(df)

# Cross validation plots

In [None]:
import numpy as np
def roc_curve(y_true, y_prob, thresholds):
    """Compute ROC points at explicitly given thresholds.

    Parameters
    ----------
    y_true : array-like of labels; only entries equal to 0 or 1 are counted.
    y_prob : array-like of scores, same shape as `y_true`.
    thresholds : iterable of cut-offs; a sample is predicted positive when
        its score is `>=` the threshold.

    Returns
    -------
    (fpr, tpr) : two lists with one float per threshold, in threshold order.
        A rate is NaN when its class is absent from `y_true` (no negatives
        for `fpr`, no positives for `tpr`).
    """
    # Coerce so that plain Python lists work as well as numpy arrays.
    y_true = np.asarray(y_true)
    y_prob = np.asarray(y_prob)

    # The class masks and counts do not depend on the threshold.
    positives = y_true == 1
    negatives = y_true == 0
    n_pos = int(positives.sum())  # tp + fn
    n_neg = int(negatives.sum())  # fp + tn

    fpr = []
    tpr = []
    for threshold in thresholds:
        predicted_pos = y_prob >= threshold
        fp = int(np.sum(predicted_pos & negatives))
        tp = int(np.sum(predicted_pos & positives))
        # Return NaN explicitly instead of dividing 0 by 0.
        fpr.append(fp / n_neg if n_neg else float('nan'))
        tpr.append(tp / n_pos if n_pos else float('nan'))

    return fpr, tpr

In [None]:
#plot 
import sklearn.metrics
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.pyplot import figure
import numpy as np 
from scipy.interpolate import interp1d
# ROC curves of the spatial cross validation: one grey curve per fold, with the
# fold that has the median AUC re-drawn in green.
figure(figsize=(4, 4), dpi=300)
# `Aucs` is reused by the spatial AUC box-plot cell further down.
Aucs=[]
fprs=[]
tprs=[]
for i in range(1,11):
    preds=np.load(f'crossval_resultsGallen/SpPredsrv1_{str(i)}.npy')
    truths=np.load(f'crossval_resultsGallen/SpTruthsrv1_{str(i)}.npy')
    # Plotted curve: the notebook's own roc_curve on a fixed grid of 500
    # thresholds, so every fold yields lists of the same length.
    fpr,tpr = roc_curve(truths.flatten(),preds.flatten(),thresholds=np.linspace(0,1,500))
    plt.plot(np.array(fpr),np.array(tpr),lw=0.5,color='grey')
    fprs.append(fpr)
    tprs.append(tpr)
    # AUC: computed separately from scikit-learn's ROC (this rebinds fpr/tpr).
    fpr,tpr,thresholds=sklearn.metrics.roc_curve(truths.flatten(), preds.flatten(),drop_intermediate=False)
    Aucs.append(sklearn.metrics.auc(fpr,tpr))
# Fold whose AUC sits at position len(Aucs)//2 of the ascending order
# (with 10 folds: the upper of the two middle values).
median_idx = np.argsort(Aucs)[len(Aucs)//2]

plt.plot(np.array(fprs)[median_idx],np.array(tprs)[median_idx],lw=0.5,color='g')


# Disabled experiment: shaded min/max envelope of the fold curves.
# common_fpr = np.linspace(0.001, 1, 100)
# interp_tpr1 = interp1d(np.array(fprs)[np.argmin(np.array(tprs),axis=0)].min(axis=0), np.array(tprs).min(axis=0), kind='linear')(common_fpr)
# interp_tpr2 = interp1d(np.array(fprs)[np.argmax(np.array(tprs),axis=0)].max(axis=0), np.array(tprs).max(axis=0), kind='linear')(common_fpr)
# plt.fill_between(common_fpr,interp_tpr1,interp_tpr2, color='grey')

# Chance diagonal. NOTE: plt.plot returns the created line objects, so `ax`
# here is not an Axes.
ax = plt.plot([0, 1], [0, 1], color="navy", lw=1, linestyle="--")
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.0])
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("ROC Curve Spatial Crossvalidation")
# plt.text(0.38, 0.11,"Accuracy=0.8131")
# plt.legend(loc="lower right")
plt.axis('square')
plt.tight_layout()
plt.savefig('PINNPlotsGallen/rocSpValrev1.pdf')
plt.show()


In [None]:
#plot 
import sklearn.metrics
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.pyplot import figure
import numpy as np 
# ROC curves of the random 10-fold cross validation: one grey curve per fold,
# with the fold that has the median AUC re-drawn in green.
figure(figsize=(4, 4), dpi=300)
# `Aucs2` is reused by the random-CV AUC box-plot cell further down.
Aucs2=[]
fprs=[]
tprs=[]
for i in range(0,10):
    preds=np.load(f'crossval_resultsGallen/RvPredsrv2_{str(i)}.npy')
    truths=np.load(f'crossval_resultsGallen/RvTruthsrv2_{str(i)}.npy')
    # Here both the plotted curve and the AUC come from scikit-learn's ROC.
    fpr,tpr,thresholds=sklearn.metrics.roc_curve(truths.flatten(), preds.flatten())
    plt.plot(
        fpr,
        tpr,
        # color="darkorange",
        lw=0.25,
        # The label is set but not shown: the legend call below is commented out.
        label=f"RandomArea{str(i+1)}",color='grey'
    )
    Aucs2.append(sklearn.metrics.auc(fpr,tpr))
    fprs.append(fpr)
    tprs.append(tpr)

# Fold whose AUC sits at position len(Aucs2)//2 of the ascending order.
median_idx = np.argsort(Aucs2)[len(Aucs2)//2]

plt.plot(fprs[median_idx],tprs[median_idx],lw=1.0,color='g')

# Chance diagonal. NOTE: plt.plot returns the created line objects, so `ax`
# here is not an Axes.
ax=plt.plot([0, 1], [0, 1], color="navy", lw=1, linestyle="--")
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.0])
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("ROC Curve Random Crossvalidation")
# plt.text(0.38, 0.11,"Accuracy=0.8131")
# plt.legend(loc="lower right")
plt.axis('square')
plt.tight_layout()
plt.savefig('PINNPlotsGallen/rocrandValrv1.pdf')
plt.show()


In [None]:

# Box plot of the per-fold AUCs of the random cross validation (`Aucs2` is
# filled by the random-CV ROC cell). The output file is named "violin", but
# the figure drawn here is a box plot.
plt.boxplot(Aucs2)
plt.ylim(0.5,1)
plt.savefig('PINNPlotsGallen/violin_randomvalrv2.pdf')


In [None]:
# Box plot of the per-fold AUCs of the spatial cross validation (`Aucs` is
# filled by the spatial-CV ROC cell). The output file is named "violin", but
# the figure drawn here is a box plot.
plt.boxplot(Aucs)
plt.ylim(0.5,1)
plt.savefig('PINNPlotsGallen/violin_spvalrv2.pdf')

# Plot all confusion maps


In [None]:
#plot 
import sklearn.metrics
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.pyplot import figure
import numpy as np 
import geopandas as gpd
%load_ext autoreload
import numpy as np
import seaborn as sns
import os
# import GallenModel as ClassificationModelsimple
import geopandas as gpd
import tensorflow as tf
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
# Label every row with its confusion class from the random cross validation.
df = gpd.read_file('Data/NepalEqUSGSGallen.gpkg')
df = df[df.Slp_m > 10.0]

# Re-create the folds used by randomCrossVal (same n_splits / random_state /
# shuffle on the same filtered frame). KFold yields *positional* indices.
kf = KFold(n_splits=10, random_state=42, shuffle=True)
rcl_indexes = [test_index for _, test_index in kf.split(df)]

for i in range(0, 10):
    print(i)
    # NOTE(review): randomCrossVal in this notebook writes `RvPredsrv2_*` /
    # `RvTruthsrv2_*`, while this cell reads the `rv1` files - confirm which
    # revision is intended.
    preds = np.load(f'crossval_resultsGallen/RvPredsrv1_{str(i)}.npy')
    truths = np.load(f'crossval_resultsGallen/RvTruthsrv1_{str(i)}.npy')

    truth_flat = truths.flatten()
    # Round the predicted score to the nearest class label.
    pred_flat = np.rint(preds.flatten())

    # confusion map
    confusiondata = np.empty(truth_flat.shape, dtype=object)
    confusiondata[(truth_flat == 1) & (pred_flat == 1)] = 'True Positive'
    confusiondata[(truth_flat == 0) & (pred_flat == 1)] = 'False Positive'
    confusiondata[(truth_flat == 1) & (pred_flat == 0)] = 'False Negative'
    confusiondata[(truth_flat == 0) & (pred_flat == 0)] = 'True Negative'

    # `df` keeps its original index labels after the slope filter, so the
    # positional fold indices must be translated to labels before `.loc`.
    sel_idx = df.index[rcl_indexes[i]]
    assert len(sel_idx) == len(confusiondata), (
        f"fold {i}: {len(confusiondata)} predictions for {len(sel_idx)} rows"
    )
    df.loc[sel_idx, 'rcl_confusion'] = confusiondata
df_rc = df

In [None]:
# Label every row with its confusion class from the spatial cross validation.
# The frame is reloaded, so `df_rc` from the previous cell is a separate object.
df=gpd.read_file('Data/NepalEqUSGSGallen.gpkg')
df = df[df.Slp_m>10.0]
for i in range(1,11):
    # NOTE(review): spatialCrossVal in this notebook writes `SpPredsrv1_*` /
    # `SpTruthsrv1_*`, while this cell reads the `rv2` files - confirm which
    # revision is intended.
    preds=np.load(f'crossval_resultsGallen/SpPredsrv2_{str(i)}.npy')
    truths=np.load(f'crossval_resultsGallen/SpTruthsrv2_{str(i)}.npy')
    print(i)
    #confusion  map
    # np.rint rounds each prediction to the nearest integer before it is
    # compared with the label (presumably scores lie in [0, 1] - confirm).
    confusiondata=np.empty(truths.shape,dtype=object)
    confusiondata[np.bitwise_and(truths.flatten()==1,np.rint(preds.flatten())==1)]='True Positive'
    confusiondata[np.bitwise_and(truths.flatten()==0,np.rint(preds.flatten())==1)]='False Positive'
    confusiondata[np.bitwise_and(truths.flatten()==1,np.rint(preds.flatten())==0)]='False Negative'
    confusiondata[np.bitwise_and(truths.flatten()==0,np.rint(preds.flatten())==0)]='True Negative'
    # Index labels of the rows in region `i`. This assumes the saved arrays are
    # in the same row order as df[df.id == i], which is how spatialCrossVal
    # builds its unshuffled test set.
    sel_idx=df.index[df['id']==i].tolist()
    df.loc[sel_idx,'scl_confusion']=confusiondata
df_sc=df

In [None]:
import contextily as cx
from matplotlib_scalebar.scalebar import ScaleBar
# Map of the spatial-CV confusion classes, drawn over a shaded-relief basemap.
# Reproject to EPSG:3857 before adding the basemap tiles.
df_wm = df_sc.to_crs(epsg=3857)
# df_sc was already filtered with Slp_m > 10.0 when it was built; this repeats that filter.
df_wm = df_wm[df_wm['Slp_m']>10.0]
ax=df_wm.plot(column='scl_confusion',legend=False,figsize=(10, 10), alpha=0.6,linewidth=0)
cx.add_basemap(ax,source='NASAGIBS.ASTER_GDEM_Greyscale_Shaded_Relief')
# NOTE(review): ScaleBar(1) presumably declares 1 data unit = 1 metre - confirm for this CRS.
ax.add_artist(ScaleBar(1))
# ax.set_axis_off()

In [None]:
# Save the figure that `ax` belongs to (the spatial-CV confusion map when cells
# are run in order), keeping the axes background colour as the figure face colour.
ax.get_figure().savefig('PINNPlotsGallen/confusionmap_scvrv2.pdf',dpi=500,facecolor=ax.get_facecolor())

In [None]:
import contextily as cx
# Map of the random-CV confusion classes, drawn over a shaded-relief basemap.
# Reproject to EPSG:3857 before adding the basemap tiles.
df_wm = df_rc.to_crs(epsg=3857)
# df_rc was already filtered with Slp_m > 10.0 when it was built; this repeats that filter.
df_wm = df_wm[df_wm['Slp_m']>10.0]
ax=df_wm.plot(column='rcl_confusion',legend=False,figsize=(10, 10), alpha=0.6,linewidth=0)
cx.add_basemap(ax,source='NASAGIBS.ASTER_GDEM_Greyscale_Shaded_Relief')
# `ScaleBar` is imported in the spatial-CV map cell, so that cell must run first.
# NOTE(review): ScaleBar(1) presumably declares 1 data unit = 1 metre - confirm for this CRS.
ax.add_artist(ScaleBar(1))

In [None]:
# Save the figure that `ax` belongs to (the random-CV confusion map when cells
# are run in order), keeping the axes background colour as the figure face colour.
ax.get_figure().savefig('PINNPlotsGallen/confusionmap_rcvrv2.pdf',dpi=500,facecolor=ax.get_facecolor())

In [None]:
# Horizontal bar chart of how many rows fall into each random-CV confusion
# class; the x-axis is logarithmic and limited to 10 .. 1e4.
ax=df_rc.rcl_confusion.value_counts().plot(kind='barh',logx=True,xlim=(10,1e4))
ax.get_figure().savefig('PINNPlotsGallen/barplot_rcvrv2.pdf',dpi=500,facecolor=ax.get_facecolor())

In [None]:
# Horizontal bar chart of how many rows fall into each spatial-CV confusion
# class; the x-axis is logarithmic and limited to 10 .. 1e4.
ax=df_sc.scl_confusion.value_counts().plot(kind='barh',logx=True,xlim=(10,1e4))
ax.get_figure().savefig('PINNPlotsGallen/barplot_scvrv2.pdf',dpi=500,facecolor=ax.get_facecolor())

# Activation Function Plot

In [None]:
import matplotlib.pyplot as plt
import numpy as np


In [None]:
def lsactivation(x, midpoint=5.0):
    """Logistic curve ``1 / (1 + exp(midpoint - x))``.

    Parameters
    ----------
    x : scalar or array-like input; lists are accepted.
    midpoint : value of `x` at which the result equals 0.5. Defaults to 5.0,
        the constant that used to be hard-coded.

    Returns
    -------
    A float for scalar input, otherwise an ndarray shaped like `x`.
    """
    x = np.asarray(x, dtype=float)
    return 1 / (1 + np.exp(midpoint - x))

In [None]:
# Sample the activation at 100 evenly spaced points on [0, 10] for plotting.
x=np.linspace(0,10,100)
y=lsactivation(x)

In [None]:
# Plot the activation curve sampled in the previous cell.
fig = plt.figure()
ax = fig.gca()
# Ticks every 1 on x (0..10) and every 0.1 on y (0..1).
ax.set_xticks(np.arange(0, 11, 1))
ax.set_yticks(np.arange(0, 1.1, 0.1))
plt.plot(x,y,label=r"$p(s) = \frac{1}{1+\exp(5-D(s))}$")
plt.grid()
# Reference lines through the midpoint of the curve: lsactivation(5) == 0.5.
plt.axvline(5.0, color='black')
plt.axhline(0.50, color='black')
plt.xlabel("Deformation (cm)")
plt.ylabel("Probability")
plt.legend()
plt.savefig("PINNPlotsGallen/landslideactivation.png",dpi=300)