## PhD - Detect abandoned villages in Nigeria

### Libraries

In [None]:
# NOTE(review): this magic requests TensorFlow 1.x, but the output recorded for this
# cell says TensorFlow was already loaded, and the next cell reports version 2.3.0.
# The request therefore had no effect in the recorded run - confirm the intended version.
%tensorflow_version 1.x

TensorFlow is already loaded. Please restart the runtime to change versions.


In [None]:
# Report the library versions in use and check whether the kernel is backed by a GPU
import sys

import tensorflow.keras
import pandas as pd
import sklearn as sk
import tensorflow as tf

# Versions of the deep-learning stack
print(f"Tensor Flow Version: {tf.__version__}")
print(f"Keras Version: {tensorflow.keras.__version__}")
print()
# Versions of the interpreter and the data-science libraries
print(f"Python {sys.version}")
print(f"Pandas {pd.__version__}")
print(f"Scikit-Learn {sk.__version__}")
# A non-empty list of physical GPU devices means TensorFlow can see at least one GPU
gpu = len(tf.config.list_physical_devices('GPU'))>0
print("GPU is", "available" if gpu else "NOT AVAILABLE")

Tensor Flow Version: 2.3.0
Keras Version: 2.4.0

Python 3.6.9 (default, Jul 17 2020, 12:50:27) 
[GCC 8.4.0]
Pandas 1.1.3
Scikit-Learn 0.22.2.post1
GPU is available


In [None]:
# Install the package that provides efficientnet.keras (imported in the next cell).
# NOTE(review): the version is not pinned, so a fresh runtime may install a different release.
!pip install efficientnet



In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import json
import os
from os import listdir
import random

import tensorflow as tf
print(tf.__version__)

from keras.callbacks import ModelCheckpoint
from keras.models import Model
from keras.layers import Dropout, Flatten, Dense
from keras import optimizers
from efficientnet.keras import EfficientNetB0
from keras.models import load_model

from sklearn.model_selection import train_test_split

2.3.0


In [None]:
# Mount Google Drive so that the project folder is reachable from the Colab runtime
from google.colab import drive
drive.mount('/content/drive')

import os
# Project folder on the mounted drive; it is passed to define_path(path) in the
# "Evaluation for Data Augmentation models" section
path = "/content/drive/My Drive/PhD_Detect_Abandoned_Villages"

Mounted at /content/drive


### Helper functions

In [None]:
def define_path(path_base="C:\\Users\\b1077901\\PhD_Projects"):
    """Return the project base folder and its "Array" and "Models" sub-folders.

    Parameters
    ----------
    path_base : str, optional
        Root folder of the project. The default is the local Windows folder
        that used to be hard-coded here, so existing ``define_path()`` calls
        return exactly the same paths as before. Pass another root (for
        example the mounted Google Drive folder) to run on a different machine.

    Returns
    -------
    tuple of str
        ``(path_base, path_array, path_model_base)`` where ``path_array`` is
        ``<path_base>/Array`` and ``path_model_base`` is ``<path_base>/Models``.
    """
    path_array = os.path.join(path_base, "Array")
    path_model_base = os.path.join(path_base, "Models")

    return path_base, path_array, path_model_base

# load the image array of one dataset split
def load_x(data_version, img_type, buffer_len, data_type):
    """Load the image array stored for one dataset split.

    The file name is assembled as
    ``dataset_<data_version>_<img_type>_Buf<buffer_len>_<data_type>_img.npy``
    and looked up in the module-level ``path_array`` folder. The shape of the
    loaded array is printed before the array is returned.
    """
    name_parts = ["dataset", str(data_version), img_type, "Buf" + str(buffer_len), data_type, "img.npy"]
    images = np.load(os.path.join(path_array, "_".join(name_parts)))
    print(data_type, "img shape", images.shape)

    return images

# load the image array of one status class within a dataset split
# example file name: dataset_1_NDVI_Buf320_Dest_test_img.npy
def load_x_status_seperate(data_version, img_type, buffer_len, data_type, status):
    """Load the images of a single status class for one dataset split.

    The file name is assembled as
    ``dataset_<data_version>_<img_type>_Buf<buffer_len>_<status>_<data_type>_img.npy``
    and looked up in the module-level ``path_array`` folder. The shape of the
    loaded array is printed before the array is returned.
    """
    name_parts = ["dataset", str(data_version), img_type, "Buf" + str(buffer_len), status, data_type, "img.npy"]
    images = np.load(os.path.join(path_array, "_".join(name_parts)))
    print("img shape", images.shape)

    return images

def load_y(data_version, img_type, buffer_len, merge_type, data_type):
    """Load the label array that belongs to one dataset split and merge type.

    The file name is assembled as
    ``dataset_<data_version>_<img_type>_Buf<buffer_len>_<data_type>_<merge_type>_lab.npy``
    and looked up in the module-level ``path_array`` folder. The shape of the
    loaded array is printed before the array is returned.
    """
    name_parts = ["dataset", str(data_version), img_type, "Buf" + str(buffer_len), data_type, merge_type, "lab.npy"]
    labels = np.load(os.path.join(path_array, "_".join(name_parts)))
    print(merge_type, "lab shape", labels.shape)

    return labels

def get_model_type(img_type, buffer_len, merge_type, n_fc, batch_size):
    """Build the identifier string shared by a model file and its history file.

    The result has the form
    ``<img_type>_buf<buffer_len>_<merge_type>_fc<n_fc>_bs<batch_size>``.
    """
    fields = [
        img_type,
        "buf" + str(buffer_len),
        merge_type,
        "fc" + str(n_fc),
        "bs" + str(batch_size),
    ]

    return "_".join(fields)

def get_model_name(img_type, buffer_len, merge_type, n_fc, batch_size):
    """Return the path of the saved model file for one hyperparameter combination.

    The file is named ``<model_type>_model.h5`` (see ``get_model_type``) and
    is located in the module-level ``path_model_base`` folder.
    """
    file_name = get_model_type(img_type, buffer_len, merge_type, n_fc, batch_size) + "_model.h5"

    return os.path.join(path_model_base, file_name)

def get_history_name(img_type, buffer_len, merge_type, n_fc, batch_size):
    """Return the path of the training-history CSV for one hyperparameter combination.

    The file is named ``Model_<model_type>_history.csv`` (see
    ``get_model_type``) and is located in the module-level
    ``path_model_base`` folder.
    """
    file_name = "Model_" + get_model_type(img_type, buffer_len, merge_type, n_fc, batch_size) + "_history.csv"

    return os.path.join(path_model_base, file_name)

def get_train_val_acc(img_type, buffer_len, merge_type, n_fc, batch_size):
    """Read the last recorded training and validation accuracy of a model.

    The values are taken from the final row of the ``accuracy`` and
    ``val_accuracy`` columns of the model's history CSV
    (see ``get_history_name``).

    Returns
    -------
    tuple
        ``(train_acc, val_acc)``.
    """
    history_df = pd.read_csv(get_history_name(img_type, buffer_len, merge_type, n_fc, batch_size))

    # last row of each column = last entry written to the history file
    final_train_acc = history_df["accuracy"].tolist()[-1]
    final_val_acc = history_df["val_accuracy"].tolist()[-1]

    return final_train_acc, final_val_acc

def get_num_total_and_predicted_destroyed(model, data_version, img_type, buffer_len, data_type, status):
    """Count the images of one status class and how many round to a prediction of 1.

    The images are loaded with ``load_x_status_seperate``; the model output is
    rounded with ``np.round`` and summed.
    (Presumably the model emits one value in [0, 1] per image, with 1 meaning
    "destroyed" - confirm against the model definition.)

    Returns
    -------
    tuple
        ``(num_total, num_pred_dest)``: the number of images and the sum of
        the rounded predictions.
    """
    status_images = load_x_status_seperate(data_version, img_type, buffer_len, data_type, status)
    image_count = status_images.shape[0]

    rounded_predictions = np.round(model.predict(status_images))

    return image_count, np.sum(rounded_predictions)

def get_seperate_acc_label_1(model, data_version, img_type, buffer_len, data_type, status):
    """Return the share of one status class whose rounded prediction is 1.

    Computed as ``num_pred_dest / num_total`` from
    ``get_num_total_and_predicted_destroyed``.
    """
    total, predicted_destroyed = get_num_total_and_predicted_destroyed(
        model, data_version, img_type, buffer_len, data_type, status
    )

    return predicted_destroyed / total
    
def get_seperate_acc_label_0(model, data_version, img_type, buffer_len, data_type, status):
    """Return the share of one status class whose rounded prediction is 0.

    Computed as ``1 - num_pred_dest / num_total`` from
    ``get_num_total_and_predicted_destroyed``.
    """
    total, predicted_destroyed = get_num_total_and_predicted_destroyed(
        model, data_version, img_type, buffer_len, data_type, status
    )
    destroyed_share = predicted_destroyed / total

    return 1 - destroyed_share

def save_accuracy_to_excel(acc_dict, acc_out_name):
    """Write the collected accuracies to an Excel workbook and print a preview.

    Parameters
    ----------
    acc_dict : dict
        Maps column names to lists of values; converted to a DataFrame.
    acc_out_name : str
        File name of the workbook, created inside the module-level
        ``path_base`` folder (the writer is opened with ``mode='w'``).
    """
    out_path = os.path.join(path_base, acc_out_name)
    acc_table = pd.DataFrame.from_dict(acc_dict)

    with pd.ExcelWriter(out_path, mode='w') as excel_writer:
        acc_table.to_excel(excel_writer, sheet_name='Sheet_name_1')

    print("Accurcy excel saved.")
    # show the first five rows as a quick sanity check
    print(acc_table.head())

### Load Train and Test Data

In [None]:
# Resolve the project folders used by the load_* / get_*_name helpers
# (they read path_array, path_model_base and path_base as globals)
path_base, path_array, path_model_base = define_path()

# Image types to evaluate; the value is part of the dataset and model file names
img_type_list = ["NDVI", "PCA"]

# Side lengths of the square buffer; the value is part of the dataset and model file names
# (the recorded test outputs show image sides of buffer / 10 pixels, e.g. 320 -> 32)
buffer_list = [320, 480, 640, 800]

# Dataset splits
data_type_list = ["train", "test"]

# Merge types (how the "partially functional" class is folded into a binary label):
# partfull: merge partially functional and fully functional together
# partdest: merge partially functional and destroyed together
merge_type_list = ["partfull", "partdest"]
# Status classes that have their own image files; the evaluation loop relies on this
# order (index 0 = "Dest", 1 = "Full", 2 = "Part")
status_list = ["Dest", "Full", "Part"]

# Hyperparameter grid of the trained models; both values are part of the model file names
# (n_fc presumably is the size of the fully connected layer - confirm against the training notebook)
n_fc_list = [16, 24, 32, 40]
batch_size_list = [16, 32, 64, 128, 256]

In [None]:
# Split that is evaluated by the loop in the next cell
data_type = "test"

# Column names of the result table; each model evaluation appends one value per column
acc_dict_keys = [
    "img_type", "buf", "merge", "fc", "bs",
    "train", "val", "test",
    "dest", "full", "part-to-dest", "part-to-full",
]

# Start with an independent empty list for every column
acc_dict = {column: [] for column in acc_dict_keys}

acc_dict

{'img_type': [],
 'buf': [],
 'merge': [],
 'fc': [],
 'bs': [],
 'train': [],
 'val': [],
 'test': [],
 'dest': [],
 'full': [],
 'part-to-dest': [],
 'part-to-full': []}

In [None]:
# Evaluate every saved model on the selected split (data_type) and collect the
# results in acc_dict, one value per column in acc_dict_keys.
#
# data_version numbers the (img_type, buffer_len) datasets 1, 2, ... in loop order
# and is part of the dataset file names.
data_version = 0
test_num = 0

for img_type in img_type_list:

    for buffer_len in buffer_list:

        data_version += 1

        # The image array does not depend on the merge type, so it is loaded
        # once per dataset instead of once per merge type.
        X_ts = load_x(data_version, img_type, buffer_len, data_type)

        for merge_type in merge_type_list:

            Y_ts = load_y(data_version, img_type, buffer_len, merge_type, data_type)

            for n_fc in n_fc_list:

                for batch_size in batch_size_list:

                    # NOTE(review): many models are loaded in one session; if memory
                    # grows over the run, consider releasing the previous model here.
                    model_name = get_model_name(img_type, buffer_len, merge_type, n_fc, batch_size)
                    model = load_model(model_name)

                    # test_acc: overall accuracy on the evaluated split
                    loss, test_acc = model.evaluate(X_ts, Y_ts)

                    # last recorded training / validation accuracy from the history file
                    train_acc, val_acc = get_train_val_acc(img_type, buffer_len, merge_type, n_fc, batch_size)

                    # share of "Dest" images predicted as 1
                    dest_acc = get_seperate_acc_label_1(model, data_version, img_type, buffer_len, data_type, status_list[0])

                    # share of "Full" images predicted as 0
                    full_acc = get_seperate_acc_label_0(model, data_version, img_type, buffer_len, data_type, status_list[1])

                    # The two "Part" shares are complements of the same prediction
                    # (label_0 == 1 - label_1), so the images are loaded and
                    # predicted once instead of twice.
                    part_to_dest_acc = get_seperate_acc_label_1(model, data_version, img_type, buffer_len, data_type, status_list[2])
                    part_to_full_acc = 1 - part_to_dest_acc

                    # same order as acc_dict_keys
                    final_items = [img_type, buffer_len, merge_type, n_fc, batch_size, train_acc, val_acc, test_acc, dest_acc, full_acc, part_to_dest_acc, part_to_full_acc]

                    for key, value in zip(acc_dict_keys, final_items):
                        acc_dict[key].append(value)

                    test_num += 1
                    print("Test number:", test_num)
                    print("")

            # Checkpoint: write everything collected so far after each merge type.
            # Both merge types of a dataset use the same file name, so the second
            # write replaces the first with a superset of its rows.
            acc_out_name = "accuracy_"+str(data_version)+".xlsx"
            save_accuracy_to_excel(acc_dict, acc_out_name)

test img shape (504, 32, 32, 3)
partfull lab shape (504,)
img shape (224, 32, 32, 3)
img shape (248, 32, 32, 3)
img shape (32, 32, 32, 3)
img shape (32, 32, 32, 3)
Test number: 1

img shape (224, 32, 32, 3)
img shape (248, 32, 32, 3)
img shape (32, 32, 32, 3)
img shape (32, 32, 32, 3)
Test number: 2

img shape (224, 32, 32, 3)
img shape (248, 32, 32, 3)
img shape (32, 32, 32, 3)
img shape (32, 32, 32, 3)
Test number: 3

img shape (224, 32, 32, 3)
img shape (248, 32, 32, 3)
img shape (32, 32, 32, 3)
img shape (32, 32, 32, 3)
Test number: 4

img shape (224, 32, 32, 3)
img shape (248, 32, 32, 3)
img shape (32, 32, 32, 3)
img shape (32, 32, 32, 3)
Test number: 5

img shape (224, 32, 32, 3)
img shape (248, 32, 32, 3)
img shape (32, 32, 32, 3)
img shape (32, 32, 32, 3)
Test number: 6

img shape (224, 32, 32, 3)
img shape (248, 32, 32, 3)
img shape (32, 32, 32, 3)
img shape (32, 32, 32, 3)
Test number: 7

img shape (224, 32, 32, 3)
img shape (248, 32, 32, 3)
img shape (32, 32, 32, 3)
img shap

img shape (216, 48, 48, 3)
img shape (241, 48, 48, 3)
img shape (32, 48, 48, 3)
img shape (32, 48, 48, 3)
Test number: 41

img shape (216, 48, 48, 3)
img shape (241, 48, 48, 3)
img shape (32, 48, 48, 3)
img shape (32, 48, 48, 3)
Test number: 42

img shape (216, 48, 48, 3)
img shape (241, 48, 48, 3)
img shape (32, 48, 48, 3)
img shape (32, 48, 48, 3)
Test number: 43

img shape (216, 48, 48, 3)
img shape (241, 48, 48, 3)
img shape (32, 48, 48, 3)
img shape (32, 48, 48, 3)
Test number: 44

img shape (216, 48, 48, 3)
img shape (241, 48, 48, 3)
img shape (32, 48, 48, 3)
img shape (32, 48, 48, 3)
Test number: 45

img shape (216, 48, 48, 3)
img shape (241, 48, 48, 3)
img shape (32, 48, 48, 3)
img shape (32, 48, 48, 3)
Test number: 46

img shape (216, 48, 48, 3)
img shape (241, 48, 48, 3)
img shape (32, 48, 48, 3)
img shape (32, 48, 48, 3)
Test number: 47

img shape (216, 48, 48, 3)
img shape (241, 48, 48, 3)
img shape (32, 48, 48, 3)
img shape (32, 48, 48, 3)
Test number: 48

img shape (216, 

test img shape (480, 64, 64, 3)
partfull lab shape (480,)
img shape (211, 64, 64, 3)
img shape (240, 64, 64, 3)
img shape (29, 64, 64, 3)
img shape (29, 64, 64, 3)
Test number: 81

img shape (211, 64, 64, 3)
img shape (240, 64, 64, 3)
img shape (29, 64, 64, 3)
img shape (29, 64, 64, 3)
Test number: 82

img shape (211, 64, 64, 3)
img shape (240, 64, 64, 3)
img shape (29, 64, 64, 3)
img shape (29, 64, 64, 3)
Test number: 83

img shape (211, 64, 64, 3)
img shape (240, 64, 64, 3)
img shape (29, 64, 64, 3)
img shape (29, 64, 64, 3)
Test number: 84

img shape (211, 64, 64, 3)
img shape (240, 64, 64, 3)
img shape (29, 64, 64, 3)
img shape (29, 64, 64, 3)
Test number: 85

img shape (211, 64, 64, 3)
img shape (240, 64, 64, 3)
img shape (29, 64, 64, 3)
img shape (29, 64, 64, 3)
Test number: 86

img shape (211, 64, 64, 3)
img shape (240, 64, 64, 3)
img shape (29, 64, 64, 3)
img shape (29, 64, 64, 3)
Test number: 87

img shape (211, 64, 64, 3)
img shape (240, 64, 64, 3)
img shape (29, 64, 64, 3)
i

partfull lab shape (474,)
img shape (210, 80, 80, 3)
img shape (236, 80, 80, 3)
img shape (28, 80, 80, 3)
img shape (28, 80, 80, 3)
Test number: 121

img shape (210, 80, 80, 3)
img shape (236, 80, 80, 3)
img shape (28, 80, 80, 3)
img shape (28, 80, 80, 3)
Test number: 122

img shape (210, 80, 80, 3)
img shape (236, 80, 80, 3)
img shape (28, 80, 80, 3)
img shape (28, 80, 80, 3)
Test number: 123

img shape (210, 80, 80, 3)
img shape (236, 80, 80, 3)
img shape (28, 80, 80, 3)
img shape (28, 80, 80, 3)
Test number: 124

img shape (210, 80, 80, 3)
img shape (236, 80, 80, 3)
img shape (28, 80, 80, 3)
img shape (28, 80, 80, 3)
Test number: 125

img shape (210, 80, 80, 3)
img shape (236, 80, 80, 3)
img shape (28, 80, 80, 3)
img shape (28, 80, 80, 3)
Test number: 126

img shape (210, 80, 80, 3)
img shape (236, 80, 80, 3)
img shape (28, 80, 80, 3)
img shape (28, 80, 80, 3)
Test number: 127

img shape (210, 80, 80, 3)
img shape (236, 80, 80, 3)
img shape (28, 80, 80, 3)
img shape (28, 80, 80, 3)


In [None]:
# Write the complete result table of all evaluated models to one workbook
acc_out_name = "accuracy_total.xlsx"
save_accuracy_to_excel(acc_dict, acc_out_name)

## Evaluation for Data Augmentation models

In [None]:
def define_path(path):
    """Return the base folder together with its "Models" and "Accuracy" sub-folders.

    Note: this definition re-uses the name of the ``define_path()`` helper from
    the "Helper functions" section and replaces it once this cell has run. It
    returns ``(path_base, path_model, path_accuracy)``.
    """
    sub_folders = ("Models", "Accuracy")
    path_model, path_accuracy = (os.path.join(path, folder) for folder in sub_folders)

    return path, path_model, path_accuracy

def get_model_type(img_type, buffer_len, merge_type, n_fc, batch_size, DA_type=None):
    """Build the identifier string of a model, optionally with a data-augmentation suffix.

    Parameters
    ----------
    img_type, buffer_len, merge_type, n_fc, batch_size
        Combined as ``<img_type>_buf<buffer_len>_<merge_type>_fc<n_fc>_bs<batch_size>``.
    DA_type : str, optional
        Data-augmentation tag appended as ``_<DA_type>``. When omitted, no
        suffix is added. This keeps the five-argument calls made by
        ``get_model_name`` and ``get_history_name`` working after this cell
        has replaced the earlier five-argument definition.

    Returns
    -------
    str
        The model identifier.
    """
    model_type = img_type + "_buf" + str(buffer_len) + "_" + merge_type + "_fc" + str(n_fc) + "_bs" + str(batch_size)

    if DA_type is not None:
        model_type += "_" + DA_type

    return model_type

In [None]:
# Settings for the data-augmentation evaluation.
# `path` is the Google Drive project folder set in the drive-mount cell.
path_base, path_model, path_accuracy = define_path(path)
# A single configuration is used in this section (one value per hyperparameter)
img_type = "PCA"
merge_type = "partfull"
buffer_len = 640
n_fc = 24
batch_size = 256
# NOTE(review): nbands, n_epoch and val_division are not used in the visible cells;
# presumably number of image bands, training epochs and validation share - confirm
nbands = 3
n_epoch = 100
val_division = 0.2

# variables of data augmentation
# NOTE(review): units are not shown here; presumably degrees for the rotation and
# pixels for the shift - confirm against the augmentation code
rotate_angle = 60
shift_pixel = 16
direction = "left"

In [None]:
# define CNN model
# NOTE(review): eff_net_b0 and define_model are not defined in the visible part of this
# notebook, so this cell fails on a fresh kernel unless they are defined or imported first.
# buffer_len / 10 matches the image side length seen in the recorded test outputs (e.g. 640 -> 64).
pretrained_model = eff_net_b0(int(buffer_len/10))

# n_fc is passed on to define_model together with the pretrained network
model = define_model(pretrained_model, n_fc)

In [None]:
# TODO: load the test data
# TODO: load the model
# TODO: predict the test data with the model
# TODO: record the results
#
# TODO: compare the results