# Reading dataset

### Importing all necessary libraries

In [1]:
import os
import numpy as np
from tensorflow import keras
from pyrsgis import raster
from pyrsgis.convert import changeDimension
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, precision_score, recall_score
from tensorflow.keras.layers import Dense, Activation,Dropout 
import matplotlib.pyplot as plt
import pandas as pd

2024-05-20 21:46:19.711558: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [4]:
import pyrsgis
import numpy as np

# Show the names available in the pyrsgis.raster module, under a heading line
print("Functions in pyrsgis.raster:", dir(pyrsgis.raster), sep="\n")

# Stand-in reshape helper for when array_to_table cannot be found in pyrsgis
def change_dimension(array, new_shape):
    """Return ``array`` laid out with the shape ``new_shape``.

    Thin wrapper around ``numpy.reshape``: element order is kept, only the
    shape changes.

    Parameters
    ----------
    array : array_like
        Data to reshape.
    new_shape : int or tuple of int
        Target shape; it must describe the same number of elements as
        ``array`` (``numpy.reshape`` raises ``ValueError`` otherwise).

    Returns
    -------
    numpy.ndarray
        The reshaped data.
    """
    return np.reshape(array, new_shape)

# Demo input: random values in [0, 1) on a 4 x 4 grid with 3 values per cell
array = np.random.rand(4, 4, 3)
# Target layout: the two grid axes collapsed into 16 rows, 3 columns kept
new_dimensions = (4 * 4, 3)

# Reshape with the custom helper and print the resulting table under a heading
new_array = change_dimension(array, new_dimensions)
print("Using custom change_dimension function:", new_array, sep="\n")


Functions in pyrsgis.raster:
['__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__path__', '__spec__', '_create_ds', '_extract_bands', 'clip', 'clip_file', 'deepcopy', 'doc_address', 'easting', 'export', 'gdal', 'north_east', 'north_east_coordinates', 'northing', 'np', 'os', 'radiometric_correction', 'raster_dtype', 'read', 'shift', 'shift_file', 'trim', 'trim_array', 'trim_file']
Using custom change_dimension function:
[[0.0228538  0.82380297 0.28760177]
 [0.21677325 0.78479013 0.81660144]
 [0.99234807 0.00883138 0.24773764]
 [0.19319875 0.61278216 0.09743102]
 [0.58781297 0.68029528 0.90636807]
 [0.49589306 0.70055783 0.99667284]
 [0.03981985 0.36160651 0.32616887]
 [0.41845809 0.66536481 0.42697694]
 [0.45908418 0.79831511 0.64809279]
 [0.5939911  0.27247278 0.48535374]
 [0.70976455 0.14211152 0.35020648]
 [0.96376892 0.23259424 0.01119909]
 [0.85016538 0.44194166 0.04990697]
 [0.35559255 0.16377609 0.35616319]
 [0.97404774 0.41988568 0.6

In [7]:
# Change the directory
# NOTE(review): absolute, machine-specific path. os.chdir also changes the working
# directory for every cell that runs after this one.
os.chdir(r"/home/usmanghias/UsmanGhias/Final_Year_Project/GitHub/FinalYearProject/Week 11/multi_spectral_data")

# Assign file names (kept separate from the arrays that are read from them)
debris_image = 'image1.tif'
first_label_file = 'label1.tif'
prediction_file = 'test.tif'

# Enter number of images to train from 1,2,3......n
# NOTE(review): the loop bound below is exclusive, so image/label pairs 1 .. n-1 are
# read (19 pairs for n = 20), exactly as before. If pair number n is meant to be
# included, confirm the file exists and raise n by one.
n = 20


def _read_training_pair(image_path, label_path):
    """Read one image/label raster pair and convert both with changeDimension.

    Parameters
    ----------
    image_path : str
        Image raster; all of its bands are read.
    label_path : str
        Label raster; pixels equal to 1 become 1, every other value
        (including NoData) becomes 0.

    Returns
    -------
    tuple
        ``(image_ds, label_ds, features, labels)`` where the two ``*_ds`` items
        are the data sources returned by ``raster.read`` and ``features`` /
        ``labels`` are the outputs of ``changeDimension``.
    """
    image_ds, features = raster.read(image_path, bands='all')
    label_ds, labels = raster.read(label_path)
    # Clean the labelled data to replace NoData values by zero
    labels = (labels == 1).astype(int)
    return image_ds, label_ds, changeDimension(features), changeDimension(labels)


# First pair: its data sources keep the names ds1/ds2, and it fixes the band count
ds1, ds2, first_features, first_labels = _read_training_pair(debris_image, first_label_file)
nBands = first_features.shape[1]

# Collect the per-image tables and stack them once at the end, instead of
# re-copying the ever-growing arrays on every iteration.
feature_tables = [first_features]
label_vectors = [first_labels]
for i in range(2, n):
    ds12, ds22, features_i, labels_i = _read_training_pair(
        'image' + str(i) + '.tif',
        'label' + str(i) + '.tif',
    )
    feature_tables.append(features_i)
    label_vectors.append(labels_i)

featuresdebris = np.vstack(feature_tables)
debris_label = np.hstack(label_vectors)

# Image to classify later: read all bands and convert it the same way as the features
ds3, prediction_raster = raster.read(prediction_file, bands='all')
prediction = changeDimension(prediction_raster)


print("Debris input data: ", featuresdebris.shape)
print("Debris label data: ", debris_label.shape)
print("prediction data: ", prediction.shape)

The "changeDimension()" function has moved to "array_to_table()" and will be deprecated in future versions. Please check the pyrsgis documentation at https://pyrsgis.readthedocs.io/en/master/ for more details.
The "changeDimension()" function has moved to "array_to_table()" and will be deprecated in future versions. Please check the pyrsgis documentation at https://pyrsgis.readthedocs.io/en/master/ for more details.
The "changeDimension()" function has moved to "array_to_table()" and will be deprecated in future versions. Please check the pyrsgis documentation at https://pyrsgis.readthedocs.io/en/master/ for more details.
The "changeDimension()" function has moved to "array_to_table()" and will be deprecated in future versions. Please check the pyrsgis documentation at https://pyrsgis.readthedocs.io/en/master/ for more details.
The "changeDimension()" function has moved to "array_to_table()" and will be deprecated in future versions. Please check the pyrsgis documentation at https://py

In [8]:
# Step 2. Splitting Dataset.

In [9]:
# Hold out 30% of the samples for testing; the fixed random_state keeps the
# shuffle (and therefore the split) repeatable between runs
xTrain, xTest, yTrain, yTest = train_test_split(
    featuresdebris,
    debris_label,
    test_size=0.3,
    random_state=100,
)

# Report the shape of each split: training pair first, then the test pair
print(xTrain.shape, yTrain.shape, sep="\n")
print(xTest.shape, yTest.shape, sep="\n")

(3407351, 9)
(3407351,)
(1460294, 9)
(1460294,)


In [10]:
# Step 3. Scaling the data before training DNN

In [11]:
from sklearn.preprocessing import StandardScaler

# Learn the per-band mean and standard deviation from the TRAINING split only.
# This cell previously fitted the scaler on xTest, which leaks test-set statistics
# into preprocessing and scales the training data with parameters derived from
# data the model is later evaluated on.
scaler = StandardScaler()
scaler.fit(xTrain)

# Apply the same training-derived scaling to the train, test and prediction data
xTrain, xTest = scaler.transform(xTrain), scaler.transform(xTest)
prediction = scaler.transform(prediction)

# Normalise the data (disabled alternative to StandardScaler)
# xTrain = xTrain / 255.0
# xTest = xTest / 255.0
# prediction = prediction / 255.0

# Reshape the data: insert a length-1 middle axis, (samples, bands) -> (samples, 1, bands)
# NOTE: this cell rebinds xTrain/xTest/prediction, so it is not re-runnable on its
# own; re-run the loading and splitting cells first to get the 2-D arrays back.
xTrain = xTrain.reshape((xTrain.shape[0], 1, xTrain.shape[1]))
xTest = xTest.reshape((xTest.shape[0], 1, xTest.shape[1]))
prediction = prediction.reshape((prediction.shape[0], 1, prediction.shape[1]))

# Print the shape of reshaped data
print(xTrain.shape, xTest.shape, prediction.shape)


(3407351, 1, 9) (1460294, 1, 9) (250000, 1, 9)


In [None]:
# Step 4. Defining and training deep neural network architecture (SGDNet)

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.callbacks import EarlyStopping

# Widths of the fully connected tanh layers, in order from input to output
hidden_units = (1024, 512, 256, 128, 64, 32)

# Define the model: each sample arrives as (1, nBands), is flattened to nBands
# values, passes through the tanh stack, and ends in a 2-way softmax.
# The input shape is declared with keras.Input rather than the deprecated
# `input_shape=` layer argument.
model = keras.Sequential(
    [
        keras.Input(shape=(1, nBands)),
        keras.layers.Flatten(),
        *[keras.layers.Dense(units, activation='tanh') for units in hidden_units],
        keras.layers.Dense(2, activation='softmax'),
    ]
)

# Print the model summary
model.summary()

# Define optimizer
# The former `decay=1e-6` argument was dropped: current Keras optimizers do not
# accept it (it is ignored with a warning or rejected, depending on the version).
opt = keras.optimizers.Adam(learning_rate=0.0001)

# Compile the model; labels are integer class ids (0/1), hence the sparse loss
model.compile(optimizer=opt, loss="sparse_categorical_crossentropy", metrics=["accuracy"])

# Define the early stopping callback: stop once val_loss has not improved for 20
# epochs, and roll the model back to the best epoch instead of keeping the
# weights from 20 epochs after it.
early_stop = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=20,
    verbose=1,
    restore_best_weights=True,
)

# Train the model
# NOTE(review): the test split doubles as the validation set that drives early
# stopping, so metrics later reported on xTest/yTest are optimistic. Consider
# carving a separate validation split out of the training data.
history = model.fit(
    xTrain, yTrain,
    epochs=100,
    validation_data=(xTest, yTest),
    verbose=1,
    callbacks=[early_stop],  # Pass callbacks as a list
    batch_size=2000
)


Epoch 1/100
[1m 998/1704[0m [32m━━━━━━━━━━━[0m[37m━━━━━━━━━[0m [1m3:39[0m 311ms/step - accuracy: 0.9190 - loss: 0.1937

In [None]:
# Step 5. Analyzing model performance over test data:

In [None]:
plt.rcParams["font.family"] = "Times New Roman"


def _plot_training_curve(train_values, val_values, ylabel, legend_labels, legend_loc, panel_label):
    """Plot one training metric against its validation counterpart, per epoch.

    Parameters
    ----------
    train_values, val_values : sequence of float
        Per-epoch values of the metric on the training and validation data.
    ylabel : str
        Label for the y axis.
    legend_labels : list of str
        Legend entries, training curve first.
    legend_loc : str
        Matplotlib legend location.
    panel_label : str
        Letter drawn in a box in the top-left corner of the axes.

    Returns
    -------
    tuple
        The created ``(figure, axes)`` pair.
    """
    fig, ax = plt.subplots(figsize=(10, 10), dpi=300)
    ax.plot(train_values)
    ax.plot(val_values)
    ax.set_ylabel(ylabel)
    ax.set_xlabel('Epoch')
    ax.legend(legend_labels, loc=legend_loc)
    # Axes-relative coordinates keep the panel letter inside the plot whatever the
    # number of epochs or the value range; fixed data coordinates did not.
    ax.text(
        0.02, 0.98, panel_label,
        transform=ax.transAxes, ha='left', va='top',
        fontsize=14, bbox=dict(facecolor='none', alpha=0.9),
    )
    return fig, ax


# Panel A: loss. The values are raw loss, not percentages, so no "(%)" in the label.
_plot_training_curve(
    history.history['loss'], history.history['val_loss'],
    'Loss', ['Train loss', 'Test loss'], 'upper right', 'A',
)

# Panel B: accuracy, plotted as the fraction Keras reports (0-1), not a percentage.
_plot_training_curve(
    history.history['accuracy'], history.history['val_accuracy'],
    'Accuracy', ['Train accuracy', 'Test accuracy'], 'lower right', 'B',
)

# Convert the history.history dict (one list per metric) to a DataFrame with one
# row per epoch, and save it. Passing the path lets pandas open the file itself
# with the correct newline handling.
hist_df = pd.DataFrame(history.history)
hist_csv_file = 'history.csv'
hist_df.to_csv(hist_csv_file)

# Predict for test data and keep the second softmax column (score for class 1)
yTestPredicted = model.predict(xTest)[:, 1]

# Turn the scores into hard 0/1 labels with a 0.5 threshold
yTestPredicted = (yTestPredicted > 0.5).astype(int)

# Calculate and display the error metrics against the true test labels
cMatrix = confusion_matrix(yTest, yTestPredicted)
pScore, rScore = (
    metric(yTest, yTestPredicted) for metric in (precision_score, recall_score)
)

print("Confusion matrix: for 14 nodes\n", cMatrix)
print("\nP-Score: %.3f, R-Score: %.3f" % (pScore, rScore))