<a href="https://colab.research.google.com/github/csabiu/KAML-2025/blob/main/KAML_images.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import the necessary packages



In [None]:
# TensorFlow and tf.keras
import tensorflow as tf
from tensorflow import keras

# Helper libraries
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import os

import warnings
warnings.filterwarnings('ignore')

# Download data sample

In [None]:
# Fetch the nine pieces (suffixes aa..ai) of the split archive quietly.
# NOTE(review): the {a,...,i} list relies on brace expansion in the shell that
# runs `!` commands (bash supports it) — confirm on non-Colab setups.
!wget -q https://github.com/csabiu/ML_tutorial/raw/refs/heads/master/class.tar.gz.parta{a,b,c,d,e,f,g,h,i} > tmp
# Concatenate the pieces, in glob order, back into a single gzip'd tarball.
!cat class.tar.gz.parta* > class.tar.gz
# Decompress to class.tar, then unpack it; the verbose file listing goes to
# the scratch file `tmp` instead of the cell output.
!gunzip class.tar.gz
!tar -xvf class.tar > tmp
# Clean up: `class.tar*` matches both the tarball and the downloaded pieces.
!rm class.tar*
!rm tmp
# Show what is now in the working directory.
!ls

In [None]:
# Alternative: download the whole archive in one piece
#!wget https://archive.kasi.re.kr/bigdata/temp/class.tar.gz
#!tar zxf class.tar.gz
#!rm class.tar.gz
#!ls

## Let's look at an image



In [None]:
# Open one example galaxy cutout, display it, and report its array shape
sample_path = "class/100134.jpg"
jpgfile = Image.open(sample_path)
plt.imshow(jpgfile)
print(np.asarray(jpgfile).shape)


# Let's crop it and de-center it

In [None]:
# Pick a random integer offset for each axis
# (np.random.randint excludes the upper bound, so offsets lie in -20..19)
x_shift, y_shift = np.random.randint(-20, 20), np.random.randint(-20, 20)

# Affine coefficients (a, b, c, d, e, f): with a = e = 1 and b = d = 0 the
# transform is a pure shift by (x_shift, y_shift); the output keeps the input size.
shift_coefficients = (1, 0, x_shift, 0, 1, y_shift)
translated_image = jpgfile.transform(jpgfile.size, Image.AFFINE, shift_coefficients)

# Keep the 200x200 pixel window whose corners are (112, 112) and (312, 312)
crop_box = (112, 112, 312, 312)
cropped_image = translated_image.crop(crop_box)

# Show the shifted-and-cropped result
plt.figure()
plt.imshow(cropped_image)
plt.title('Translated Image')
plt.show()

# Report the array shape of the shifted (not yet cropped) image
print(np.asarray(translated_image).shape)


# And lower the resolution

In [None]:
# Downsample the cropped cutout to target_size x target_size pixels using the
# Lanczos resampling filter. `target_size` is reused by later cells.
target_size = 32
lowres_shape = (target_size, target_size)
lowres_image = cropped_image.resize(lowres_shape, resample=Image.Resampling.LANCZOS)
plt.imshow(lowres_image)

# Split into RGB colors

In [None]:
# Separate the image into its three bands and show the first one (r)
# together with a colour scale
bands = lowres_image.split()
r, g, b = bands
plt.imshow(r)
plt.colorbar()

# Normalise the pixel values to (0,1)

In [None]:
# Convert the red band to a float array of shape (target_size, target_size)
# and rescale the 0..255 pixel values to 0..1
im = np.asarray(r, dtype=float).reshape(target_size, target_size) / 255.
plt.imshow(im)
plt.colorbar()

# Load the labels (truth) data

(Image ID, galaxy type) - 0=smooth, 1=featured

In [None]:
# Read the truth table as integers: column 0 is used as the image ID,
# column 1 as the class label
data = np.loadtxt("class/truth.txt", dtype='i')
print(data.shape)
labels = data[:, 1]

# Preview the first 10 rows
print(data[:10])

# Let's transform all the images and save them into an array

In [None]:
# Build the image stack: every image ID in column 0 of `data` gets a random
# shift, the central crop, and a downsample to target_size x target_size.
images=[]

for i in (data[:,0]):
  # one random integer offset per axis (upper bound excluded by randint)
  x_shift = np.random.randint(-20,20)
  y_shift = np.random.randint(-20,20)
  filename=str(int(i))+".jpg"
  # The context manager closes the file handle deterministically; transform()
  # returns a new in-memory image, so nothing below needs the file any more.
  with Image.open("./class/"+filename) as jpgfile:
    # convert("RGB") guarantees 3 channels, which the stacking and the
    # channel averaging below rely on
    translated_image = jpgfile.convert("RGB").transform(jpgfile.size, Image.AFFINE, (1, 0, x_shift, 0, 1, y_shift))
  cropped_image = translated_image.crop((112,112,312,312))
  lowres_image = cropped_image.resize((target_size,target_size),Image.Resampling.LANCZOS)
  images.append(np.array(lowres_image))

# stack to (n_images, target_size, target_size, 3) and rescale 0..255 -> 0..1
images=np.asarray(images)
images=images/255.

# remove colour - make each channel the same by averaging over the channel axis
grey = images.mean(axis=3, keepdims=True)
images = np.repeat(grey, 3, axis=3)
print(np.shape(images))

# Split images and labels into training and test data

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 40% of the sample as test data; the fixed random_state makes the
# split repeatable between runs
train_data, test_data, train_label, test_label = train_test_split(
    images, labels, test_size=0.4, random_state=42)

# Print the shape of each of the four arrays, one per line
for split_array in (train_data, train_label, test_data, test_label):
    print(np.shape(split_array))

In [None]:
# Display names indexed by class value (0 -> Smooth, 1 -> Features)
class_names = ['Smooth', 'Features']

# Preview the first 25 training images in a 5x5 grid, each captioned with its class
plt.figure(figsize=(10,10))
for i, (picture, label_value) in enumerate(zip(train_data[:25], train_label[:25])):
    plt.subplot(5, 5, i + 1)
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
    plt.imshow(picture, cmap=plt.cm.binary)
    plt.xlabel(class_names[int(label_value)])
plt.show()

# Define a simple neural network

In [None]:
# Baseline fully-connected classifier: flatten the image, one hidden layer of
# 64 units, then a 2-way softmax output
dense_layers = [
    keras.layers.Input(shape=(target_size, target_size, 3)),
    keras.layers.Flatten(),
    keras.layers.Dense(64, activation=tf.nn.relu),
    keras.layers.Dense(2, activation=tf.nn.softmax),
]
model = keras.Sequential(dense_layers)
model.summary()

# Labels are integer class indices, hence the sparse categorical cross-entropy
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Early stopping: give up once val_loss has gone 10 epochs without improving
# by at least min_delta, and restore the best weights seen
es = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    mode='min',
    min_delta=0.0001,
    patience=10,
    restore_best_weights=True,
    verbose=1,
)

# Learning-rate schedule: halve the learning rate after 5 epochs without a
# val_loss improvement of at least min_delta
rlr = keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    min_delta=0.0001,
    patience=5,
    verbose=1,
)

In [None]:
# Train for at most 100 epochs, holding out 20% of the training arrays for
# validation; the callbacks may stop training early or lower the learning rate
fit_options = dict(epochs=100, validation_split=0.2, callbacks=[es, rlr])
hist = model.fit(train_data, train_label, **fit_options)

In [None]:
# Score the trained model on the held-out test set; evaluate() returns the
# loss followed by the compiled metric (accuracy)
test_metrics = model.evaluate(test_data, test_label)
test_loss, test_acc = test_metrics
print('Test accuracy:', test_acc)

Set up some plotting functions

In [None]:
# plot training loss and accuracy history
# Loss on the left, accuracy on the right; each panel shows the training curve
# and the validation curve.
panels = [('loss', 'Model loss', 'Loss', 'upper right'),
          ('accuracy', 'Model accuracy', 'Accuracy', 'lower right')]

plt.figure(figsize=(12,4))
for position, (metric, title, ylabel, legend_loc) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(hist.history[metric])
    plt.plot(hist.history['val_' + metric])
    plt.title(title)
    plt.ylabel(ylabel)
    plt.xlabel('Epoch')
    # a legend on every panel, so the two curves can be told apart on both
    plt.legend(['Train', 'Validation'], loc=legend_loc)
plt.show()

In [None]:
# plotting functions
def plot_image(i, predictions_array, true_label, img):
  """Draw example `i` with a caption comparing the prediction to the truth.

  Args:
    i: index of the example to show.
    predictions_array: per-example class scores; row `i` is used.
    true_label: per-example true class indices; entry `i` is used.
    img: stack of images; `img[i]` is displayed.

  The caption reads "<predicted name> <top score in %> (<true name>)" and is
  blue when the predicted class equals the true class, red otherwise. Names
  come from the module-level `class_names`; drawing goes to the current axes.
  """
  scores = predictions_array[i]
  truth = true_label[i]
  picture = img[i]

  # bare image: no grid, no ticks
  plt.grid(False)
  plt.xticks([])
  plt.yticks([])
  plt.imshow(picture, cmap=plt.cm.binary)

  # the predicted class is the one with the highest score
  predicted_label = np.argmax(scores)
  color = 'blue' if predicted_label == truth else 'red'

  caption = "{} {:2.0f}% ({})".format(class_names[predicted_label],
                                      100*np.max(scores),
                                      class_names[int(truth)])
  plt.xlabel(caption, color=color)

def plot_value_array(i, predictions_array, true_label):
  """Draw the class scores of example `i` as a bar chart on the current axes.

  Args:
    i: index of the example to show.
    predictions_array: per-example class scores; row `i` is used and one bar
      is drawn per entry of that row.
    true_label: per-example true class indices; entry `i` is used.

  Bars are grey, except that the predicted class is coloured red and the true
  class blue. The true class is coloured last, so a correct prediction shows a
  single blue bar.
  """
  # int() mirrors plot_image: the label is used as a sequence index below, so
  # it must be a plain integer even if the caller passes a float label array
  predictions_array, true_label = predictions_array[i], int(true_label[i])
  class_positions = list(range(len(predictions_array)))

  plt.grid(False)
  plt.xticks(class_positions)
  plt.yticks([])
  thisplot = plt.bar(class_positions, predictions_array, color="#777777")
  plt.ylim([0, 1])  # scores are probabilities
  predicted_label = np.argmax(predictions_array)

  thisplot[predicted_label].set_color('red')
  thisplot[true_label].set_color('blue')

# Make predictions on test data from the trained model

In [None]:
# Class scores for every test image: one row per image, one column per class
predictions = model.predict(test_data)

# Compare predicted and true classes for the first 10 test galaxies
# (indices 0..9, so index 0 is included)
print("First 10 galaxies")
print("Predicted:",np.argmax(predictions[:10],axis=1))
print("Truth:    ",np.int_(test_label[:10]))

# Let's visualise these classifications

In [None]:
# Inspect one test example: the image on the left, its class scores on the right
i = 4
true_classes = np.int_(test_label)

plt.figure(figsize=(6,3))
plt.subplot(1,2,1)
plot_image(i, predictions, true_classes, test_data)

f=plt.subplot(1,2,2)
plot_value_array(i, predictions, true_classes)
# name the two bars once plot_value_array has placed the ticks
f.set_xticklabels(["smooth","features"])
plt.show()

 Plot the first X test images, their predicted label, and the true label
 Color correct predictions in blue, incorrect predictions in red

In [None]:
# Grid of the first num_rows*num_cols test examples; each example takes two
# adjacent panels (image, then score bars), hence 2*num_cols columns.
num_rows = 5
num_cols = 4
num_images = num_rows*num_cols
true_classes = np.int_(test_label)
plt.figure(figsize=(2*2*num_cols, 2*num_rows))

for i in range(num_images):
  plt.subplot(num_rows, 2*num_cols, 2*i+1)
  plot_image(i, predictions, true_classes, test_data)
  f=plt.subplot(num_rows, 2*num_cols, 2*i+2)
  plot_value_array(i, predictions, true_classes)
  # Label the ticks only after plot_value_array has fixed their positions;
  # setting tick labels on an axes whose ticks are still automatic is not
  # reliable.
  f.set_xticklabels(["smooth","features"])
plt.show()


# Define a new convolutional neural network model

In [None]:
def create_cnn_model():
  """Build and compile a small convolutional classifier.

  Architecture: one 3x3 convolution with 32 filters, 2x2 max pooling, flatten,
  dropout (rate 0.25), a 64-unit dense layer and a 2-way softmax output. The
  input shape is (target_size, target_size, 3), read from the module-level
  `target_size`.

  Returns:
    A compiled `keras.Sequential` (adam optimizer, sparse categorical
    cross-entropy loss, accuracy metric).
  """
  # reset Keras' global state so each call starts from a clean slate
  keras.backend.clear_session()

  cnn = keras.Sequential([
      keras.layers.Input(shape=(target_size, target_size,3)),
      keras.layers.Conv2D(32, (3, 3), activation='relu'),
      keras.layers.MaxPooling2D((2, 2)),
      keras.layers.Flatten(),
      keras.layers.Dropout(0.25),
      keras.layers.Dense(64, activation='relu'),
      keras.layers.Dense(2, activation='softmax'),
  ])
  cnn.compile(
      loss='sparse_categorical_crossentropy',
      optimizer='adam',
      metrics=['accuracy'],
  )
  return cnn


# Replace the dense model with a freshly built CNN and print its layer table
model = create_cnn_model()
model.summary()

# Train the model on the training data

In [None]:
# Same training setup as for the dense model: at most 100 epochs, 20% of the
# training arrays held out for validation, early stopping and LR reduction
fit_options = dict(epochs=100, validation_split=0.2, callbacks=[es, rlr])
hist = model.fit(train_data, train_label, **fit_options)

# Now make predictions and visualise
# Compare to previous plot

In [None]:
# plot training history again
# Loss on the left, accuracy on the right; each panel shows the training curve
# and the validation curve.
panels = [('loss', 'Model loss', 'Loss', 'upper right'),
          ('accuracy', 'Model accuracy', 'Accuracy', 'lower right')]

plt.figure(figsize=(12,4))
for position, (metric, title, ylabel, legend_loc) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(hist.history[metric])
    plt.plot(hist.history['val_' + metric])
    plt.title(title)
    plt.ylabel(ylabel)
    plt.xlabel('Epoch')
    # a legend on every panel, so the two curves can be told apart on both
    plt.legend(['Train', 'Validation'], loc=legend_loc)
plt.show()


In [None]:
# Class scores of the current model for every test image
predictions = model.predict(test_data)

# Grid of the first num_rows*num_cols test examples; each example takes two
# adjacent panels (image, then score bars), hence 2*num_cols columns.
num_rows = 5
num_cols = 4
num_images = num_rows*num_cols
true_classes = np.int_(test_label)
plt.figure(figsize=(2*2*num_cols, 2*num_rows))
for i in range(num_images):
  plt.subplot(num_rows, 2*num_cols, 2*i+1)
  plot_image(i, predictions, true_classes, test_data)
  f=plt.subplot(num_rows, 2*num_cols, 2*i+2)
  plot_value_array(i, predictions, true_classes)
  # Label the ticks only after plot_value_array has fixed their positions;
  # setting tick labels on an axes whose ticks are still automatic is not
  # reliable.
  f.set_xticklabels(["smooth","features"])
plt.show()

In [None]:
# compute evaluation metrics: precision and recall
from sklearn.metrics import precision_score, recall_score

# true classes as integers, predicted classes as the arg-max of the scores
y_true = np.int_(test_label)
y_pred = np.argmax(predictions, axis=1)

print("Precision:",precision_score(y_true, y_pred))
print("Recall:",recall_score(y_true, y_pred))


# Assignment 1:
*   Create your own CNN with a different number of layers.
*   Can you improve the precision and recall values?


In [None]:
...blank...

# Now add some data augmentations

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator # Import from tensorflow.keras.preprocessing.image

# add data augmentation using a generator:
# random horizontal/vertical shifts (range 0.1) and random flips along both
# axes; pixels uncovered by a shift are filled from the nearest edge
augmentation_options = dict(
    width_shift_range=.1,
    height_shift_range=.1,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest',
)
datagen = ImageDataGenerator(**augmentation_options)

# all training images except the last 100
# NOTE(review): none of the options above looks data-dependent, so this fit()
# call may be unnecessary — confirm against the ImageDataGenerator docs
datagen.fit(train_data[:-100])


# -------------------------------------------------
# visualise: one training image next to a randomly augmented copy of it

idx = 0
original = train_data[idx]            # a single image, shape (H, W, C)

# flow() expects a batch, so wrap the image in a batch of one
iterator = datagen.flow(original[np.newaxis, ...], batch_size=1, shuffle=False)
augmented = next(iterator)[0]         # first (and only) image of the batch

fig, axes = plt.subplots(1, 2, figsize=(6, 3))
for ax, picture, title in zip(axes, (original, augmented), ("Original", "Augmented")):
    ax.imshow(picture)
    ax.set_title(title)
    ax.axis("off")

plt.tight_layout()
plt.show()

In [None]:
# create fresh model
model = create_cnn_model()

# fit model with data augmentation!
# Train on everything except the last 100 training images and validate on
# exactly those 100. Validation drives early stopping and the LR schedule, so
# it must not use test data: otherwise the test scores reported afterwards
# are optimistically biased.
hist = model.fit(datagen.flow(train_data[:-100], train_label[:-100],
                              batch_size=64),
                 epochs=100,
                 validation_data=(train_data[-100:], train_label[-100:]),
                 callbacks=[es, rlr]
                 )

# Test the trained model on the test data

In [None]:
# Score the trained model on the held-out test set; evaluate() returns the
# loss followed by the compiled metric (accuracy)
test_metrics = model.evaluate(test_data, test_label)
test_loss, test_acc = test_metrics
print('Test accuracy:', test_acc)

In [None]:
# plot training history again
# Loss on the left, accuracy on the right; each panel shows the training curve
# and the validation curve.
panels = [('loss', 'Model loss', 'Loss', 'upper right'),
          ('accuracy', 'Model accuracy', 'Accuracy', 'lower right')]

plt.figure(figsize=(12,4))
for position, (metric, title, ylabel, legend_loc) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(hist.history[metric])
    plt.plot(hist.history['val_' + metric])
    plt.title(title)
    plt.ylabel(ylabel)
    plt.xlabel('Epoch')
    # a legend on every panel, so the two curves can be told apart on both
    plt.legend(['Train', 'Validation'], loc=legend_loc)
plt.show()

# Now make predictions and visualise
# Compare to previous plot

In [None]:
# Class scores of the current model for every test image
predictions = model.predict(test_data)

# Grid of the first num_rows*num_cols test examples; each example takes two
# adjacent panels (image, then score bars), hence 2*num_cols columns.
num_rows = 5
num_cols = 4
num_images = num_rows*num_cols
true_classes = np.int_(test_label)
plt.figure(figsize=(2*2*num_cols, 2*num_rows))
for i in range(num_images):
  plt.subplot(num_rows, 2*num_cols, 2*i+1)
  plot_image(i, predictions, true_classes, test_data)
  f=plt.subplot(num_rows, 2*num_cols, 2*i+2)
  plot_value_array(i, predictions, true_classes)
  # Label the ticks only after plot_value_array has fixed their positions;
  # setting tick labels on an axes whose ticks are still automatic is not
  # reliable.
  f.set_xticklabels(["smooth","features"])
plt.show()

In [None]:
# precision and recall
from sklearn.metrics import precision_score, recall_score

# true classes as integers, predicted classes as the arg-max of the scores
y_true = np.int_(test_label)
y_pred = np.argmax(predictions, axis=1)

print("Precision:",precision_score(y_true, y_pred))
print("Recall:",recall_score(y_true, y_pred))

# Transfer Learning

In [None]:
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.resnet50 import preprocess_input, decode_predictions
from tensorflow.keras.layers import Input, Flatten, Dense
from tensorflow.keras.models import Model

# Load a pretrained network:
# the VGG16 convolutional base with ImageNet weights and without its top
# classification layers, wired to an input of our image size
image_input = Input(shape=(target_size, target_size, 3))
base_model = VGG16(weights='imagenet', include_top=False, input_tensor=image_input)

# Freeze every layer of the base model so that only the new head is trained
for pretrained_layer in base_model.layers:
    pretrained_layer.trainable = False

# Add a new classification head on top of the base model's output
flat_features = Flatten()(base_model.output)
hidden = Dense(64, activation='relu')(flat_features)
predictions = Dense(2, activation='softmax')(hidden) # 2 classes: smooth, featured

# Create the final model: base input -> new softmax output
model = Model(inputs=base_model.input, outputs=predictions)

model.summary()

In [None]:
# Labels are integer class indices, hence the sparse categorical cross-entropy
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])


# Train the new head on augmented data: everything except the last 100
# training images is used for fitting, and those 100 are the validation set.
# Validating on held-out training data (not test data) keeps the test set
# untouched for the final evaluation.
hist = model.fit(datagen.flow(train_data[:-100], train_label[:-100],
                              batch_size=64),
                 epochs=20,
                 validation_data=(train_data[-100:], train_label[-100:]),
                 )

# Fine-tuning

In [None]:
# Display the base model's layers, in order, as the cell output
base_model.layers

In [None]:
# Unfreeze the top layers of the base model
n_unfrozen = 10  # number of layers, counted from the end, to make trainable
for layer in base_model.layers[-n_unfrozen:]: # unfreeze last 10 layers
    layer.trainable = True

# Compile the model again (after changing the trainable flags) with a lower
# learning rate
model.compile(optimizer=tf.keras.optimizers.Adam(1e-5), # Lower learning rate
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [None]:
# Fine-tune the model
# Same data setup as the head-only training: fit on everything except the last
# 100 training images and validate on those 100, so that the test set stays
# untouched until the final evaluation.
hist2 = model.fit(datagen.flow(train_data[:-100], train_label[:-100],
                               batch_size=64),
                  epochs=20,
                  validation_data=(train_data[-100:], train_label[-100:]),
                  )

In [None]:
# Accuracy over both training phases (head-only, then fine-tuning) on one
# continuous epoch axis. Epoch positions are derived from the recorded
# histories, so the second phase starts right after the first without overlap.
n_head_epochs = len(hist.history['accuracy'])
n_tune_epochs = len(hist2.history['accuracy'])
head_epochs = np.arange(n_head_epochs)
tune_epochs = np.arange(n_head_epochs, n_head_epochs + n_tune_epochs)

plt.plot(head_epochs, hist.history['accuracy'], 'r-', label='Train')
plt.plot(head_epochs, hist.history['val_accuracy'], 'b-', label='Validation')
plt.plot(tune_epochs, hist2.history['accuracy'], 'r-')
plt.plot(tune_epochs, hist2.history['val_accuracy'], 'b-')

# dashed line on the boundary between the two phases
plt.axvline(x=n_head_epochs - 0.5, color='k', linestyle='--')
plt.title('Model accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(loc='lower right')
plt.show()

In [None]:
# Class scores of the current model for every test image
predictions = model.predict(test_data)

# Grid of the first num_rows*num_cols test examples; each example takes two
# adjacent panels (image, then score bars), hence 2*num_cols columns.
num_rows = 5
num_cols = 4
num_images = num_rows*num_cols
true_classes = np.int_(test_label)
plt.figure(figsize=(2*2*num_cols, 2*num_rows))
for i in range(num_images):
  plt.subplot(num_rows, 2*num_cols, 2*i+1)
  plot_image(i, predictions, true_classes, test_data)
  f=plt.subplot(num_rows, 2*num_cols, 2*i+2)
  plot_value_array(i, predictions, true_classes)
  # Label the ticks only after plot_value_array has fixed their positions;
  # setting tick labels on an axes whose ticks are still automatic is not
  # reliable.
  f.set_xticklabels(["smooth","features"])
plt.show()

In [None]:
# Score the fine-tuned model on the held-out test set; evaluate() returns the
# loss followed by the compiled metric (accuracy)
test_metrics = model.evaluate(test_data, test_label)
test_loss, test_acc = test_metrics
print('Test accuracy:', test_acc)

In [None]:
# calculate precision and recall
# (precision_score / recall_score were imported in an earlier cell)
y_true = np.int_(test_label)
y_pred = np.argmax(predictions, axis=1)

print("Precision:",precision_score(y_true, y_pred))
print("Recall:",recall_score(y_true, y_pred))