In [None]:
import cv2
import keras
import numpy as np
import matplotlib.pyplot as plt
import os
import pandas as pd
import seaborn as sns


from skimage.io import imread
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle


In [None]:
!mkdir data


mkdir: cannot create directory ‘data’: File exists


In [None]:
!mv PROF_seg_train.zip ./data/

mv: cannot stat 'PROF_seg_train.zip': No such file or directory


In [None]:
!mv PROF_seg_test.zip ./data


In [None]:
!unzip ./data/PROF_seg_train.zip -d ./data/

unzip:  cannot find or open ./data/PROF_seg_train.zip, ./data/PROF_seg_train.zip.zip or ./data/PROF_seg_train.zip.ZIP.


In [None]:
!unzip ./data/PROF_seg_test.zip -d ./data/


Archive:  ./data/PROF_seg_test.zip
   creating: ./data/PROF_seg_test/
   creating: ./data/PROF_seg_test/seg_test/
   creating: ./data/PROF_seg_test/seg_test/buildings/
  inflating: ./data/PROF_seg_test/seg_test/buildings/20057.jpg  
  inflating: ./data/PROF_seg_test/seg_test/buildings/20060.jpg  
  inflating: ./data/PROF_seg_test/seg_test/buildings/20061.jpg  
  inflating: ./data/PROF_seg_test/seg_test/buildings/20064.jpg  
  inflating: ./data/PROF_seg_test/seg_test/buildings/20073.jpg  
  inflating: ./data/PROF_seg_test/seg_test/buildings/20074.jpg  
  inflating: ./data/PROF_seg_test/seg_test/buildings/20078.jpg  
  inflating: ./data/PROF_seg_test/seg_test/buildings/20083.jpg  
  inflating: ./data/PROF_seg_test/seg_test/buildings/20094.jpg  
  inflating: ./data/PROF_seg_test/seg_test/buildings/20096.jpg  
  inflating: ./data/PROF_seg_test/seg_test/buildings/20113.jpg  
  inflating: ./data/PROF_seg_test/seg_test/buildings/20131.jpg  
  inflating: ./data/PROF_seg_test/seg_test/buildings

In [None]:
!unzip ./data/PROF_seg_test.zip -d ./data/


Archive:  ./data/PROF_seg_test.zip
replace ./data/PROF_seg_test/seg_test/buildings/20057.jpg? [y]es, [n]o, [A]ll, [N]one, [r]ename: 

In [None]:
!unzip ./data/PROF_seg_train/PROF_seg_train.zip -d ./data/


In [None]:
!ls -lah data


In [None]:
!ls -lah data/PROF_seg_train


In [None]:
# Dataset roots. The trailing slash is required: file paths in this notebook are
# built by plain string concatenation (e.g. train_dir + filename).
train_dir = './data/PROF_seg_train/'
test_dir = './data/PROF_seg_test/'

# Number of directory entries directly under each root.
len(os.listdir(train_dir)), len(os.listdir(test_dir))


In [None]:
# view example images
imgs = os.listdir(train_dir)[:10]
imgs


In [None]:
img = imread(train_dir+imgs[1])
plt.imshow(img)
plt.axis('off')


In [None]:

# Create a dataframe pairing every training image with its mask path.
# Mask paths are derived as '<name>_mask.png', so files with that suffix must be
# kept out of the image column (the older 'mask_' prefix check is kept as well).
# sorted() fixes the row order so the seeded shuffle/split gives the same result
# on every run; os.listdir makes no ordering promise.
image_names = sorted(
    f for f in os.listdir(train_dir)
    if f.endswith('.png') and not f.endswith('_mask.png') and not f.startswith('mask_')
)
df = pd.DataFrame({'image': [train_dir + f for f in image_names]})
# Swap only the trailing extension; a '.png' elsewhere in the path is left alone.
df['mask'] = df['image'].apply(lambda p: p[:-len('.png')] + '_mask.png')
df.head()


In [None]:
df.shape


In [None]:
# Shuffle the rows, then hold out 20% of them for validation.
# Both steps use random_state=42, so the result is repeatable for a given row order of df.
df = shuffle(df, random_state=42).reset_index(drop=True)
train_df, val_df = train_test_split(df, test_size=0.2, random_state=42)
train_df.shape, val_df.shape


In [None]:
# Pick one training row to inspect; `sample` is also used by the read_and_resize test cell.
sample = train_df.iloc[0]
# Index with ['mask'] rather than .mask: on a pandas Series the attribute form
# returns the built-in Series.mask method, not the value stored under 'mask'.
img = imread(sample['image'])
mask = imread(sample['mask'])
plt.figure(figsize=(8,4))
plt.subplot(1,2,1)
plt.imshow(img); plt.axis('off'); plt.title('image')
plt.subplot(1,2,2)
plt.imshow(mask); plt.axis('off'); plt.title('mask')


In [None]:
# Preprocessing functions
from keras.preprocessing.image import img_to_array, load_img
def read_and_resize(path, target_size=(224,224), grayscale=False):
    """Load an image file at a fixed size and scale its pixel values by 1/255.

    Args:
        path: location of the image file, handed to ``load_img``.
        target_size: size forwarded to ``load_img``; defaults to (224, 224).
        grayscale: if true the file is loaded with ``color_mode='grayscale'``,
            otherwise with ``color_mode='rgb'``.

    Returns:
        The ``img_to_array`` result for the loaded image, divided by 255.0.
    """
    mode = 'grayscale' if grayscale else 'rgb'
    loaded = load_img(path, color_mode=mode, target_size=target_size)
    pixels = img_to_array(loaded)
    return pixels / 255.0


In [None]:
# test read_and_resize
sample = train_df.iloc[0]  # selected here so this cell does not rely on a leftover variable
a = read_and_resize(sample['image'], target_size=(224,224))
# ['mask'] instead of .mask: the attribute form resolves to the Series.mask method
b = read_and_resize(sample['mask'], target_size=(224,224), grayscale=True)
a.shape, b.shape


In [None]:
# dataset generator (simple)
def gen_dataframe(df, batch_size=8, target_size=(224,224)):
    """Yield (images, masks) batches forever, reshuffling the rows on every pass.

    Args:
        df: dataframe with 'image' and 'mask' columns holding file paths.
        batch_size: maximum rows per batch; the last batch of a pass may be smaller.
        target_size: (height, width) that every image and mask is resized to.

    Yields:
        A tuple ``(X, y)`` of float32 arrays: ``X`` has shape
        (n, height, width, 3) and holds the RGB images, ``y`` has shape
        (n, height, width, 1) and holds the grayscale masks.

    Raises:
        ValueError: when iteration starts, if ``batch_size`` is below 1 or
            ``df`` has no rows. Without this check the endless loop would
            spin without ever yielding a batch.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")
    n_rows = len(df)
    if n_rows == 0:
        raise ValueError("gen_dataframe received an empty dataframe; nothing to yield")

    height, width = target_size[0], target_size[1]
    while True:
        # New random order for each pass; the caller's dataframe is not modified.
        epoch_df = shuffle(df)
        for start in range(0, n_rows, batch_size):
            batch = epoch_df.iloc[start:start + batch_size]
            X = np.zeros((len(batch), height, width, 3), dtype=np.float32)
            y = np.zeros((len(batch), height, width, 1), dtype=np.float32)
            for j, (_, row) in enumerate(batch.iterrows()):
                X[j] = read_and_resize(row['image'], target_size=target_size)
                y[j] = read_and_resize(row['mask'], target_size=target_size, grayscale=True)
            yield X, y


In [None]:
# test generator
g = gen_dataframe(train_df, batch_size=2)
Xb, yb = next(g)
Xb.shape, yb.shape


In [None]:
# Define a simple model using a pretrained base (transfer learning)
from keras.applications import VGG16
from keras.layers import Input, Conv2D, UpSampling2D, Concatenate, Conv2DTranspose
from keras.models import Model

# Network input: 224x224 RGB, the default size produced by read_and_resize.
input_tensor = Input(shape=(224,224,3))
# VGG16 without its classification head, loaded with ImageNet weights and frozen
# so only the layers added below are trained at first.
base_model = VGG16(weights='imagenet', include_top=False, input_tensor=input_tensor)
base_model.trainable = False

# Decoder: five conv + 2x-upsampling stages (32x total) with shrinking filter counts.
# NOTE(review): this assumes the VGG16 output is 32x smaller than the input, so the
# final map is 224x224 again — confirm against model.summary().
x = base_model.output
x = Conv2D(256, (3,3), activation='relu', padding='same')(x)
x = UpSampling2D((2,2))(x)
x = Conv2D(128, (3,3), activation='relu', padding='same')(x)
x = UpSampling2D((2,2))(x)
x = Conv2D(64, (3,3), activation='relu', padding='same')(x)
x = UpSampling2D((2,2))(x)
x = Conv2D(32, (3,3), activation='relu', padding='same')(x)
x = UpSampling2D((2,2))(x)
x = Conv2D(16, (3,3), activation='relu', padding='same')(x)
x = UpSampling2D((2,2))(x)

# Single-channel sigmoid output: one value in (0, 1) per pixel.
output = Conv2D(1, (1,1), activation='sigmoid', padding='same')(x)

model = Model(inputs=input_tensor, outputs=output)
model.summary()


In [None]:
# compile
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


In [None]:
# quick test predict on a batch
pred = model.predict(Xb)
pred.shape


In [None]:
# show prediction vs ground truth
plt.figure(figsize=(12,6))
plt.subplot(1,3,1)
plt.imshow(Xb[0]); plt.axis('off'); plt.title('image')
plt.subplot(1,3,2)
plt.imshow(yb[0].squeeze(), cmap='gray'); plt.axis('off'); plt.title('mask')
plt.subplot(1,3,3)
plt.imshow(pred[0].squeeze(), cmap='gray'); plt.axis('off'); plt.title('pred')


In [None]:
# callbacks and training (very small epochs for demo)
from keras.callbacks import ModelCheckpoint, EarlyStopping
# Keep the weights of the epoch with the lowest validation loss on disk.
checkpoint = ModelCheckpoint('best_model.h5', save_best_only=True, monitor='val_loss', mode='min')
early = EarlyStopping(patience=3, restore_best_weights=True)

# One batch size shared by both generators and both step counts.
BATCH_SIZE = 8
train_gen = gen_dataframe(train_df, batch_size=BATCH_SIZE)
val_gen = gen_dataframe(val_df, batch_size=BATCH_SIZE)

# Ceiling division: gen_dataframe also yields the final partial batch, so this
# makes one epoch equal exactly one pass over the data. With floor division the
# generators drift between epochs and every validation run scores a different subset.
steps_per_epoch = max(1, -(-len(train_df) // BATCH_SIZE))
validation_steps = max(1, -(-len(val_df) // BATCH_SIZE))


In [None]:
history = model.fit(train_gen, steps_per_epoch=steps_per_epoch, epochs=2, validation_data=val_gen, validation_steps=validation_steps, callbacks=[checkpoint, early])


In [None]:
# plot training loss
plt.plot(history.history['loss'], label='train_loss')
plt.plot(history.history['val_loss'], label='val_loss')
plt.legend()
plt.title('Loss')


In [None]:
# load best
model.load_weights('best_model.h5')


In [None]:
# predict on validation sample
# Clamp the row index so a validation set with fewer than six rows still works.
val_sample = val_df.iloc[min(5, len(val_df) - 1)]
# ['mask'] instead of .mask: the attribute form returns the Series.mask method.
val_mask_path = val_sample['mask']
Xi = np.expand_dims(read_and_resize(val_sample['image'], (224,224)), 0)
pred = model.predict(Xi)[0]
plt.figure(figsize=(9,3))
plt.subplot(1,3,1); plt.imshow(Xi[0]); plt.axis('off'); plt.title('image')
plt.subplot(1,3,2); plt.imshow(read_and_resize(val_mask_path, (224,224), grayscale=True).squeeze(), cmap='gray'); plt.axis('off'); plt.title('mask')
plt.subplot(1,3,3); plt.imshow(pred.squeeze(), cmap='gray'); plt.axis('off'); plt.title('pred')


In [None]:
# Fine-tuning: make only the last four layers of the VGG16 base trainable;
# the earlier base layers keep the frozen state set when the model was built.
for layer in base_model.layers[-4:]:
    layer.trainable = True

# Compile again with a much smaller learning rate (1e-5) for this phase.
# NOTE(review): Keras is understood to need a fresh compile() before a changed
# `trainable` flag takes effect — presumably why compile is called again here.
model.compile(optimizer=keras.optimizers.Adam(1e-5), loss='binary_crossentropy', metrics=['accuracy'])


In [None]:
# fine-tune for 1 epoch (demo)
history_ft = model.fit(train_gen, steps_per_epoch=steps_per_epoch, epochs=1, validation_data=val_gen, validation_steps=validation_steps, callbacks=[checkpoint, early])


In [None]:
# plot combined loss
plt.plot(history.history['loss'], label='train_loss_pre')
plt.plot(history.history['val_loss'], label='val_loss_pre')
plt.plot(history_ft.history['loss'], label='train_loss_ft')
plt.plot(history_ft.history['val_loss'], label='val_loss_ft')
plt.legend()
plt.title('Loss pre vs ft')


In [None]:
# Evaluate on test set (simple loop)
# Mask files are addressed as '<name>_mask.png' in this notebook, so exclude that
# suffix (and the older 'mask_' prefix) to keep masks out of the image list.
# sorted() gives a stable order, which the saved pred_<i>.png indices depend on.
test_images = sorted(
    test_dir + f for f in os.listdir(test_dir)
    if f.endswith('.png') and not f.endswith('_mask.png') and not f.startswith('mask_')
)
len(test_images)


In [None]:
# predict and save some results
os.makedirs('./preds', exist_ok=True)
for i, p in enumerate(test_images[:10]):
    Xi = np.expand_dims(read_and_resize(p, (224,224)), 0)
    pr = model.predict(Xi)[0]
    plt.imsave(f'./preds/pred_{i}.png', pr.squeeze(), cmap='gray')


In [None]:
!ls -lah ./preds | head


In [None]:
# show some predictions
import glob
pred_files = glob.glob('./preds/*.png')[:6]
plt.figure(figsize=(12,6))
for i, f in enumerate(pred_files):
    plt.subplot(2,3,i+1)
    img = imread(f)
    plt.imshow(img, cmap='gray'); plt.axis('off')


In [None]:
# Save model final
model.save('final_model.h5')


In [None]:
# Load and test loaded model
from keras.models import load_model
m2 = load_model('final_model.h5')
Xi = np.expand_dims(read_and_resize(test_images[0], (224,224)), 0)
pred = m2.predict(Xi)[0]
plt.imshow(pred.squeeze(), cmap='gray'); plt.axis('off')


In [None]:
# Metrics: compute dice for a sample
def dice_coef(y_true, y_pred, smooth=1e-6, threshold=0.5):
    """Dice coefficient between a ground-truth mask and a thresholded prediction.

    Args:
        y_true: ground-truth mask (array or array-like); used as-is, not thresholded.
        y_pred: predicted scores (array or array-like); values strictly greater
            than ``threshold`` count as foreground.
        smooth: small constant added to numerator and denominator so two empty
            masks give 1.0 instead of a division by zero.
        threshold: cut-off applied to ``y_pred``; defaults to 0.5.

    Returns:
        ``(2 * |true ∩ pred| + smooth) / (|true| + |pred| + smooth)`` as a scalar.
    """
    # asarray lets plain lists and nested sequences through as well as ndarrays.
    y_true_f = np.asarray(y_true).ravel()
    y_pred_f = (np.asarray(y_pred).ravel() > threshold).astype(np.float32)
    intersection = np.sum(y_true_f * y_pred_f)
    return (2. * intersection + smooth) / (np.sum(y_true_f) + np.sum(y_pred_f) + smooth)

# Score the prediction for the same validation image the mask belongs to; the
# notebook-level `pred` was last assigned from a test image, not from val_sample.
# ['mask'] is used because .mask on a Series returns the Series.mask method.
y_true = read_and_resize(val_sample['mask'], (224,224), grayscale=True)
val_pred = model.predict(np.expand_dims(read_and_resize(val_sample['image'], (224,224)), 0))[0]
dice_coef(y_true, val_pred)


In [None]:
# compute average dice on 10 test images
dice_scores = []
for p in test_images[:10]:
    # Look for the mask first so inference only runs on images that can be scored.
    maskp = p.replace('.png','_mask.png')
    if not os.path.exists(maskp):
        continue
    Xi = np.expand_dims(read_and_resize(p, (224,224)), 0)
    pr = model.predict(Xi)[0]
    y = read_and_resize(maskp, (224,224), grayscale=True)
    dice_scores.append(dice_coef(y, pr))
# Mean is None when no test image had a mask.
dice_scores, (np.mean(dice_scores) if dice_scores else None)


In [None]:
# show a confusion-like overlay for a sample
# Columns are read with ['...']: val_sample.mask would return the Series.mask method.
sample_img = read_and_resize(val_sample['image'], (224,224))
sample_mask = read_and_resize(val_sample['mask'], (224,224), grayscale=True).squeeze()
sample_pred = model.predict(np.expand_dims(sample_img,0))[0].squeeze()

plt.figure(figsize=(6,6))
plt.imshow(sample_img)
# Ground truth in red, thresholded prediction in blue; zero/background pixels are hidden.
plt.imshow(np.ma.masked_where(sample_mask==0, sample_mask), cmap='Reds', alpha=0.5)
plt.imshow(np.ma.masked_where(sample_pred<=0.5, sample_pred), cmap='Blues', alpha=0.3)
plt.axis('off')


In [None]:
# Save a small dataframe with metrics
# dice_scores only has entries for test images whose mask file exists, so pair
# each score with exactly those images (same order as the scoring loop) instead
# of with the first len(dice_scores) paths.
scored_images = [p for p in test_images[:10] if os.path.exists(p.replace('.png','_mask.png'))]
metrics_df = pd.DataFrame({'image': scored_images, 'dice': dice_scores})
metrics_df.to_csv('test_metrics.csv', index=False)
metrics_df.head()


In [None]:
!head -n 5 test_metrics.csv


In [None]:
# display distribution of dice
sns.histplot(metrics_df['dice'])
plt.title('Dice distribution')


In [None]:
# analyze worst images
metrics_df.sort_values('dice').head()


In [None]:
worst = metrics_df.sort_values('dice').iloc[0]
worst


In [None]:
wimg = imread(worst.image)
wmask = imread(worst.image.replace('.png','_mask.png'))
# Predictions were saved as pred_<i>.png with i = position of the image in
# test_images, so load the file for the worst image rather than always pred_0.
worst_idx = test_images.index(worst.image)
wpred = imread(f'./preds/pred_{worst_idx}.png')
plt.figure(figsize=(12,4))
plt.subplot(1,3,1); plt.imshow(wimg); plt.axis('off'); plt.title('img')
plt.subplot(1,3,2); plt.imshow(wmask, cmap='gray'); plt.axis('off'); plt.title('mask')
plt.subplot(1,3,3); plt.imshow(wpred, cmap='gray'); plt.axis('off'); plt.title('pred')


In [None]:
# Clean up large variables
del model
del m2
import gc
gc.collect()


In [None]:
# Rebuild small inference model from saved file for deployment demo
from keras.models import load_model
inf_model = load_model('final_model.h5')
inf_model.summary()


In [None]:
# Demo: load an external image (if provided) and predict
# (replace 'external.png' with your filename)
if os.path.exists('external.png'):
    Xi = np.expand_dims(read_and_resize('external.png', (224,224)), 0)
    pr = inf_model.predict(Xi)[0]
    plt.imshow(pr.squeeze(), cmap='gray'); plt.axis('off')
else:
    print("No external.png found")


In [None]:
# show model input/output shapes
inf_model.input_shape, inf_model.output_shape


In [None]:
# Convert mask predictions to binary and save overlay on original
def save_overlay(original_path, pred, out_path):
    """Draw a thresholded prediction in red over the original image and save it.

    Args:
        original_path: path of the image to draw underneath, read with ``imread``.
        pred: prediction scores; squeezed and thresholded at 0.5. Its resolution
            may differ from the original image.
        out_path: file the figure is written to.
    """
    orig = imread(original_path)
    pred_bin = (np.asarray(pred).squeeze() > 0.5).astype(np.uint8)
    height, width = orig.shape[0], orig.shape[1]

    fig, ax = plt.subplots(figsize=(6,6))
    try:
        ax.imshow(orig)
        # Stretch the mask over the original's pixel extent; without `extent` a
        # mask of a different resolution would cover only part of the picture.
        ax.imshow(
            np.ma.masked_where(pred_bin==0, pred_bin),
            cmap='Reds',
            alpha=0.5,
            extent=(-0.5, width - 0.5, height - 0.5, -0.5),
            interpolation='nearest',
        )
        ax.axis('off')
        fig.savefig(out_path, bbox_inches='tight')
    finally:
        # Always release the figure, even when saving fails.
        plt.close(fig)

# example usage
save_overlay(test_images[0], inf_model.predict(np.expand_dims(read_and_resize(test_images[0], (224,224)),0))[0], 'overlay_0.png')


In [None]:
!ls -lah overlay_0.png


In [None]:
# show overlay
plt.imshow(imread('overlay_0.png')); plt.axis('off')


In [None]:
# small utility: visualize multiple predictions side-by-side
def show_predictions(img_paths, model, n=4, target_size=(224,224)):
    """Plot the first ``n`` images (top row) above their predicted masks (bottom row).

    Args:
        img_paths: sequence of image file paths; only the first ``n`` are used.
        model: object with a ``predict`` method that accepts a batch of images.
        n: number of columns in the figure.
        target_size: size every image is resized to before prediction;
            defaults to (224, 224).
    """
    plt.figure(figsize=(12,6))
    for i, p in enumerate(img_paths[:n]):
        # Read the file once and reuse the array for both prediction and display.
        img = read_and_resize(p, target_size)
        pr = model.predict(np.expand_dims(img, 0))[0]
        plt.subplot(2, n, i+1)
        plt.imshow(img); plt.axis('off'); plt.title('img')
        plt.subplot(2, n, n+i+1)
        plt.imshow(pr.squeeze(), cmap='gray'); plt.axis('off'); plt.title('pred')
    plt.tight_layout()

show_predictions(test_images, inf_model, n=4)


In [None]:
# export model to TF SavedModel format (if needed)
import tensorflow as tf
tf.saved_model.save(inf_model, 'saved_model_export')


In [None]:
# load back saved model (test)
mod = tf.saved_model.load('saved_model_export')
list(mod.signatures.keys())


In [None]:
# small end-of-notebook summary printout
print("Train samples:", len(train_df))
print("Val samples:", len(val_df))
print("Test samples:", len(test_images))
print("Preds saved in ./preds")


In [None]:
# show beginning of code that built the VGG base (for clarity)
for i, layer in enumerate(base_model.layers[:10]):
    print(i, layer.name, layer.trainable)


In [None]:
# final housekeeping
print("Done. Notebook cells executed.")
