In [21]:
import os
import io
import tensorflow as tf
import numpy as np
from azure.storage.blob import BlobServiceClient, ContainerClient
from PIL import Image
import matplotlib.pyplot as plt
import ast
import pandas as pd

from tensorflow.keras import layers
import tensorflow_hub as hub


from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.models import Sequential

from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split

from keras.applications.inception_v3 import InceptionV3
from keras.layers import MaxPooling2D, Dense, Dropout, GlobalAveragePooling2D, Flatten
from keras.models import Model
from tensorflow.keras.callbacks import ModelCheckpoint


### Connect to Azure

In [3]:
#set up storage
connection_string = "DefaultEndpointsProtocol=https;AccountName=mlfinalexam5505462853;AccountKey=0c40lghglG5/GlNK9yujDQAgo38GKoS2I3DeC/g22hwAEIFANKpmC/TqOpRk4RCT1DbfNiHBFt72+AStB+PfUA==;EndpointSuffix=core.windows.net"
container_name = "meterml"

#create client
blob_service_client = BlobServiceClient.from_connection_string(connection_string)
container_client = blob_service_client.get_container_client(container_name)

### Load Image Paths and Labels

In [19]:
#get filepaths
df = pd.read_csv("METER_ML_test.csv")
print(df.head())

#create dictionary with labels and encoded labels
unique_types = df['Type'].unique()
unique_type_encoded = df['Type_encoded'].unique()

type_dict = {}

for i in range(len(unique_types)):
    type_dict[unique_type_encoded[i]] = unique_types[i]

sorted_type_dict = dict(sorted(type_dict.items()))

sorted_type_dict

                                       Image_Folder   Type  Type_encoded
0  samples/test_images/35.17852862_-79.99927082.png  CAFOs             0
1      samples/test_images/47.863317_-92.810639.png  CAFOs             0
2      samples/test_images/33.440833_-85.435833.png  CAFOs             0
3  samples/test_images/45.12488405_-94.24194995.png  CAFOs             0
4  samples/test_images/45.33317705_-94.50533971.png  CAFOs             0


{0: 'CAFOs',
 1: 'Landfills',
 2: 'Mines',
 3: 'Negative',
 4: 'ProcessingPlants',
 5: 'RefineriesAndTerminals',
 6: 'WWTreatment'}

### Train Test Split

In [5]:
# First Split:
X_temp, X_test, y_temp, y_test = train_test_split(df['Image_Folder'], df['Type_encoded'], test_size=0.15, random_state=42)

X_train, X_val, y_train, y_val = train_test_split(X_temp, y_temp, test_size=0.176, random_state=42)


y_train = np.array(y_train).tolist()
y_val = np.array(y_val).tolist()
y_test = np.array(y_test).tolist()

print(len(X_train))
print(len(X_val))
print(len(X_test))

692
149
149


### Set Variables

In [6]:
image_size=224
channels=3

batch_size = 224 # Big enough to measure an F1-score
autotune = tf.data.experimental.AUTOTUNE # Adapt preprocessing and prefetching dynamically
shuffle_buffer_size = 1024 # Shuffle the training data by a chunck of 1024 observations

### Functions 
- create data (input for models)
- plot accuracy and loss of models
- metrices

In [7]:
def load_image(path):
    """Load an image from Azure Blob Storage."""
    blob_client = container_client.get_blob_client(path)
    blob_data = blob_client.download_blob().readall()  # Directly read all bytes
    return io.BytesIO(blob_data)

def load_and_preprocess_image(path):
    """Loads an image, decodes it to grayscale, resizes, and normalizes it."""
    # Load image
    image_file = load_image(path.numpy().decode('utf-8'))
    # Decode the image to grayscale
    image_tensor = tf.io.decode_image(image_file.getvalue(), channels=channels)
    # Resize the image
    image_resized = tf.image.resize(image_tensor, [image_size, image_size])
    # Normalize the image data
    image_normalized = image_resized / 255.0
    return image_normalized


def process_tensor(path, label):
    """Function to load an image from blob storage, decode, resize, and normalize it."""
    image_normalized = tf.py_function(load_and_preprocess_image, [path], tf.float32)
    # Ensure the shape is set correctly for grayscale
    image_normalized.set_shape([image_size, image_size, channels])
    return image_normalized, label

def create_dataset(filenames, labels, is_training=True):
    """Creates a TensorFlow dataset from filenames and labels."""
    dataset = tf.data.Dataset.from_tensor_slices((filenames, labels))
    dataset = dataset.map(process_tensor, num_parallel_calls=tf.data.AUTOTUNE)
    
    if is_training:
        dataset = dataset.cache()
        dataset = dataset.shuffle(buffer_size=1024)
        
    dataset = dataset.batch(256)
    dataset = dataset.prefetch(tf.data.AUTOTUNE)
    
    return dataset

def plot_history(model):
    # summarize history for accuracy
    plt.plot(model.history['accuracy'])
    plt.plot(model.history['val_accuracy'])
    plt.title('model accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(['Train', 'Validation'], loc='upper left')
    plt.show()
    # summarize history for loss
    plt.plot(model.history['loss'])
    plt.plot(model.history['val_loss'])
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['Train', 'Validation'], loc='upper left')
    plt.show()


def print_predictions(model, ds):
    #predict
    for images, labels in ds:
        predictions = model.predict(images)  # Only pass image data
     #print(predictions[:1])
        for pred, label in zip(predictions, labels):
            print("Prediction:", pred, "Actual Label:", label.numpy())# Print the first prediction
        break

### Create the dataset

In [8]:
train_ds = create_dataset(X_train, y_train)
test_ds = create_dataset(X_test, y_test, False)
val_ds = create_dataset(X_val, y_val, False)

### Print the dataset

In [None]:
for images, labels in train_ds.take(1):  # Here, take(1) takes the first batch
    print("Images:", images.numpy())  # Convert tensor to numpy array and print
    print("Labels:", labels.numpy())  # Convert tensor to numpy array and print


### MobileNetV2: 
https://github.com/ashrefm/multi-label-soft-f1/blob/master/Multi-Label%20Image%20Classification%20in%20TensorFlow%202.0.ipynb

In [None]:
IMG_SIZE = 224
CHANNELS = 3
feature_extractor_url = "https://tfhub.dev/google/imagenet/mobilenet_v2_100_224/feature_vector/4"
feature_extractor_layer = hub.KerasLayer(feature_extractor_url,
                                         input_shape=(IMG_SIZE,IMG_SIZE,CHANNELS))

feature_extractor_layer.trainable = False

model_mnv2 = tf.keras.Sequential([
    feature_extractor_layer,
    layers.Dense(1024, activation='relu', name='hidden_layer'),
    layers.Dense(7, activation='sigmoid', name='output')
])

model_mnv2.summary()

print_predictions(model_mnv2, test_ds)

### Inception.v3: 
https://towardsdatascience.com/understanding-the-amazon-rainforest-with-multi-label-classification-vgg-19-inceptionv3-5084544fb655

In [2]:
def create_inception_v3_model():
    inceptionv3 = InceptionV3(weights="imagenet", include_top=False, input_shape=(224, 224, 3))
    for layer in inceptionv3.layers:
        layer.trainable = False

    # Adding custom layers
    x = inceptionv3.output
    x = GlobalAveragePooling2D()(x)  # Ensure this reduces all spatial dimensions
    x = Dense(4096, activation="relu")(x)
    x = Dropout(0.1)(x)
    output = Dense(7, activation="softmax")(x)  # Adjust the number of output units to match the number of classes

    # Creating the final model
    model = Model(inputs=inceptionv3.input, outputs=output)
    model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), optimizer="adam", metrics=["accuracy"])
    
    return model

# Instantiate and compile the model
inceptionv3_model = create_inception_v3_model()

# Set up the model checkpoint
model_checkpoint = ModelCheckpoint('inceptionv3_model.keras', monitor="val_accuracy", verbose=1, save_best_only=True)

# Assuming train_ds, X_test, y_test are properly defined
history = inceptionv3_model.fit(train_ds,
                      validation_data = val_ds, 
                      epochs=10, 
                      callbacks=[model_checkpoint])

plot_history(history)
print_predictions(inceptionv3_model, test_ds)

NameError: name 'tf' is not defined

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 1s/step
Prediction: [0.01274665 0.02877691 0.15972051 0.4063261  0.09525526 0.02191788
 0.27525672] Actual Label: 6
Prediction: [3.9608635e-06 7.5503305e-04 2.1353243e-04 1.2476606e-03 6.6801149e-05
 9.9590027e-01 1.8127172e-03] Actual Label: 5
Prediction: [0.0459647  0.2145908  0.04191964 0.3156867  0.26957166 0.01276872
 0.09949772] Actual Label: 4
Prediction: [0.0031893  0.03031961 0.00656269 0.01690817 0.0049908  0.9241715
 0.01385791] Actual Label: 5
Prediction: [0.3868611  0.10223748 0.01723989 0.2559255  0.17762122 0.00570343
 0.0544114 ] Actual Label: 0
Prediction: [0.11963693 0.18464087 0.06165045 0.16074783 0.31524956 0.01238855
 0.14568576] Actual Label: 4
Prediction: [0.00162375 0.1069823  0.04688137 0.7730318  0.03719909 0.00218598
 0.03209578] Actual Label: 6
Prediction: [0.00623571 0.14008905 0.24893196 0.40581325 0.09949404 0.00954302
 0.08989301] Actual Label: 2
Prediction: [0.01725106 0.02281215 0.11729892 

### VGG16: 
https://towardsdatascience.com/transfer-learning-with-vgg16-and-keras-50ea161580b4

In [1]:
from tensorflow.keras import layers, models
from keras.callbacks import EarlyStopping

from keras.applications.vgg16 import VGG16
from keras.applications.vgg16 import preprocess_input

## Loading VGG16 model
base_model = VGG16(weights="imagenet", include_top=False, input_shape=(224,224,3))
base_model.trainable = False ## Not trainable weights

base_model.summary()

flatten_layer = layers.Flatten()
dense_layer_1 = layers.Dense(50, activation='relu')
dense_layer_2 = layers.Dense(20, activation='relu')
prediction_layer = layers.Dense(7, activation='softmax')


model_vgg = models.Sequential([
    base_model,
    flatten_layer,
    dense_layer_1,
    dense_layer_2,
    prediction_layer
])


from keras.callbacks import EarlyStopping

model_vgg.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

es = EarlyStopping(monitor='val_accuracy', mode='max', patience=5,  restore_best_weights=True)

history = model_vgg.fit(train_ds, validation_data = val_ds, epochs=10, batch_size=32, callbacks=[es])

plot_history(history)
print_predictions(model_vgg, test_ds)

NameError: name 'tf' is not defined

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 6s/step
Prediction: [0.00410895 0.05245112 0.22482607 0.5738788  0.06359594 0.0196357
 0.06150341] Actual Label: 6
Prediction: [0.04739311 0.13381815 0.07329758 0.20623805 0.20359476 0.24553663
 0.09012174] Actual Label: 5
Prediction: [0.08316551 0.13145676 0.02692242 0.14067535 0.2365763  0.27716824
 0.10403534] Actual Label: 4
Prediction: [0.07373614 0.07812826 0.01414828 0.10797587 0.26289803 0.40943378
 0.05367956] Actual Label: 5
Prediction: [0.08507486 0.06393594 0.02727791 0.39406055 0.0944519  0.11762462
 0.21757422] Actual Label: 0
Prediction: [0.04525127 0.16755691 0.1054664  0.2691845  0.10189566 0.04994841
 0.2606969 ] Actual Label: 4
Prediction: [0.00710113 0.04577387 0.08755973 0.6290746  0.13840704 0.03787905
 0.05420462] Actual Label: 6
Prediction: [0.00536634 0.07690225 0.43717018 0.3007303  0.01571154 0.0060522
 0.1580673 ] Actual Label: 2
Prediction: [0.04435897 0.10265122 0.1951215  0.31721547 0.11015192

### ResNet50: 
https://datagen.tech/guides/computer-vision/resnet-50/

In [None]:
resnet_model = Sequential()

rn50_base = tflow.keras.applications.ResNet50(
    weights = "imagenet",
    input_shape=(224,224,3)
    )

for each_layer in rn50_base.layers:

        each_layer.trainable=False

resnet_model.add(rn50_base)

resnet_model.add(Flatten())

resnet_model.add(Dense(512, activation='relu'))

resnet_model.add(Dense(7, activation='sigmoid'))

resnet_model.compile(optimizer='adam',loss='categorical_crossentropy',metrics=['accuracy'])

history = resnet_model.fit(train_ds, validation_data = val_ds, epochs=10)

plot_history(resnet_model)
print_predictions(resnet_model, test_ds)



Epoch 1/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 19s/step - accuracy: 0.1033 - loss: 4.1370 - val_accuracy: 0.0867 - val_loss: 4.4002
Epoch 2/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 19s/step - accuracy: 0.1352 - loss: 4.1698 - val_accuracy: 0.0867 - val_loss: 4.3835
Epoch 3/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 22s/step - accuracy: 0.1479 - loss: 4.0888 - val_accuracy: 0.0867 - val_loss: 4.3678
Epoch 4/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 21s/step - accuracy: 0.1323 - loss: 4.0540 - val_accuracy: 0.0867 - val_loss: 4.3515
Epoch 5/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 20s/step - accuracy: 0.1338 - loss: 4.1141 - val_accuracy: 0.0867 - val_loss: 4.3346
Epoch 6/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 21s/step - accuracy: 0.1333 - loss: 4.0541 - val_accuracy: 0.0867 - val_loss: 4.3170
Epoch 7/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x30b99c160>

In [None]:
print_predictions(resnet_model, test_ds)

[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 6s/step
Prediction: [0.808961   0.3408545  0.12461656 0.16093753 0.05578974 0.320024
 0.44170302] Actual Label: [1 0 0 0 0 1 0]
Prediction: [0.745914   0.3542185  0.1230882  0.18771501 0.05531713 0.31288648
 0.424262  ] Actual Label: [0 0 0 0 0 0 0]
Prediction: [0.74697053 0.34362268 0.10176412 0.16764787 0.04852271 0.28684488
 0.39359173] Actual Label: [1 1 0 0 0 0 1]
Prediction: [0.8429002  0.32981536 0.1052464  0.13855554 0.04150832 0.31023103
 0.44934812] Actual Label: [1 0 0 0 1 1 0]
Prediction: [0.8301681  0.3485725  0.1101438  0.14774078 0.04754231 0.32860145
 0.44287384] Actual Label: [1 1 0 1 0 1 1]
Prediction: [0.77808714 0.3326846  0.09886213 0.15719482 0.03974034 0.28754458
 0.42077675] Actual Label: [0 0 0 0 0 1 0]
Prediction: [0.7709208  0.33817938 0.09353303 0.15388034 0.03791275 0.29584286
 0.40164486] Actual Label: [0 0 0 0 1 0 0]
Prediction: [0.76769364 0.32618228 0.08814462 0.1537395  0.0339381  0.2706281