## DISCLAIMER: This notebook cannot be executed as-is because it needs to connect to the Azure Storage Account used to store the images. It solely serves as a demonstration of how the .npy files were created by extracting the files from Azure.

In [None]:
import os
import io
import tensorflow as tf
import numpy as np
from azure.storage.blob import BlobServiceClient, ContainerClient
from azure.core.exceptions import ResourceNotFoundError
from PIL import Image
import matplotlib.pyplot as plt
import seaborn as sns
import ast
import time
from keras.models import load_model
import tempfile

import pandas as pd
from tensorflow.keras import layers, models
import tensorflow_hub as hub
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.applications.inception_v3 import InceptionV3
from keras.applications.vgg16 import VGG16
from keras.applications.vgg16 import preprocess_input
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Dense, Flatten, Dropout, GlobalAveragePooling2D
from tensorflow.keras.models import Model, Sequential
from keras.utils import plot_model
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score, confusion_matrix
from keras.callbacks import Callback, ReduceLROnPlateau, EarlyStopping, ModelCheckpoint

In [None]:
# Install the packages listed in requirements.txt.
# NOTE(review): this is a shell-style command, not Python; it presumably relies on
# the notebook's automatic handling of `pip` as a cell command — confirm.
pip install -r requirements.txt

In [None]:
# Azure Blob Storage settings: the container that holds the images and the
# connection string for the storage account ("<key>" is a placeholder).
container_name = "meterml"
connection_string = "<key>"

# Service-level client built from the connection string above.
blob_service_client = BlobServiceClient.from_connection_string(connection_string)

In [None]:
# image_size: side length (in pixels) of the square images produced by resizing.
# channels:   number of colour channels requested when decoding an image.
image_size, channels = 224, 3

def load_image(image_path):
    """Fetch the raw (still encoded) bytes of an image from Azure Blob Storage.

    The blob is looked up in the container named by the module-level
    ``container_name`` through the module-level ``blob_service_client``.

    Args:
        image_path: Name of the blob (its path inside the container).

    Returns:
        The blob's contents as ``bytes`` on success. If anything goes wrong
        while fetching, the error is printed and an all-zero ``uint8`` NumPy
        array of shape ``(image_size, image_size, channels)`` is returned
        instead, so callers must be prepared for either type.
    """
    try:
        # Reuse the already-configured service client instead of building a
        # new one (with placeholder credentials) for every single image.
        blob_client = blob_service_client.get_blob_client(
            container=container_name, blob=image_path
        )

        # Download the blob's contents as bytes
        return blob_client.download_blob().readall()

    except Exception as e:
        # Deliberately best-effort: one unreadable image should not abort the
        # whole extraction run, so report it and hand back a blank image.
        print(f"Error fetching image from Azure: {e}")
        return np.zeros((image_size, image_size, channels), dtype=np.uint8)

def load_and_preprocess_image(path):
    """Load an image from Azure and return it resized and normalized.

    The image is decoded with ``channels`` colour channels, resized to
    ``image_size`` x ``image_size`` and divided by 255 so that 8-bit pixel
    values end up in the range [0, 1].

    Args:
        path: Name of the image blob, passed on to ``load_image``.

    Returns:
        A NumPy array holding the resized, normalized image.
    """
    # Load image
    image_file = load_image(path)

    if isinstance(image_file, np.ndarray):
        # load_image hands back an already-decoded placeholder array when the
        # download failed; feeding that to decode_image would raise, so use
        # it directly as the pixel data.
        image_tensor = tf.convert_to_tensor(image_file)
    else:
        # Decode the encoded bytes. expand_animations=False keeps the result
        # 3-D even for animated GIFs (only the first frame is used), so every
        # image has the same rank.
        image_tensor = tf.io.decode_image(
            image_file, channels=channels, expand_animations=False
        )

    # Resize the image
    image_resized = tf.image.resize(image_tensor, [image_size, image_size])
    # Normalize the image data
    image_normalized = image_resized / 255.0
    return image_normalized.numpy()

def create_data_lists(filenames, labels):
    """Load every image and pair it with its label.

    ``filenames`` and ``labels`` are walked in lockstep; iteration stops at
    the end of the shorter one.

    Args:
        filenames: Iterable of image blob names.
        labels: Iterable of labels, aligned with ``filenames``.

    Returns:
        A tuple ``(X, y)`` of two lists: the preprocessed images (as returned
        by ``load_and_preprocess_image``) and the corresponding labels.
    """
    X = []
    y = []

    for path, label in zip(filenames, labels):
        X.append(load_and_preprocess_image(path))
        y.append(label)

    # The lists are returned as-is; no intermediate array copies of the whole
    # data set are built here.
    return X, y

In [None]:
# Load the CSV files that list, per split, the image paths and their labels.
df_train = pd.read_csv("FINAL_METER_ML_train_2000.csv")
df_val = pd.read_csv("FINAL_METER_ML_val.csv")
df_test = pd.read_csv("FINAL_METER_ML_test.csv")

# The 'Label' column is stored as text; parse each entry as a Python literal
# and turn it into a NumPy array.
for _split_df in (df_train, df_val, df_test):
    _split_df['Label'] = _split_df['Label'].apply(ast.literal_eval).apply(np.array)
del _split_df

# Fetch the images from Azure for every split. The image paths come from the
# 'Image_Folder' column, the labels from the parsed 'Label' column.
X_train, y_train = create_data_lists(
    df_train['Image_Folder'], np.array(df_train['Label']).tolist()
)
X_val, y_val = create_data_lists(
    df_val['Image_Folder'], np.array(df_val['Label']).tolist()
)
X_test, y_test = create_data_lists(
    df_test['Image_Folder'], np.array(df_test['Label']).tolist()
)

# Persist the image data so the model notebooks can load it later.
np.save('x_train.npy', X_train)
np.save('x_test.npy', X_test)
np.save('x_val.npy', X_val)


