In [None]:
from azure.storage.blob import BlobServiceClient
import json
import os
from PIL import Image
import matplotlib.pyplot as plt
import io  # Used to convert bytes to a file-like object
import cv2
import numpy as np
import pandas as pd
import random

# Load the training manifest; the processing loop below reads its 'Type' and
# 'Image_Folder' columns.
df = pd.read_csv('METER_ML_train.csv')

# Storage configuration.
# The connection string embeds the storage account key, so it is read from the
# environment instead of being hardcoded in the notebook.
# NOTE(review): the key that was previously committed in this cell should be
# rotated, since it is exposed in the notebook's history.
connection_string = os.environ.get("AZURE_STORAGE_CONNECTION_STRING")
if not connection_string:
    raise RuntimeError(
        "Set the AZURE_STORAGE_CONNECTION_STRING environment variable to the "
        "storage account connection string before running this notebook."
    )
container_name = "meterml"

# Create the account-level client and a client scoped to the working container.
blob_service_client = BlobServiceClient.from_connection_string(connection_string)
container_client = blob_service_client.get_container_client(container_name)

print(df.head())

In [None]:
# Per-type image tally, pre-seeded with zero for every known facility type.
image_counts = dict.fromkeys(
    [
        'Negative',
        'CAFOs',
        'WWTreatment',
        'Landfills',
        'RefineriesAndTerminals',
        'ProcessingPlants',
        'Mines',
    ],
    0,
)


def horizontal_flip(image):
    """Return the image mirrored left-to-right."""
    flip_code = 1  # cv2.flip: a positive code flips around the vertical (y) axis
    return cv2.flip(image, flip_code)

def vertical_flip(image):
    """Return the image mirrored top-to-bottom."""
    flip_code = 0  # cv2.flip: a zero code flips around the horizontal (x) axis
    return cv2.flip(image, flip_code)

def rotate_90_clockwise(image):
    """Return the image rotated a quarter turn clockwise."""
    quarter_turn_cw = cv2.ROTATE_90_CLOCKWISE
    return cv2.rotate(image, quarter_turn_cw)

def increase_brightness_and_saturation(image):
    """Return a copy of the image with saturation raised by 25 and brightness by 15.

    The image is converted to HSV, the S and V channels are shifted, and the
    result is converted back with the inverse colour conversion. Values that
    would exceed 255 saturate at 255 instead of wrapping around.

    Args:
        image: Colour image array accepted by ``cv2.cvtColor``
            (assumes 8-bit, 3 channels, as produced by ``cv2.imdecode`` with
            ``IMREAD_COLOR`` elsewhere in this notebook -- TODO confirm for
            other callers).

    Returns:
        A new uint8 array in the same colour layout as the input.
    """
    hsv_image = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)

    # Widen before adding: uint8 arithmetic wraps (250 + 25 -> 19), which would
    # make the clip below a no-op and turn bright pixels dark.
    hsv_wide = hsv_image.astype(np.int16)
    hsv_wide[:, :, 1] += 25  # Add to saturation
    hsv_wide[:, :, 2] += 15  # Add to brightness

    hsv_image = np.clip(hsv_wide, 0, 255).astype(np.uint8)
    return cv2.cvtColor(hsv_image, cv2.COLOR_HSV2RGB)

def add_gaussian_noise(image):
    """Return a uint8 copy of the image with zero-mean Gaussian noise (sigma 25) added.

    The sum is clipped to [0, 255] before being cast back to uint8.
    """
    sigma = 25.0
    perturbation = np.random.normal(loc=0.0, scale=sigma, size=image.shape)
    noisy = np.clip(image.astype(np.float32) + perturbation, 0, 255)
    return noisy.astype(np.uint8)

def random_augment(image):
    """Apply two distinct, randomly chosen augmentations to the image in sequence.

    Returns the augmented image, or None when an augmentation raises or the
    result is empty; in both cases a message is printed instead of raising.
    """
    candidates = [
        horizontal_flip,
        vertical_flip,
        rotate_90_clockwise,
        increase_brightness_and_saturation,
        add_gaussian_noise,
    ]

    try:
        # Draw two different transforms and chain them: second(first(image)).
        first, second = random.sample(candidates, 2)
        result = second(first(image))

        # An empty array means the augmentation produced nothing usable.
        if result.size == 0:
            print("Augmentation resulted in an empty image.")
            return None
        return result
    except Exception as e:
        print(f"An error occurred during augmentation: {str(e)}")
        return None

# Earlier lambda-based draft of random_augment, left commented out for reference (not executed)
#def random_augment(image):
    # Define augmentation methods
#    methods = [
#        lambda image: cv2.flip(image, 1),  # Horizontal flip
#        lambda image: cv2.flip(image, 0),  # Vertical flip
#        lambda image: cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE),  # Rotate 90 degrees
#        lambda image: cv2.cvtColor(cv2.add(cv2.cvtColor(image, cv2.COLOR_RGB2HSV), np.array([15, 25, 0])), cv2.COLOR_HSV2RGB),  # Increase brightness and saturation
#        lambda image: (image.astype(np.float32) + np.random.normal(loc=0.0, scale=25.0, size=image.shape)).clip(0, 255).astype(np.uint8)  # Add Gaussian noise
#    ]
    # Randomly select two different augmentation methods
#    chosen_methods = random.sample(methods, 2)
    
    # Apply the two chosen methods sequentially
#    augmented_image = chosen_methods[0](image)
#    augmented_image = chosen_methods[1](augmented_image)

# Function to PNG-encode an image and upload it to Azure Blob Storage under a folder prefix
def upload_png_to_blob(image, file_name, image_type=None, folder="augmented_final"):
    """Encode ``image`` as PNG and upload it as ``<folder>/<file_name>``.

    Args:
        image: Image array accepted by ``cv2.imencode``.
        file_name: Blob file name (without the folder prefix).
        image_type: Label returned alongside the blob name. When omitted, the
            module-level ``image_type`` variable is used if it exists, which
            keeps two-argument callers working; otherwise None is returned
            for the label.
        folder: Virtual folder (blob name prefix) to upload into.

    Returns:
        ``(blob_name, image_type)`` on success, or None when encoding or the
        upload fails (a message is printed in both failure cases).
    """
    if image_type is None:
        # Legacy behaviour: the label used to be read from a global set by the
        # processing loop. Looking it up explicitly avoids a NameError that was
        # previously raised after a successful upload and reported as an
        # upload failure.
        image_type = globals().get("image_type")

    blob_name = f"{folder}/{file_name}"

    # Encode first so no blob client is created for an image that cannot be encoded.
    success, encoded_image = cv2.imencode('.png', image)
    if not success:
        print(f"Error encoding image: {file_name}")
        return None

    blob_client = container_client.get_blob_client(blob_name)
    try:
        # overwrite=True replaces any existing blob with the same name.
        blob_client.upload_blob(encoded_image.tobytes(), overwrite=True)
        return blob_name, image_type
    except Exception as e:
        print(f"Error uploading image {file_name} to blob: {str(e)}")
        return None

# Upper bound on how many images of a single type are augmented and uploaded.
MAX_IMAGES_PER_TYPE = 10000

# Global counter used to build the zero-padded file names of augmented images.
augmented_image_counter = 0

# Process images: for every CSV row, download the source blob, augment it and
# upload the result under a sequential name.
for image_type, file_path in zip(df['Type'], df['Image_Folder']):
    # The 'Image_Folder' value is used as the blob name as-is; adjust here if
    # the CSV stores paths in a different format.

    # Types missing from the predefined dictionary start counting at zero.
    if image_type not in image_counts:
        image_counts[image_type] = 0

    # Skip types that already reached their quota.
    if image_counts[image_type] >= MAX_IMAGES_PER_TYPE:
        continue

    try:
        # Exact-name existence check. A prefix listing with a substring match
        # would also accept blobs whose names merely contain file_path, after
        # which the download of file_path itself fails.
        if not container_client.get_blob_client(file_path).exists():
            print(f"Error: File not found in blob storage for path {file_path}")
            continue

        # Download image from Azure Blob Storage
        blob = container_client.download_blob(file_path).readall()
    except Exception as e:
        # Keep the batch going when a single blob cannot be fetched.
        print(f"Error downloading image {file_path} from blob: {str(e)}")
        continue

    nparr = np.frombuffer(blob, np.uint8)
    image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)

    if image is None or image.size == 0:
        print(f"Error: Failed to decode image from blob storage for path {file_path}")
        continue

    # Augment the image
    augmented_image = random_augment(image)

    if augmented_image is None or augmented_image.size == 0:
        print(f"Error: Augmentation failed or resulted in an empty image for {file_path}")
        continue

    # Build the augmented image file name from the global counter
    augmented_file_name = f"{augmented_image_counter:05}.png"

    # Upload the augmented image back to blob
    result = upload_png_to_blob(augmented_image, augmented_file_name)
    if result:
        blob_name = result[0]
        # Count successful uploads so the per-type cap above actually takes effect.
        image_counts[image_type] += 1
        print(f"Uploaded: {blob_name}, Type: {image_type}")

    # Increment the global counter for each augmented image
    augmented_image_counter += 1

print("Augmentation process completed.")