In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import random
import numpy as np
from azure.storage.blob import BlobServiceClient
import json
import os
from PIL import Image
import io  # Used to convert bytes to a file-like object
import cv2


In [None]:
# Set up storage.
# SECURITY: the storage account key must never be stored in the notebook.
# The connection string is read from the environment instead; any key that
# was previously committed in this file should be rotated.
connection_string = os.environ.get("AZURE_STORAGE_CONNECTION_STRING")
if not connection_string:
    raise RuntimeError(
        "Set the AZURE_STORAGE_CONNECTION_STRING environment variable "
        "before running this notebook."
    )
container_name = "meterml"

# Create the clients used by the rest of the notebook.
blob_service_client = BlobServiceClient.from_connection_string(connection_string)
container_client = blob_service_client.get_container_client(container_name)

In [None]:
# Load the training metadata CSV into a DataFrame.
train = pd.read_csv('METER_ML_train.csv')

In [None]:
# Display the loaded training table.
train

# Check label distribution

In [None]:
# Count how many rows carry each label, most frequent label first.
label_counts = train.groupby("Type").count().reset_index()
train_dist_v2 = label_counts[["Type", "Type_encoded"]].sort_values("Type_encoded", ascending=False)

# Total number of rows in the dataset.
n = train.shape[0]

# Relative frequency of each label, derived from the already-sorted counts so
# that bars and count labels below are guaranteed to share the same order.
train_dist = train_dist_v2.assign(Frequency=train_dist_v2["Type_encoded"] / n)
train_dist = train_dist[["Type", "Frequency"]].sort_values("Frequency", ascending=False)

# Bar chart of the label distribution.
fig, ax = plt.subplots(figsize=(10, 6))
bars = ax.bar(train_dist['Type'], train_dist['Frequency'], color='teal')

# Annotate every bar with the absolute count (va='bottom' puts the text above the bar).
for bar, count in zip(bars, train_dist_v2['Type_encoded']):
    label_x = bar.get_x() + bar.get_width()/2
    ax.text(label_x, bar.get_height(), count, va='bottom')

ax.set_title('Histogram of Frequency Distribution')
ax.set_xlabel('Type')
ax.set_ylabel('Frequency')
plt.xticks(rotation=45, ha="right")
plt.tight_layout()
plt.show()

This shows a significant class imbalance, which needs to be addressed.

Based on the absolute numbers above, 10,000 instances per label seems like a reasonable target.

## Setting up the Balancing

In [None]:
# Target number of instances per label.
x = 10000

# Per-label counts, with the count column given a descriptive name.
train_count = train_dist_v2.rename(columns={"Type_encoded": "Count"})

# Positive deficit: the label is short of the target; negative: it has a surplus.
train_count["Deficit"] = x - train_count["Count"]
# A label needs augmentation unless it already exceeds the target.
train_count["Augment"] = [not deficit < 0 for deficit in train_count["Deficit"]]
# Multiplier that would scale the current count up (or down) to the target.
train_count["Factor"] = [round(x / count, 2) for count in train_count["Count"]]

train_count



In [None]:
def calculate_brightness(image):
    """Return the mean pixel value of ``image`` after converting it to mode "L".

    ``image`` only needs a ``convert`` method whose result ``np.array`` can
    consume (the notebook passes PIL images).
    """
    grey_pixels = np.array(image.convert("L"))
    return np.mean(grey_pixels)

# Function to calculate contrast
def calculate_contrast(image):
    """Return the standard deviation of the grey-level pixel values of ``image``.

    The original implementation relied on ``ImageStat``, which this notebook
    never imports, so every call raised ``NameError``. The population standard
    deviation is computed with NumPy instead, mirroring ``calculate_brightness``.
    """
    grayscale_pixels = np.array(image.convert("L"), dtype=np.float64)
    return float(grayscale_pixels.std())


# Images darker or flatter than these thresholds are flagged for removal.
MIN_BRIGHTNESS = 60
MIN_CONTRAST = 30

# Only blobs below this prefix belong to the training set.
TRAIN_IMAGE_PREFIX = "samples/train_images"
# The Image_Folder paths printed further down end in ".png"; the previous
# ".jpg"-only filter therefore skipped them. JPEG names are still accepted.
IMAGE_EXTENSIONS = (".png", ".jpg", ".jpeg")

# Names of blobs that fail the quality check.
files_to_remove = []

# Let the service filter by prefix instead of listing the whole container.
blobs = container_client.list_blobs(name_starts_with=TRAIN_IMAGE_PREFIX)
for blob in blobs:
    if not blob.name.lower().endswith(IMAGE_EXTENSIONS):
        continue

    print(f"Processing {blob.name}")
    blob_client = container_client.get_blob_client(blob.name)
    blob_data = blob_client.download_blob().readall()
    image = Image.open(io.BytesIO(blob_data))

    # Calculate brightness and contrast
    brightness = calculate_brightness(image)
    contrast = calculate_contrast(image)

    print(f"Brightness: {brightness}, Contrast: {contrast}")

    # Flag the image when either measure falls below its threshold
    if brightness < MIN_BRIGHTNESS or contrast < MIN_CONTRAST:
        files_to_remove.append(blob.name)

print("Number of images to remove due to low quality: ", len(files_to_remove))

Since no low-quality images were found, as can be seen above, no additional cleaning of the dataset is necessary.

The Factor is the multiplier needed to bring each label's sample size to ~10,000.


In [None]:
def downsample(df, metadata, n_keep=8000, random_state=42):
    """Cap over-represented labels at ``n_keep`` rows and keep all other labels whole.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to downsample; must contain a ``Type`` column.
    metadata : pandas.DataFrame
        One row per label with a ``Type`` column and a boolean ``Augment``
        column. Labels with ``Augment == False`` are downsampled, labels with
        ``Augment == True`` are copied unchanged.
    n_keep : int, default 8000
        Maximum number of rows kept for a downsampled label. If a label has
        fewer rows than this, all of its rows are kept instead of raising.
    random_state : int, default 42
        Seed passed to ``DataFrame.sample`` so the selection is reproducible.

    Returns
    -------
    pandas.DataFrame
        Downsampled labels first, then the untouched labels, each group in the
        order the labels appear in ``metadata``; the index is reset.
    """
    needs_augment = metadata["Augment"].astype(bool)
    capped_labels = metadata.loc[~needs_augment, "Type"].unique().tolist()
    untouched_labels = metadata.loc[needs_augment, "Type"].unique().tolist()

    # Collect the per-label pieces and concatenate once: growing a frame inside
    # the loop is quadratic, and starting from an empty typed frame degrades
    # the column dtypes.
    pieces = []
    for label in capped_labels:
        # just use data of a certain label
        df_label = df[df["Type"] == label]
        # never ask for more rows than the label has
        n_rows = min(n_keep, len(df_label))
        pieces.append(df_label.sample(n=n_rows, random_state=random_state))

    for label in untouched_labels:
        pieces.append(df[df["Type"] == label])

    if not pieces:
        # Same empty schema the original implementation started from.
        return pd.DataFrame(columns=['Image_Folder', 'Type', 'Type_encoded'])

    return pd.concat(pieces, ignore_index=True)

# Downsample the over-represented labels according to the balancing table.
downsampled = downsample(train, train_count)

# Persist the result; later cells re-read this file.
downsampled.to_csv('METER_ML_train_downsampled.csv', index=False)

# Preview goes last: only the final expression of a cell is displayed.
downsampled.head()


In [None]:
# (rows, columns) of the downsampled table.
downsampled.shape

In [44]:
# Assuming you've initialized 'container_client' somewhere in your code as:
# connection_string = "your_connection_string_here"
# blob_service_client = BlobServiceClient.from_connection_string(connection_string)
# container_client = blob_service_client.get_container_client("your_container_name")

def upload_png_to_blob(image, file_name):
    """PNG-encode ``image`` and upload it as ``augmented_final/<file_name>``.

    Uses the notebook-level ``container_client``. An existing blob with the
    same name is overwritten.

    Returns the blob name on success, or ``None`` when encoding or uploading
    fails (the failure is printed, not raised).
    """
    target_blob = f"augmented_final/{file_name}"
    client = container_client.get_blob_client(target_blob)

    encoded_ok, png_buffer = cv2.imencode('.png', image)
    if encoded_ok:
        try:
            client.upload_blob(png_buffer.tobytes(), overwrite=True)
        except Exception as upload_error:
            print(f"Error uploading image {file_name} to blob: {str(upload_error)}")
            return None
        return target_blob

    print(f"Error encoding image: {file_name}")
    return None

def horizontal_flip(image):
    """Return ``image`` mirrored left-to-right (``cv2.flip`` with flip code 1)."""
    return cv2.flip(image, 1)

def vertical_flip(image):
    """Return ``image`` mirrored top-to-bottom (``cv2.flip`` with flip code 0)."""
    return cv2.flip(image, 0)

def rotate_90_clockwise(image):
    """Return ``image`` rotated by 90 degrees clockwise via ``cv2.rotate``."""
    return cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE)

def increase_brightness_and_saturation(image, saturation_delta=25, brightness_delta=15):
    """Return a copy of ``image`` with its HSV saturation and value channels shifted.

    Parameters
    ----------
    image : numpy.ndarray
        8-bit, 3-channel image as produced by ``cv2.imdecode``.
    saturation_delta : int, default 25
        Amount added to the saturation channel.
    brightness_delta : int, default 15
        Amount added to the value (brightness) channel.

    Returns
    -------
    numpy.ndarray
        ``uint8`` image with both channels saturated to the range [0, 255].

    Notes
    -----
    The previous version added the offsets directly to the ``uint8`` array,
    which wraps around (e.g. 250 + 15 -> 9) *before* ``np.clip`` runs, turning
    bright or highly saturated pixels dark or grey. The arithmetic is now done
    in a wider integer type so clipping actually saturates.
    """
    # Same colour-conversion codes as before; only the arithmetic dtype changed.
    hsv_image = cv2.cvtColor(image, cv2.COLOR_RGB2HSV).astype(np.int16)
    hsv_image[:, :, 1] += saturation_delta  # Add to saturation
    hsv_image[:, :, 2] += brightness_delta  # Add to brightness
    hsv_image = np.clip(hsv_image, 0, 255).astype(np.uint8)
    return cv2.cvtColor(hsv_image, cv2.COLOR_HSV2RGB)

def add_gaussian_noise(image):
    """Return ``image`` with zero-mean Gaussian noise (standard deviation 25) added.

    Noise is drawn from NumPy's global random state, added in floating point,
    clipped to [0, 255] and cast back to ``uint8``.
    """
    perturbation = np.random.normal(loc=0.0, scale=25.0, size=image.shape)
    as_float = image.astype(np.float32)
    return np.clip(as_float + perturbation, 0, 255).astype(np.uint8)

def random_augment(image):
    """Apply two distinct, randomly chosen augmentations to ``image`` in sequence.

    The candidates are the flip, rotation, brightness/saturation and noise
    helpers defined in this notebook; the pair is drawn with ``random.sample``.

    Returns the augmented image, or ``None`` if the result is empty or any
    step raises (the problem is printed rather than propagated).
    """
    candidates = [
        horizontal_flip,
        vertical_flip,
        rotate_90_clockwise,
        increase_brightness_and_saturation,
        add_gaussian_noise
    ]

    try:
        # Draw two different transforms and chain them: second(first(image)).
        first_step, second_step = random.sample(candidates, 2)
        result = second_step(first_step(image))

        # Guard against a transform that produced no pixels at all.
        if result.size == 0:
            print("Augmentation resulted in an empty image.")
            return None
        return result
    except Exception as augment_error:
        print(f"An error occurred during augmentation: {str(augment_error)}")
        return None

# Function to randomly augment image
#def random_augment(image):
    # Define augmentation methods
#    methods = [
#        lambda image: cv2.flip(image, 1),  # Horizontal flip
#        lambda image: cv2.flip(image, 0),  # Vertical flip
#        lambda image: cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE),  # Rotate 90 degrees
#        lambda image: cv2.cvtColor(cv2.add(cv2.cvtColor(image, cv2.COLOR_RGB2HSV), np.array([15, 25, 0])), cv2.COLOR_HSV2RGB),  # Increase brightness and saturation
#        lambda image: (image.astype(np.float32) + np.random.normal(loc=0.0, scale=25.0, size=image.shape)).clip(0, 255).astype(np.uint8)  # Add Gaussian noise
#    ]
    # Randomly select two different augmentation methods
#    chosen_methods = random.sample(methods, 2)
    
    # Apply the two chosen methods sequentially
#    augmented_image = chosen_methods[0](image)
#    augmented_image = chosen_methods[1](augmented_image)

# Function to process images for a given type
def process_images_for_type(df, image_type, needed_augmentations, container_client, counter):
    """Create and upload augmented images for one label.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with ``Type`` and ``Image_Folder`` (blob path) columns.
    image_type : str
        Label whose images are augmented.
    needed_augmentations : dict
        Maps each label to the number of augmented images to create.
    container_client
        Client used to download the source images (must provide
        ``download_blob(path).readall()``).
    counter : int
        Number used for the first uploaded file name; it is incremented after
        every successful upload.

    Returns
    -------
    pandas.DataFrame
        One row per uploaded image with columns ``Image_Folder`` and ``Type``.
        Empty (but with those columns) when nothing is needed or no source
        image of that type exists.
    """
    result_columns = ['Image_Folder', 'Type']

    # Filter DataFrame for the given type
    df_type = df[df['Type'] == image_type]

    # Initialize counters
    augmented_image_counter = counter
    augmented_counts = 0
    augmentations_needed = needed_augmentations[image_type]
    print(augmentations_needed)

    # List to store results
    results = []

    # Nothing to do for a non-positive quota. With zero source rows the
    # doubling loop below could never reach the quota and would spin forever.
    if augmentations_needed <= 0 or df_type.empty:
        if df_type.empty and augmentations_needed > 0:
            print(f"No source images of type {image_type}; nothing to augment.")
        print(df_type.shape)
        return pd.DataFrame(results, columns=result_columns)

    # Repeat the source rows until there are at least as many as augmentations needed.
    while df_type.shape[0] < augmentations_needed:
        df_type = pd.concat([df_type, df_type], ignore_index=True)
    print(df_type.shape)

    # Main loop for image processing
    for file_path in df_type['Image_Folder']:
        # Stop as soon as the quota is met instead of walking the remaining rows.
        if augmented_counts >= augmentations_needed:
            break

        print(file_path)
        # Download and decode the image
        blob = container_client.download_blob(file_path).readall()
        nparr = np.frombuffer(blob, np.uint8)
        image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)

        if image is None or image.size == 0:
            print(f"Error: Failed to decode image from blob storage for path {file_path}")
            continue

        # Perform random augmentation
        augmented_image = random_augment(image)
        if augmented_image is None or augmented_image.size == 0:
            continue

        augmented_file_name = f"{augmented_image_counter:05}.png"
        # Upload the augmented image back to blob
        blob_name = upload_png_to_blob(augmented_image, augmented_file_name)
        if blob_name:
            print(f"Uploaded: {blob_name}, Type: {image_type}")
            results.append({'Image_Folder': blob_name, 'Type': image_type})
            # Only successful uploads consume a file number and count towards the quota.
            augmented_image_counter += 1
            augmented_counts += 1

    # Convert results to DataFrame and return
    return pd.DataFrame(results, columns=result_columns)



In [47]:
# Number of augmented images to create per type for a target of 6000 per type
# (a negative value means the type already has more rows than the target).
target_per_type = 6000
df = pd.read_csv('METER_ML_train_downsampled.csv')
type_counts = df['Type'].value_counts()
deficit_per_type = target_per_type - type_counts
needed_augmentations = deficit_per_type.to_dict()
needed_augmentations



{'Negative': -2000,
 'CAFOs': -2000,
 'WWTreatment': -2000,
 'Landfills': 2085,
 'RefineriesAndTerminals': 2183,
 'ProcessingPlants': 4170,
 'Mines': 4294}

In [32]:
# Augment the "CAFOs" images; uploaded files are numbered starting at 0.
cafos_df = process_images_for_type(df, "CAFOs", needed_augmentations, container_client, 0)
cafos_df

samples/train_images/37.4846615524_-120.911345486.png
Uploaded: augmented_final/00000.png, Type: CAFOs
samples/train_images/44.156046_-95.944686.png
Uploaded: augmented_final/00001.png, Type: CAFOs
samples/train_images/34.97102787_-77.83076532.png
Uploaded: augmented_final/00002.png, Type: CAFOs
samples/train_images/43.5597697_-92.60627194.png
Uploaded: augmented_final/00003.png, Type: CAFOs
samples/train_images/31.761336_-85.693611.png
Uploaded: augmented_final/00004.png, Type: CAFOs
samples/train_images/38.724818_-87.082898.png
Uploaded: augmented_final/00005.png, Type: CAFOs
samples/train_images/39.0275996_-75.9167334.png
Uploaded: augmented_final/00006.png, Type: CAFOs
samples/train_images/45.53997889_-96.61817112.png
Uploaded: augmented_final/00007.png, Type: CAFOs
samples/train_images/34.71769393_-78.80143095.png
Uploaded: augmented_final/00008.png, Type: CAFOs
samples/train_images/43.9752183_-93.82953817.png
Uploaded: augmented_final/00009.png, Type: CAFOs
samples/train_images/4

Unnamed: 0,Image_Folder,Type
0,augmented_final/00000.png,CAFOs
1,augmented_final/00001.png,CAFOs
2,augmented_final/00002.png,CAFOs
3,augmented_final/00003.png,CAFOs
4,augmented_final/00004.png,CAFOs
...,...,...
1995,augmented_final/01995.png,CAFOs
1996,augmented_final/01996.png,CAFOs
1997,augmented_final/01997.png,CAFOs
1998,augmented_final/01998.png,CAFOs


In [33]:
# Persist the CAFOs augmentation records (the row index is written as well,
# because index=False is not passed).
cafos_df.to_csv("save/cafos.csv")
cafos_df

Unnamed: 0,Image_Folder,Type
0,augmented_final/00000.png,CAFOs
1,augmented_final/00001.png,CAFOs
2,augmented_final/00002.png,CAFOs
3,augmented_final/00003.png,CAFOs
4,augmented_final/00004.png,CAFOs
...,...,...
1995,augmented_final/01995.png,CAFOs
1996,augmented_final/01996.png,CAFOs
1997,augmented_final/01997.png,CAFOs
1998,augmented_final/01998.png,CAFOs


In [34]:
# Augment the "WWTreatment" images; file numbering starts at 2001.
wwt_df = process_images_for_type(df, "WWTreatment", needed_augmentations, container_client, 2001)
wwt_df.to_csv("save/wwt.csv")
wwt_df

samples/train_images/39.855_-83.183.png
Uploaded: augmented_final/02001.png, Type: WWTreatment
samples/train_images/29.27_-94.849.png
Uploaded: augmented_final/02002.png, Type: WWTreatment
samples/train_images/34.442_-97.503.png
Uploaded: augmented_final/02003.png, Type: WWTreatment
samples/train_images/44.629_-89.306.png
Uploaded: augmented_final/02004.png, Type: WWTreatment
samples/train_images/43.783_-74.285.png
Uploaded: augmented_final/02005.png, Type: WWTreatment
samples/train_images/45.779_-87.906.png
Uploaded: augmented_final/02006.png, Type: WWTreatment
samples/train_images/38.422_-90.589.png
Uploaded: augmented_final/02007.png, Type: WWTreatment
samples/train_images/35.612_-84.459.png
Uploaded: augmented_final/02008.png, Type: WWTreatment
samples/train_images/42.633_-92.051.png
Uploaded: augmented_final/02009.png, Type: WWTreatment
samples/train_images/40.834_-80.325.png
Uploaded: augmented_final/02010.png, Type: WWTreatment
samples/train_images/42.84_-93.614.png
Uploaded: au

Unnamed: 0,Image_Folder,Type
0,augmented_final/02001.png,WWTreatment
1,augmented_final/02002.png,WWTreatment
2,augmented_final/02003.png,WWTreatment
3,augmented_final/02004.png,WWTreatment
4,augmented_final/02005.png,WWTreatment
...,...,...
1995,augmented_final/03996.png,WWTreatment
1996,augmented_final/03997.png,WWTreatment
1997,augmented_final/03998.png,WWTreatment
1998,augmented_final/03999.png,WWTreatment


In [46]:
# Augment the "Landfills" images; file numbering starts at 4001.
lf_df = process_images_for_type(df, "Landfills", needed_augmentations, container_client, 4001)
lf_df.to_csv("save/lf.csv")
lf_df

6085
(7830, 3)
samples/train_images/33.465_-111.8295.png
Uploaded: augmented_final/04001.png, Type: Landfills
samples/train_images/30.068694_-83.52375.png
Uploaded: augmented_final/04002.png, Type: Landfills
samples/train_images/38.452_-122.1829.png
Uploaded: augmented_final/04003.png, Type: Landfills
samples/train_images/34.962065_-78.448345.png
Uploaded: augmented_final/04004.png, Type: Landfills
samples/train_images/33.6322_-96.8419.png
Uploaded: augmented_final/04005.png, Type: Landfills
samples/train_images/36.943_-119.685.png
Uploaded: augmented_final/04006.png, Type: Landfills
samples/train_images/35.3983944014_-119.722441372.png
Uploaded: augmented_final/04007.png, Type: Landfills
samples/train_images/31.475001_-97.258334.png
Uploaded: augmented_final/04008.png, Type: Landfills
samples/train_images/45.471056_-98.621521.png
Uploaded: augmented_final/04009.png, Type: Landfills
samples/train_images/40.146017_-120.124045.png
Uploaded: augmented_final/04010.png, Type: Landfills
samp

Unnamed: 0,Image_Folder,Type
0,augmented_final/04001.png,Landfills
1,augmented_final/04002.png,Landfills
2,augmented_final/04003.png,Landfills
3,augmented_final/04004.png,Landfills
4,augmented_final/04005.png,Landfills
...,...,...
6080,augmented_final/10081.png,Landfills
6081,augmented_final/10082.png,Landfills
6082,augmented_final/10083.png,Landfills
6083,augmented_final/10084.png,Landfills


In [48]:
# Augment the "RefineriesAndTerminals" images; file numbering starts at 10086.
rat_df = process_images_for_type(df, "RefineriesAndTerminals", needed_augmentations, container_client, 10086)
rat_df.to_csv("save/rat.csv")
rat_df

2183
(3817, 3)
samples/train_images/42.783416974_-84.649150457.png
Uploaded: augmented_final/10086.png, Type: RefineriesAndTerminals
samples/train_images/45.647973_-84.469159.png
Uploaded: augmented_final/10087.png, Type: RefineriesAndTerminals
samples/train_images/44.879945_-93.010564.png
Uploaded: augmented_final/10088.png, Type: RefineriesAndTerminals
samples/train_images/39.122269_-94.440728.png
Uploaded: augmented_final/10089.png, Type: RefineriesAndTerminals
samples/train_images/41.753562_-86.115233.png
Uploaded: augmented_final/10090.png, Type: RefineriesAndTerminals
samples/train_images/43.08124_-86.21941.png
Uploaded: augmented_final/10091.png, Type: RefineriesAndTerminals
samples/train_images/48.47182_-122.55912.png
Uploaded: augmented_final/10092.png, Type: RefineriesAndTerminals
samples/train_images/31.364167_-89.267778.png
Uploaded: augmented_final/10093.png, Type: RefineriesAndTerminals
samples/train_images/27.952312_-82.437933.png
Uploaded: augmented_final/10094.png, Typ

Unnamed: 0,Image_Folder,Type
0,augmented_final/10086.png,RefineriesAndTerminals
1,augmented_final/10087.png,RefineriesAndTerminals
2,augmented_final/10088.png,RefineriesAndTerminals
3,augmented_final/10089.png,RefineriesAndTerminals
4,augmented_final/10090.png,RefineriesAndTerminals
...,...,...
2178,augmented_final/12264.png,RefineriesAndTerminals
2179,augmented_final/12265.png,RefineriesAndTerminals
2180,augmented_final/12266.png,RefineriesAndTerminals
2181,augmented_final/12267.png,RefineriesAndTerminals


In [50]:
# Augment the "ProcessingPlants" images; file numbering starts at 12269.
pp_df = process_images_for_type(df, "ProcessingPlants", needed_augmentations, container_client, 12269)
pp_df.to_csv("save/pp.csv")
pp_df

4170
(7320, 3)
samples/train_images/32.672469000199165_-103.555320999692.png
Uploaded: augmented_final/12269.png, Type: ProcessingPlants
samples/train_images/35.974943000201755_-100.3305280001502.png
Uploaded: augmented_final/12270.png, Type: ProcessingPlants
samples/train_images/48.40152_-102.91418.png
Uploaded: augmented_final/12271.png, Type: ProcessingPlants
samples/train_images/27.65835313199984_-97.28413268600002.png
Uploaded: augmented_final/12272.png, Type: ProcessingPlants
samples/train_images/35.296422_-92.678525.png
Uploaded: augmented_final/12273.png, Type: ProcessingPlants
samples/train_images/37.45972200033353_-101.1697219996672.png
Uploaded: augmented_final/12274.png, Type: ProcessingPlants
samples/train_images/33.057548999890244_-103.60792050013602.png
Uploaded: augmented_final/12275.png, Type: ProcessingPlants
samples/train_images/39.487297_-108.111356.png
Uploaded: augmented_final/12276.png, Type: ProcessingPlants
samples/train_images/39.52649199959757_-80.51812699987

Unnamed: 0,Image_Folder,Type
0,augmented_final/12269.png,ProcessingPlants
1,augmented_final/12270.png,ProcessingPlants
2,augmented_final/12271.png,ProcessingPlants
3,augmented_final/12272.png,ProcessingPlants
4,augmented_final/12273.png,ProcessingPlants
...,...,...
4165,augmented_final/16434.png,ProcessingPlants
4166,augmented_final/16435.png,ProcessingPlants
4167,augmented_final/16436.png,ProcessingPlants
4168,augmented_final/16437.png,ProcessingPlants


In [51]:
# Augment the "Mines" images; file numbering starts at 16438.
mines_df = process_images_for_type(df, "Mines", needed_augmentations, container_client, 16438)
mines_df.to_csv("save/mines.csv")
mines_df

4294
(6824, 3)
samples/train_images/37.57534858113871_-87.54669968772745.png
Uploaded: augmented_final/16438.png, Type: Mines
samples/train_images/40.12896931565577_-78.80429442676085.png
Uploaded: augmented_final/16439.png, Type: Mines
samples/train_images/41.49986975053246_-108.706191831694.png
Uploaded: augmented_final/16440.png, Type: Mines
samples/train_images/39.74515201657318_-110.87906238748668.png
Uploaded: augmented_final/16441.png, Type: Mines
samples/train_images/40.7965109161787_-78.65946519923234.png
Uploaded: augmented_final/16442.png, Type: Mines
samples/train_images/40.361732169364686_-80.3723565647739.png
Uploaded: augmented_final/16443.png, Type: Mines
samples/train_images/34.794088816778604_-85.67949233636375.png
Uploaded: augmented_final/16444.png, Type: Mines
samples/train_images/38.07464496394584_-89.72625450187033.png
Uploaded: augmented_final/16445.png, Type: Mines
samples/train_images/37.29870661262944_-87.21874268124408.png
Uploaded: augmented_final/16446.png

Unnamed: 0,Image_Folder,Type
0,augmented_final/16438.png,Mines
1,augmented_final/16439.png,Mines
2,augmented_final/16440.png,Mines
3,augmented_final/16441.png,Mines
4,augmented_final/16442.png,Mines
...,...,...
4289,augmented_final/20727.png,Mines
4290,augmented_final/20728.png,Mines
4291,augmented_final/20729.png,Mines
4292,augmented_final/20730.png,Mines


In [52]:
# Recompute the number of augmented images per type, now for a target of 9000 per type.
target_per_type = 9000
df = pd.read_csv('METER_ML_train_downsampled.csv')
type_counts = df['Type'].value_counts()
deficit_per_type = target_per_type - type_counts
needed_augmentations = deficit_per_type.to_dict()
needed_augmentations


{'Negative': 1000,
 'CAFOs': 1000,
 'WWTreatment': 1000,
 'Landfills': 5085,
 'RefineriesAndTerminals': 5183,
 'ProcessingPlants': 7170,
 'Mines': 7294}

In [53]:
# NOTE: needed_augmentations has to be adjusted before this call
# (presumably to the 9000-per-type values computed in the preceding cell — confirm).
# Augment the "Negative" images; file numbering starts at 20732.
negative_df = process_images_for_type(df, "Negative", needed_augmentations, container_client, 20732)
negative_df.to_csv("save/negative.csv")
negative_df

1000
(8000, 3)
samples/train_images/40.693441764855855_-106.67741682336091.png
Uploaded: augmented_final/20732.png, Type: Negative
samples/train_images/35.12582817186615_-96.78225291618212.png
Uploaded: augmented_final/20733.png, Type: Negative
samples/train_images/39.63801272919165_-90.75772497727404.png
Uploaded: augmented_final/20734.png, Type: Negative
samples/train_images/28.84344022231737_-82.66275580041895.png
Uploaded: augmented_final/20735.png, Type: Negative
samples/train_images/29.328057174703765_-89.59833779825269.png
Uploaded: augmented_final/20736.png, Type: Negative
samples/train_images/45.09744769320058_-109.81039491336816.png
Uploaded: augmented_final/20737.png, Type: Negative
samples/train_images/31.820349115038766_-86.7805559394074.png
Uploaded: augmented_final/20738.png, Type: Negative
samples/train_images/32.9355173403876_-116.08229494193051.png
Uploaded: augmented_final/20739.png, Type: Negative
samples/train_images/38.63452913560649_-94.53864225386616.png
Uploade

Unnamed: 0,Image_Folder,Type
0,augmented_final/20732.png,Negative
1,augmented_final/20733.png,Negative
2,augmented_final/20734.png,Negative
3,augmented_final/20735.png,Negative
4,augmented_final/20736.png,Negative
...,...,...
995,augmented_final/21727.png,Negative
996,augmented_final/21728.png,Negative
997,augmented_final/21729.png,Negative
998,augmented_final/21730.png,Negative


In [None]:
# df = pd.read_csv('METER_ML_train_downsampled.csv')

# # Count the occurrence of each type in the CSV
# type_counts = df['Type'].value_counts()

# # Calculate how many more images are needed to reach 10,000 for each type
# needed_augmentations = 10000 - type_counts

# # Convert needed_augmentations to a dictionary for easier manipulation
# needed_augmentations = needed_augmentations.to_dict()

# # Initialize a dictionary to track the number of augmented images
# augmented_counts = {type: 0 for type in needed_augmentations}

# # Initialize the DataFrame index for iteration
# index = 0
# max_index = len(df) - 1

# def horizontal_flip(image):
#     return cv2.flip(image, 1)

# def vertical_flip(image):
#     return cv2.flip(image, 0)

# def rotate_90_clockwise(image):
#     return cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE)

# def increase_brightness_and_saturation(image):
#     hsv_image = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)
#     hsv_image[:, :, 1] += 25  # Add to saturation
#     hsv_image[:, :, 2] += 15  # Add to brightness
#     hsv_image = np.clip(hsv_image, 0, 255)
#     return cv2.cvtColor(hsv_image, cv2.COLOR_HSV2RGB)

# def add_gaussian_noise(image):
#     noise = np.random.normal(loc=0.0, scale=25.0, size=image.shape)
#     noisy_image = image.astype(np.float32) + noise
#     noisy_image = np.clip(noisy_image, 0, 255)
#     return noisy_image.astype(np.uint8)

# def random_augment(image):
#     methods = [
#         horizontal_flip,
#         vertical_flip,
#         rotate_90_clockwise,
#         increase_brightness_and_saturation,
#         add_gaussian_noise
#     ]

#     try:
#         # Randomly choose two different methods and apply them sequentially
#         chosen_methods = random.sample(methods, 2)
#         augmented_image = chosen_methods[0](image)
#         augmented_image = chosen_methods[1](augmented_image)

#         # Check if the augmentation resulted in an empty image
#         if augmented_image.size == 0:
#             print("Augmentation resulted in an empty image.")
#             return None
#         return augmented_image
#     except Exception as e:
#         print(f"An error occurred during augmentation: {str(e)}")
#         return None

# # Function to randomly augment image
# #def random_augment(image):
#     # Define augmentation methods
# #    methods = [
# #        lambda image: cv2.flip(image, 1),  # Horizontal flip
# #        lambda image: cv2.flip(image, 0),  # Vertical flip
# #        lambda image: cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE),  # Rotate 90 degrees
# #        lambda image: cv2.cvtColor(cv2.add(cv2.cvtColor(image, cv2.COLOR_RGB2HSV), np.array([15, 25, 0])), cv2.COLOR_HSV2RGB),  # Increase brightness and saturation
# #        lambda image: (image.astype(np.float32) + np.random.normal(loc=0.0, scale=25.0, size=image.shape)).clip(0, 255).astype(np.uint8)  # Add Gaussian noise
# #    ]
#     # Randomly select two different augmentation methods
# #    chosen_methods = random.sample(methods, 2)
    
#     # Apply the two chosen methods sequentially
# #    augmented_image = chosen_methods[0](image)
# #    augmented_image = chosen_methods[1](augmented_image)

# # Function to upload a png file to Azure Blob Storage with a given folder name
# def upload_png_to_blob(image, file_name):
#     blob_name = f"augmented_final/{file_name}"
#     blob_client = container_client.get_blob_client(blob_name)
#     success, encoded_image = cv2.imencode('.png', image)
#     if not success:
#         print(f"Error encoding image: {file_name}")
#         return None
#     try:
#         blob_client.upload_blob(encoded_image.tobytes(), overwrite=True)
#         return blob_name, image_type
#     except Exception as e:
#         print(f"Error uploading image {file_name} to blob: {str(e)}")
#         return None

# # Initialize a global counter for augmented images
# augmented_image_counter = 0

# # Main loop
# while any(count < needed_augmentations[type] for type, count in augmented_counts.items()):
#     row = df.iloc[index]
#     image_type = row['Type']
#     file_name = row['Image_Folder']
#     file_path = file_name  # Adjust based on actual path format in CSV

#     if augmented_counts[image_type] < needed_augmentations[image_type]:
#         # Image processing and augmentation logic
#         blob = container_client.download_blob(file_path).readall()
#         nparr = np.frombuffer(blob, np.uint8)
#         image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)

#         if image is None or image.size == 0:
#             print(f"Error: Failed to decode image from blob storage for path {file_path}")
#         else:
#             augmented_image = random_augment(image)
#             if augmented_image is not None and augmented_image.size != 0:
#                 augmented_file_name = f"{augmented_image_counter:05}.png"
#                 # Upload the augmented image back to blob
#                 result = upload_png_to_blob(augmented_image, augmented_file_name)
#                 if result:
#                     blob_name, image_type = result
#                     print(f"Uploaded: {blob_name}, Type: {image_type}")
#                 augmented_image_counter += 1
#                 augmented_counts[image_type] += 1

#     # Increment or reset index
#     index += 1
#     if index > max_index:
#         index = 0  # Reset index to loop over the DataFrame again

# print("Augmentation process completed.")

# #save the new resulting datframe to excel
# result.to_csv('FINAL_METER_ML_train_v2.csv', index=False)
# result