In [None]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

In [2]:
# Label encoder Function.
def label_encoder(csv_file, categories=None):
    """Read a CSV and add a one-hot ``Label`` column derived from ``Type``.

    Parameters
    ----------
    csv_file : str or path-like
        CSV file to load. It must contain a ``Type`` column; a
        ``Type_encoded`` column, if present, is discarded.
    categories : sequence of str, optional
        Fixed, ordered list of class names defining the one-hot layout.
        Pass the same list for every split (train/val/test) so that index
        ``i`` of ``Label`` always means the same class. When omitted, the
        layout is inferred from the ``Type`` values found in ``csv_file``
        alone (the original behaviour), which yields a different layout
        for any file that is missing a class.

    Returns
    -------
    pandas.DataFrame
        The loaded frame without ``Type_encoded`` and with a new ``Label``
        column holding one list of 0/1 integers per row.

    Raises
    ------
    KeyError
        If the CSV has no ``Type`` column.
    ValueError
        Raised by scikit-learn when ``categories`` is given and ``Type``
        contains a value outside it (default ``handle_unknown="error"``).
    """
    # Read the CSV file
    df = pd.read_csv(csv_file)

    # Remove the "Type_encoded" column if it exists. ``errors="ignore"`` keeps
    # the function usable on files that never had (or already lost) it.
    df = df.drop(columns="Type_encoded", errors="ignore")

    encoder_kwargs = {
        "dtype": int,
        # OneHotEncoder expects one category list per input feature.
        "categories": "auto" if categories is None else [list(categories)],
    }
    try:
        # scikit-learn >= 1.2 spells the dense-output switch ``sparse_output``.
        encoder = OneHotEncoder(sparse_output=False, **encoder_kwargs)
    except TypeError:
        # Older scikit-learn only knows the legacy ``sparse`` keyword
        # (deprecated in 1.2, removed in 1.4).
        encoder = OneHotEncoder(sparse=False, **encoder_kwargs)

    # Fit and transform the "Type" column (2-D selection: one feature)
    types_encoded = encoder.fit_transform(df[["Type"]])

    # Create a new column "Label" with the encoded arrays, one list per row
    df["Label"] = types_encoded.tolist()

    return df



### Test dataset

In [4]:
# Encode the test split and preview its first five rows.
# NOTE: the one-hot layout is inferred from this file's own `Type` values.
df_test = label_encoder(csv_file="METER_ML_test.csv")
print(df_test.head())

                                       Image_Folder   Type  \
0  samples/test_images/35.17852862_-79.99927082.png  CAFOs   
1      samples/test_images/47.863317_-92.810639.png  CAFOs   
2      samples/test_images/33.440833_-85.435833.png  CAFOs   
3  samples/test_images/45.12488405_-94.24194995.png  CAFOs   
4  samples/test_images/45.33317705_-94.50533971.png  CAFOs   

                   Label  
0  [1, 0, 0, 0, 0, 0, 0]  
1  [1, 0, 0, 0, 0, 0, 0]  
2  [1, 0, 0, 0, 0, 0, 0]  
3  [1, 0, 0, 0, 0, 0, 0]  
4  [1, 0, 0, 0, 0, 0, 0]  




In [5]:
# Persist the encoded test split without the index column. Each `Label` cell
# is a Python list, so the CSV stores it as text such as "[1, 0, 0, 0, 0, 0, 0]".
df_test.to_csv(path_or_buf="FINAL_METER_ML_test.csv", index=False)


### Validation dataset

In [6]:
# Encode the validation split and preview its first five rows.
# NOTE: the one-hot layout is inferred from this file's own `Type` values.
df_val = label_encoder(csv_file="METER_ML_val.csv")
print(df_val.head())

                                        Image_Folder                    Type  \
0              samples/val_images/42.793_-76.106.png             WWTreatment   
1        samples/val_images/37.823062_-85.540612.png               Landfills   
2  samples/val_images/35.56162459029078_-80.99540...                Negative   
3  samples/val_images/36.030734677_-90.939679356.png  RefineriesAndTerminals   
4              samples/val_images/43.515_-96.025.png                Negative   

                   Label  
0  [0, 0, 0, 0, 0, 0, 1]  
1  [0, 1, 0, 0, 0, 0, 0]  
2  [0, 0, 0, 1, 0, 0, 0]  
3  [0, 0, 0, 0, 0, 1, 0]  
4  [0, 0, 0, 1, 0, 0, 0]  




In [7]:
# Persist the encoded validation split without the index column. Each `Label`
# cell is a Python list, so the CSV stores it as text such as "[0, 1, 0, 0, 0, 0, 0]".
df_val.to_csv(path_or_buf="FINAL_METER_ML_val.csv", index=False)


### Train dataset

In [10]:
# Encode the training split and preview its first five rows.
# NOTE: the one-hot layout is inferred from this file's own `Type` values.
df_train = label_encoder(csv_file="METER_ML_train.csv")
print(df_train.head())

                                        Image_Folder         Type  \
0   samples/train_images/43.94827109_-93.6494963.png        CAFOs   
1  samples/train_images/43.00443446102501_-78.208...     Negative   
2             samples/train_images/38.616_-77.27.png  WWTreatment   
3        samples/train_images/44.67296_-95.15357.png        CAFOs   
4           samples/train_images/43.923_-111.611.png  WWTreatment   

                   Label  
0  [1, 0, 0, 0, 0, 0, 0]  
1  [0, 0, 0, 1, 0, 0, 0]  
2  [0, 0, 0, 0, 0, 0, 1]  
3  [1, 0, 0, 0, 0, 0, 0]  
4  [0, 0, 0, 0, 0, 0, 1]  




In [11]:
# Persist the encoded training split without the index column. Each `Label`
# cell is a Python list, so the CSV stores it as text such as "[1, 0, 0, 0, 0, 0, 0]".
df_train.to_csv(path_or_buf="FINAL_METER_ML_train.csv", index=False)
