In [79]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
from sklearn import preprocessing
import logging

#model building
import torch
import torchvision
from torchvision.transforms import v2
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
from time import sleep


#plotting and evalueation
import seaborn as sns


DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

In [44]:
def pre_preprocessing(train_path, test_path, plot = True):

    #reading data 
    train_df = pd.read_csv(train_path)
    test_df = pd.read_csv(test_path)

    #Subtracting 1 from each value in target column to scale to 0 or1 
    y_train = (train_df["TARGET"]-1).to_numpy()

    #Saving the ID values
    train_ids = train_df["ID"]
    test_ids = test_df["ID"]

    #Dropping the ID colmn
    train_df = train_df.drop('ID', axis = 1)
    test_df = test_df.drop('ID', axis = 1)

    #Creating some additional features

    #Normalized Difference Vegetation Index
    train_df["NDVI"] = (train_df["nir_p50"] - train_df["red_p50"])/ (test_df["nir_p50"] + test_df["red_p50"]) 
    #Normalized Difference Water Index
    train_df["NDWI"] = (train_df["nir_p50"] - train_df["swir1_p50"])/ (test_df["nir_p50"] + test_df["swir1_p50"])

    train_df["red_green_ratio"] = train_df["red_p50"]/train_df["green_p50"]
    train_df["NIR_green_ratio"] = train_df["nir_p50"]/train_df["green_p50"]

    train_df["blue_red_ratio"] = train_df["blue_p50"]/train_df["red_p50"]
    train_df["swir_ratio"] = train_df["swir1_p50"]/train_df["swir1_p50"]

    train_df["VV_VH_ratio"] = train_df["VV_p50"]/train_df["VH_p50"]


    #convering to Numpy arrays
    x_train = train_df.to_numpy()
    x_test = test_df.to_numpy()

    #normalizing the data 
    x_train = preprocessing.normalize(x_train)
    x_test = preprocessing.normalize(x_test)

    #Converting to Torch Tensors
    x_train = torch.from_numpy(x_train).type(torch.float)
    x_test = torch.from_numpy(x_test).type(torch.float)
    y_train = torch.from_numpy(y_train).type(torch.float)

    return x_train, x_test, y_train, test_ids

In [26]:
#reading the data
kenya_train_df = pd.read_csv("geoai-challenge-for-agricultural-plastic-cover-mapping-with-satellite-imagery20240708-24674-1c1nnx3/Kenya_training.csv")
kenya_test_df = pd.read_csv("geoai-challenge-for-agricultural-plastic-cover-mapping-with-satellite-imagery20240708-24674-1c1nnx3/Kenya_testing.csv")

spain_train_df = pd.read_csv("geoai-challenge-for-agricultural-plastic-cover-mapping-with-satellite-imagery20240708-24674-1c1nnx3/Spain_training.csv")
spain_test_df = pd.read_csv("geoai-challenge-for-agricultural-plastic-cover-mapping-with-satellite-imagery20240708-24674-1c1nnx3/Spain_validation.csv")

vietnam_train_df = pd.read_csv("geoai-challenge-for-agricultural-plastic-cover-mapping-with-satellite-imagery20240708-24674-1c1nnx3/VNM_training.csv")
vietnam_test_df = pd.read_csv("geoai-challenge-for-agricultural-plastic-cover-mapping-with-satellite-imagery20240708-24674-1c1nnx3/VNM_testing.csv")

print(len(kenya_test_df.columns))
print(len(spain_test_df.columns))
print(len(vietnam_test_df.columns))


print((kenya_test_df.columns))
print((spain_test_df.columns))
print((vietnam_test_df.columns))



15
15
15
Index(['ID', 'lon', 'lat', 'blue_p50', 'green_p50', 'nir_p50', 'nira_p50',
       're1_p50', 're2_p50', 're3_p50', 'red_p50', 'swir1_p50', 'swir2_p50',
       'VV_p50', 'VH_p50'],
      dtype='object')
Index(['ID  ', 'lon          ', 'lat         ', 'blue_p50 ', 'green_p50 ',
       'nir_p50 ', 'nira_p50 ', 're1_p50 ', 're2_p50 ', 're3_p50 ', 'red_p50 ',
       'swir1_p50 ', 'swir2_p50 ', 'VV_p50       ', 'VH_p50'],
      dtype='object')
Index(['ID', 'Lon', 'Lat', 'blue_p50', 'green_p50', 'nir_p50', 'nira_p50',
       're1_p50', 're2_p50', 're3_p50', 'red_p50', 'swir1_p50', 'swir2_p50',
       'VV_p50', 'VH_p50'],
      dtype='object')


In [30]:
#Dropping the id Variable
y_train_kenya = (kenya_train_df["TARGET"]-1).to_numpy()

# y_train_vietnam = (vietnam_train_df["TARGET"]-1).to_numpy()
# y_test_vietnam = (vietnam_train_df["TARGET"]-1).to_numpy()

# y_train_spain = spain_train_df.to_numpy()
# y_test_spain = spain_test_df.to_numpy()


# #Dropping the id column
kenya_train_df = kenya_train_df.drop('ID', axis = 1)
kenya_test_df = kenya_test_df.drop('ID', axis = 1)

# vietnam_train_df = vietnam_train_df.drop('ID', axis = 1)
# vietnam_test_df = vietnam_test_df.drop('ID', axis = 1)

# spain_train_df = spain_train_df.drop('ID', axis = 1)
# spain_test_df = spain_test_df.drop('ID', axis = 1)



# #converting to np arrays
x_train_kenya = kenya_train_df.to_numpy()
x_test_kenya = kenya_test_df.to_numpy()

# x_train_vietnam = vietnam_train_df.to_numpy()
# x_test_vietnam = vietnam_test_df.to_numpy()

# x_train_spain = spain_train_df.to_numpy()
# x_test_spain = spain_test_df.to_numpy()



In [31]:
#normalizing the data 
x_train_kenya = preprocessing.normalize(x_train_kenya)
x_test_kenya = preprocessing.normalize(x_test_kenya)

# x_train_vietnam = preprocessing.normalize(x_train_vietnam)
# x_test_vietnam = preprocessing.normalize(x_test_vietnam)

# x_train_spain = preprocessing.normalize(x_train_spain)
# x_test_spain= preprocessing.normalize(x_test_spain)


# #Turning the data into torch torch tensors for gpu acceleration
x_train_kenya = torch.from_numpy(x_train_kenya).type(torch.float)
x_test_kenya = torch.from_numpy(x_test_kenya).type(torch.float)
y_train_kenya = torch.from_numpy(y_train_kenya).type(torch.float)

# x_train_vietnam = torch.from_numpy(x_train_vietnam).type(torch.float)
# x_test_vietnam= torch.from_numpy(x_test_vietnam).type(torch.float)
# y_train_vietnam = torch.from_numpy(y_train_vietnam).type(torch.float)
# y_test_vietnam = torch.from_numpy(y_test_vietnam).type(torch.float)

# x_train_spain = torch.from_numpy(x_train_spain).type(torch.float)
# x_test_spain = torch.from_numpy(x_test_spain).type(torch.float)
# y_train_spain = torch.from_numpy(y_train_spain).type(torch.float)
# y_test_spain = torch.from_numpy(y_test_spain).type(torch.float)

**Custom Dataset Class**

In [13]:
#Defining Custom Dataset
class CountryDataset(Dataset):
    def __init__(self, x, y):
        self.features = x
        self.labels = y

    def __len__(self):
        return len(self.features)
    
    def __getitem__(self, idx):
        return self.features[idx], self. labels[idx]


In [14]:
#appending all data framees

# Convert DataFrames to tensors before concatenation

**Instansiating Custom Datasets**

In [70]:
x_train_kenya, y_train_kenya,kenya_test_df, kenya_test_ids = pre_preprocessing(train_path = "geoai-challenge-for-agricultural-plastic-cover-mapping-with-satellite-imagery20240708-24674-1c1nnx3/Kenya_training.csv",
                                                               test_path = "geoai-challenge-for-agricultural-plastic-cover-mapping-with-satellite-imagery20240708-24674-1c1nnx3/Kenya_testing.csv")

In [71]:
batch_size = 32

#creating custom Datasets
train_dataset_kenya = CountryDataset(x_train_kenya, y_train_kenya)



#creating custom DataLoaders
kenya_train_dl = DataLoader(train_dataset_kenya,batch_size = batch_size, shuffle = True)

**Model Class**

In [72]:
#Creating the Module
class First_Model(nn.Module,):
    def __init__(self):
        super(First_Model, self).__init__()

        self.net = nn.Sequential(
            nn.Linear(22, 128),  # Input size 22features
            nn.ReLU(),
            nn.Linear(128, 256),
            nn.ReLU(),
            nn.Linear(256, 1),   # Output size is 1 for binary classification
            nn.Sigmoid()         # Use Sigmoid for binary classification
        )

    def forward(self, x):
        return self.net(x)


**Function for training the model**

In [None]:
def training(model, loss_fn, train_loader, opt, epochs = 1000):

    running_losses = []
    for epoch in range(epochs):
        model.train()
        epoch_loss = 0 
        for i, (datapoints, labels) in enumerate(tqdm(train_loader)):


            #forward proporgation
            outputs = model(datapoints)

            print("LABELS", labels.size())
            labels = labels.float().unsqueeze(1)

            print("OUTPUS", outputs.size())
            print("LABELS", labels.size())
            loss = loss_fn(outputs, labels)

            #backpropogation
            opt.zero_grad()
            loss.backward()
            opt.step()


            running_losses.append(loss.item())
            epoch_loss+=loss.item()

            # if (i+1) %10 == 0:
            #     print(f"epoch number {epoch+1}, loss = {loss.item()}")
    print("Training Finished")
    return running_losses


**Function to Calculate Accuracy of the Model**

In [76]:
def calculate_accuracy(model, test_loader):
    model.eval()  
    correct = 0
    total = 0

    with torch.no_grad():
        for datapoints, labels in test_loader:
            outputs = model(datapoints)

            #values between 0 and 1
            probabilities = torch.sigmoid(outputs)

            #binary classification
            predicted = (probabilities > 0.5).float()

            #adjusting shape
            labels = labels.unsqueeze(1)

            correct += (predicted == labels).sum().item()
            total += labels.size(0)


    accuracy = 100 * correct / total
    print(f"Test Accuracy: {accuracy}%")
    return accuracy

In [77]:
print(x_train_kenya.shape[1])

22


In [82]:
n_features = x_train_kenya.shape[1]
model = First_Model()
loss_function  = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(),lr = 0.001)

running_losses = training(model = model, loss_fn = loss_function, train_loader = kenya_train_dl, opt = optimizer)

accuracy =  calculate_accuracy(model, kenya_train_dl)

  0%|          | 0/32 [00:00<?, ?it/s]

OUTPUS torch.Size([32, 1])
LABELS torch.Size([32, 1, 14])





ValueError: Target size (torch.Size([32, 1, 14])) must be the same as input size (torch.Size([32, 1]))