In [26]:
# # This Python 3 environment comes with many helpful analytics libraries installed
# # It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# # For example, here's several helpful packages to load

# import numpy as np # linear algebra
# import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# # Input data files are available in the read-only "../input/" directory
# # For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# # You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# # You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [27]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset, random_split
from torchvision import transforms, models
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix
import scipy

In [28]:


# Define custom dataset
class Sat4Dataset(Dataset):
    """Map-style dataset that pairs an indexable image container with labels.

    Args:
        images: Indexable container; each item must support ``.astype``
            (e.g. a NumPy array whose first axis enumerates samples).
        labels: Indexable container of per-sample labels; each item must
            support ``.astype``.
        transform: Optional callable applied to the float32 image before it
            is returned. Skipped when falsy.
    """

    def __init__(self, images, labels, transform=None):
        self.images, self.labels, self.transform = images, labels, transform

    def __len__(self):
        # The image container alone determines the dataset size.
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for ``idx``: float32 image, int64 label."""
        sample = self.images[idx].astype(np.float32)
        target = self.labels[idx].astype(np.int64)

        if not self.transform:
            return sample, target

        return self.transform(sample), target



In [29]:
# Load pre-trained ResNet and modify it for our task
class ResNetModel(nn.Module):
    """ResNet-18 whose final fully connected layer is replaced by a new head.

    Args:
        num_classes: Number of logits produced by the replacement ``fc``
            layer. Defaults to 4, the value that used to be hard-coded.
        pretrained: When True (the default, matching the previous behaviour)
            the backbone is created with torchvision's pretrained weights;
            when False it is randomly initialised.
    """

    def __init__(self, num_classes=4, pretrained=True):
        super().__init__()
        # Newer torchvision releases deprecate ``pretrained=`` in favour of
        # ``weights=``. Use the new API when the weights enum exists and fall
        # back to the legacy keyword on older installations.
        weights_enum = getattr(models, "ResNet18_Weights", None)
        if weights_enum is None:
            self.resnet = models.resnet18(pretrained=pretrained)
        else:
            # IMAGENET1K_V1 is the checkpoint ``pretrained=True`` used to load.
            weights = weights_enum.IMAGENET1K_V1 if pretrained else None
            self.resnet = models.resnet18(weights=weights)
        # Swap the original classifier for one sized to this task; the new
        # layer is freshly initialised regardless of ``pretrained``.
        self.resnet.fc = nn.Linear(self.resnet.fc.in_features, num_classes)

    def forward(self, x):
        """Run ``x`` through the wrapped ResNet and return its output logits."""
        return self.resnet(x)

In [30]:

def image_processing(df_x):
    """Turn a frame of flattened pixel rows into scaled 3-channel images.

    The frame's values are reshaped to ``(-1, 28, 28, 4)``, converted to
    float, and divided by 255.0. Only the first three entries of the last
    axis are returned (this notebook treats them as the RGB channels).

    Args:
        df_x: DataFrame whose total element count is a multiple of 28*28*4.

    Returns:
        Float array of shape ``(n_images, 28, 28, 3)``.
    """
    kept_channels = slice(None, 3)
    scaled = df_x.values.reshape(-1, 28, 28, 4).astype(float) / 255.0
    return scaled[..., kept_channels]

def label_processing(df_y):
    """Convert one-hot label columns into integer class ids.

    The first three columns of ``df_y`` are tested, in order, for the value
    1; the first match decides the class. Rows with no match get the
    fallback class. The mapping is:

        0 -> "Barren Land" (column 0 == 1)
        1 -> "Trees"       (column 1 == 1)
        2 -> "Grassland"   (column 2 == 1)
        3 -> "None"        (anything else)

    The input frame is not modified: no helper ``Labels`` column is added,
    so the result no longer depends on the frame having exactly four columns.

    Args:
        df_y: DataFrame with at least three columns holding the one-hot
            indicators described above.

    Returns:
        ``numpy.ndarray`` of dtype int64 with one class id per row.
    """
    is_barren = df_y.iloc[:, 0].values == 1
    is_trees = df_y.iloc[:, 1].values == 1
    is_grassland = df_y.iloc[:, 2].values == 1
    # np.select takes the first condition that holds for each row, which
    # reproduces the precedence of the original if/elif chain.
    labels = np.select([is_barren, is_trees, is_grassland], [0, 1, 2], default=3)
    return labels.astype(np.int64)


In [31]:
# Directory prefix (with trailing slash) that the CSV file names are appended to.
path = "/kaggle/input/deepsat4-subsets/"

In [32]:
# Read chunk 1 of the features (x) and labels (y) for the train and test splits.
# NOTE(review): read_csv treats the first row as a header by default - confirm
# the chunk files actually carry a header row, otherwise one sample is lost.
df_x_train = pd.read_csv(path + "chunk_x_train_1.csv")
df_y_train = pd.read_csv(path + "chunk_y_train_1.csv")
df_x_test = pd.read_csv(path + "chunk_x_test_1.csv")
df_y_test = pd.read_csv(path + "chunk_y_test_1.csv")

In [33]:
# Training split: build the scaled 3-channel image array and the integer class ids.
reshaped_x_train_rgb = image_processing(df_x_train)
train_labels = label_processing(df_y_train)

In [34]:
# Test split: same preprocessing as the training split.
reshaped_x_test_rgb = image_processing(df_x_test)
test_labels = label_processing(df_y_test)

In [35]:
# Sanity check: list the distinct class ids present in each split.
print("Unique train labels:", np.unique(train_labels))
print("Unique test labels:", np.unique(test_labels))

Unique train labels: [0 1 2 3]
Unique test labels: [0 1 2 3]


In [36]:
# Per-sample preprocessing, executed in this order by Sat4Dataset.__getitem__:
#   1. ToTensor  - turns the H x W x C float array into a C x H x W tensor
#                  (presumably without rescaling, since the input is already
#                  float - verify against the installed torchvision).
#   2. Resize    - resizes each image to 224 x 224.
#   3. Normalize - applies (x - 0.5) / 0.5 per channel; this maps values to
#                  [-1, 1] assuming the inputs lie in [0, 1].
# NOTE(review): pretrained ImageNet backbones are commonly paired with the
# ImageNet mean/std rather than 0.5 - confirm this choice is intentional.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((224, 224)),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

In [37]:
# Wrap the preprocessed arrays in datasets; both splits share the same transform.
train_dataset = Sat4Dataset(reshaped_x_train_rgb, train_labels, transform)
test_dataset = Sat4Dataset(reshaped_x_test_rgb, test_labels, transform)
# Sample counts per split (not referenced by the other cells visible here).
train_size = len(train_dataset)
test_size = len(test_dataset)

In [38]:
# Batches of 64: training order is reshuffled every epoch, while the test
# order is fixed so predictions line up with the labels across runs.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [39]:
# Check if GPU is available and set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Bare expression so the notebook displays which device was selected.
device

device(type='cuda')

In [42]:
# Initialize model, loss function, and optimizer
# Seed torch's global RNG first so the random initialisation of the new
# classification head and the shuffling order drawn later by the training
# DataLoader are repeatable across "Restart & Run All". GPU kernels may still
# introduce small run-to-run differences.
SEED = 42
torch.manual_seed(SEED)
model = ResNetModel().to(device)
# Cross-entropy over the raw logits returned by the model.
criterion = nn.CrossEntropyLoss()
# All parameters (backbone and new head) are optimised with one learning rate.
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [43]:
# Train for a fixed number of epochs, printing the average batch loss per epoch.
num_epochs = 10
for epoch in range(num_epochs):
    # Switch to training mode at the start of every epoch.
    model.train()
    # Sum of the per-batch loss values seen in this epoch.
    running_loss = 0.0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)  # Move images and labels to GPU
        # Clear gradients left over from the previous step before backprop.
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # .item() extracts a Python float so no graph is kept alive.
        running_loss += loss.item()
    
    # Reported value is the mean of the batch losses (sum / number of batches).
    print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {running_loss / len(train_loader)}')



Epoch 1/10, Loss: 0.09784566311609617
Epoch 2/10, Loss: 0.05030644109310068
Epoch 3/10, Loss: 0.03734261839734087
Epoch 4/10, Loss: 0.033703596977799635
Epoch 5/10, Loss: 0.02256792621347485
Epoch 6/10, Loss: 0.025355051814895117
Epoch 7/10, Loss: 0.021344313670517034
Epoch 8/10, Loss: 0.01587795396662469
Epoch 9/10, Loss: 0.0186504463507661
Epoch 10/10, Loss: 0.013325952854656885


In [45]:
# Evaluation
# Switch to inference mode and collect predictions over the whole test loader.
model.eval()
y_true = []
y_pred = []
# Gradients are not needed for inference, so tracking is disabled.
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)  # Move images and labels to GPU
        outputs = model(images)
        # torch.max over dim 1 returns (values, indices); the index of the
        # largest logit is taken as the predicted class.
        _, predicted = torch.max(outputs, 1)
        # Move results back to the CPU so scikit-learn can consume them.
        y_true.extend(labels.cpu().numpy())
        y_pred.extend(predicted.cpu().numpy())

# Confusion matrix and classification report
# Both are computed from the true labels (first argument) and the predictions.
print(confusion_matrix(y_true, y_pred))
print(classification_report(y_true, y_pred))



[[2625    0   28    2]
 [   0 1990   23    1]
 [   8    3 1839    0]
 [   3    0    0 3478]]
              precision    recall  f1-score   support

           0       1.00      0.99      0.99      2655
           1       1.00      0.99      0.99      2014
           2       0.97      0.99      0.98      1850
           3       1.00      1.00      1.00      3481

    accuracy                           0.99     10000
   macro avg       0.99      0.99      0.99     10000
weighted avg       0.99      0.99      0.99     10000

