In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, datasets  
import pandas as pd
from PIL import Image

In [27]:
# **1. Load and Prepare Data**
data = pd.read_csv('data/train/labels.csv')

class SatSightDataset(torch.utils.data.Dataset):
    def __init__(self, csv_file, image_dir, transform=None):
        self.data_info = csv_file
        self.image_dir = image_dir
        self.transform = transform  

    def __getitem__(self, index):
        path = self.image_dir + self.data_info.iloc[index, 0]
        print("Image Path:", path)  # Add this line
        image = Image.open(path).convert('RGB')  # Ensure images are RGB

        if self.transform:
            image = self.transform(image)

        label = torch.tensor([
            self.data_info.iloc[index, 1], 
            self.data_info.iloc[index, 2]
        ]).float() 

        return image, label

    def __len__(self):
        return len(self.data_info)

# Data transforms (consider augmentations if needed)
data_transform = transforms.Compose([
    transforms.Resize(256),  # Resize for a common input size
    transforms.CenterCrop(224),
    transforms.ToTensor(), 
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # ImageNet normalization
])

sky_dataset = SatSightDataset(csv_file=data, image_dir='data/train/', transform=data_transform)



In [28]:

# Split into training and validation
train_size = int(0.8 * len(sky_dataset))
val_size = len(sky_dataset) - train_size
train_set, val_set = torch.utils.data.random_split(sky_dataset, [train_size, val_size])

dataloaders = {
    'train': torch.utils.data.DataLoader(train_set, batch_size=32, shuffle=True),
    'val': torch.utils.data.DataLoader(val_set, batch_size=32) 
}

# **2. Define Model Architecture**
class SatSightNet(nn.Module):
    def __init__(self):
        super(SatSightNet, self).__init__()
        self.cnn_layers = nn.Sequential(
            # Add convolutional layers, pooling, etc. (experiment here!)
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2) 
        )
        self.linear_layers = nn.Sequential(
            # Add fully-connected layers
            nn.Flatten(), 
            #nn.Linear(50 * 50 * 32,  256), # Adjust based on your model
            nn.Linear(self.calculate_input_size(32, 112, 112), 256),  # Dynamic calculation
            nn.ReLU(),
            nn.Linear(256, 2) # Output x, y rotation
        )
    
    def calculate_input_size(self, channels, height, width):
        return channels * height * width

    def forward(self, x):
        print(x.shape)  # Print shape after input
        x = self.cnn_layers(x)
        print(x.shape)  # Print shape after convolutional layers
        x = self.linear_layers(x)
        print(x.shape)  # Print shape after linear layers (before output)
        return x

model = SatSightNet()



In [29]:

# **3. Training Loop**
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 
model.to(device)

criterion = nn.MSELoss()  # Mean Squared Error for regression
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 3. Training Loop (Continued)
for epoch in range(10):  # Adjust number of epochs
    for phase in ['train', 'val']:
        if phase == 'train':
            model.train()
        else:
            model.eval()

        running_loss = 0.0

        for images, labels in dataloaders[phase]:
            images = images.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()

            with torch.set_grad_enabled(phase == 'train'):
                outputs = model(images)
                loss = criterion(outputs, labels)

                if phase == 'train':
                    loss.backward()
                    optimizer.step()

            running_loss += loss.item() * images.size(0)

        epoch_loss = running_loss / len(dataloaders[phase].dataset)

        print('Epoch {}/{} {} Loss: {:.4f}'.format(epoch+1, 10, phase, epoch_loss))

# 4. (Optional) Save the trained model
torch.save(model.state_dict(), 'models/sat-sight-model.pth') # Save the model


Image Path: data/train/image_00005.png
Image Path: data/train/image_00007.png
Image Path: data/train/image_00002.png
Image Path: data/train/image_00008.png
Image Path: data/train/image_00009.png
Image Path: data/train/image_00001.png
Image Path: data/train/image_00013.png
Image Path: data/train/image_00011.png
Image Path: data/train/image_00012.png
Image Path: data/train/image_00014.png
Image Path: data/train/image_00003.png
torch.Size([11, 3, 224, 224])
torch.Size([11, 32, 112, 112])
torch.Size([11, 2])
Epoch 1/10 train Loss: 11202.0938
Image Path: data/train/image_00010.png
Image Path: data/train/image_00006.png
Image Path: data/train/image_00004.png
torch.Size([3, 3, 224, 224])
torch.Size([3, 32, 112, 112])
torch.Size([3, 2])
Epoch 1/10 val Loss: 45657.1250
Image Path: data/train/image_00005.png
Image Path: data/train/image_00001.png
Image Path: data/train/image_00007.png
Image Path: data/train/image_00013.png
Image Path: data/train/image_00003.png
Image Path: data/train/image_00014