## Galaxy Classification Notebook:
---

just for usability while we develop early features and model.

We want to build a deep learning galaxy CNN classification model, using Python and PyTorch.

We're using the Galaxy10 DECals Dataset.
The dataset contains 256 by 256 pixel colored galaxy images (g, r, and z band),
containing 17736 images classified into 10 classes.

The model is based on morphological classification using Deep Convolution Neural
Network. 

The model also uses astroNN dataset.

The model architecture:
feature extraction: input layer -> convolution -> tanh -> pooling.
Then we have classification: fully connected layer -> tanh -> softmax.

In [None]:
# libraries
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from astroNN.datasets import load_galaxy10
import json

# Note, unique dependency: AstroNN

import numpy as np
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
from PIL import Image

In [None]:
## Dataset preprocessing:

images, labels = load_galaxy10()

In [None]:
# Convert images to PyTorch tensor
images = torch.from_numpy(images).permute(0, 3, 1, 2).float()  # Change data format to PyTorch (N, H, W, C) to (N, C, H, W)

# Use labels as they are (assuming labels are already a PyTorch tensor)
labels = torch.from_numpy(labels).long()
#images = 1 - images

# Define a custom PyTorch Dataset
class GalaxyDataset(Dataset):
    def __init__(self, images, labels, transform=None):
        self.images = images
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image = self.images[idx]
        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)

        return image, label

# Define a transformation for the dataset
transform = transforms.Compose([
    transforms.ToTensor(),
])

## Exploring the dataset for preprocessing:

In [None]:
# Create a custom PyTorch Dataset
galaxy_dataset = GalaxyDataset(images, labels) #, transform = transforms

# Split the dataset into training and testing sets
train_size = int(0.8 * len(galaxy_dataset))
test_size = len(galaxy_dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(galaxy_dataset, [train_size, test_size])

# DataLoader
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


sample_image, sample_label = galaxy_dataset[7]

# Print the label
print(f"Label: {sample_label}")

# Convert the image tensor back to NumPy array for visualization
sample_image_np = (sample_image.permute(1, 2, 0).numpy() * 255).astype(np.uint8)

# Visualize the image as grayscale
plt.imshow(sample_image_np, cmap='gray')
plt.show()

In [None]:
# loading hyperparameters
"""
with open('params.json') as f:
    params = json.load(f)

# Extract hyperparameters
num_classes = params["num_classes"]
batch_size = params["batch_size"]
learning_rate = params["learning_rate"]
num_epochs = params["num_epochs"]
random_seed = params["random_seed"]
"""

num_classes = 10
batch_size = 32
learning_rate = 0.1
num_epochs = 40
random_seed = 42

# Set random seed for reproducibility
torch.manual_seed(random_seed)


In [None]:
# Define the CNN model
class GalaxyCNN(nn.Module):
    def __init__(self, num_classes=10):
        super(GalaxyCNN, self).__init__()

        # Feature extraction
        self.features = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, padding=1),
            nn.Tanh(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        # Classification
        self.classification = nn.Sequential(
            nn.Linear(16 * 128 * 128, 256),
            nn.Tanh(),
            nn.Linear(256, num_classes),
            nn.Softmax(dim=1),
        )

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        x = self.classification(x)
        return x

In [None]:
# Instantiate the model
model = GalaxyCNN(num_classes=num_classes)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training the model
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0

    for images, labels in test_loader:
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {running_loss / len(test_loader)}')


In [None]:
# Evaluate the model
model.eval()
correct = 0
total = 0

with torch.no_grad():
    for images, labels in test_loader:
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = correct / total
print(f'Test Accuracy: {100 * accuracy:.2f}%')

## Notebook 2:
---

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, random_split
from torchvision import models
import torchvision
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from astroNN.datasets import load_galaxy10
import numpy as np
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset




In [2]:
class CNN(nn.Module):
    def __init__(self, num_classes):
        super(CNN, self).__init__()
        # Convolutional layers
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        # Max pooling layer
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        # Fully connected layers
        self.fc1 = nn.Linear(128 * 32 * 32, 512)  # Adjusted the input size here
        self.fc2 = nn.Linear(512, num_classes)
        # Dropout
        self.dropout = nn.Dropout(0.5)
        # Activation function
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.relu(self.conv1(x))
        x = self.pool(x)
        x = self.relu(self.conv2(x))
        x = self.pool(x)
        x = self.relu(self.conv3(x))
        x = self.pool(x)
        # Check the size after convolution and pooling
        # print(x.size())  # Uncomment to print the size for debugging
        x = x.view(-1, 128 * 32 * 32)  # Adjusted the view size here
        x = self.dropout(x)
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

In [7]:
class GalaxyDataset(Dataset):
    def __init__(self, data, labels, transform=None):
        self.data = data
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        sample = {'data': self.data[idx], 'label': self.labels[idx]}

        if self.transform:
            sample = self.transform(sample)

        return sample


def to_categorical(y, num_classes):
    return np.eye(num_classes)[y]

def load_images_labels(size=17735):
    # To load images and labels (will download automatically at the first time)
    # First time downloading location will be ~/.astroNN/datasets/
    images, labels = load_galaxy10()

    # To convert the labels to categorical 10 classes
    #labels = to_categorical(labels, 10)
    # To convert to desirable type
    labels = torch.tensor(labels[:size], dtype=torch.float32)
    print("labels", labels.unique())
    images = torch.tensor(images[:size], dtype=torch.float32)
    return images, labels

def split_dataset(images, labels):
    train_idx, test_idx = train_test_split(np.arange(labels.shape[0]), test_size=0.1)
    train_images, train_labels, test_images, test_labels = images[train_idx], labels[train_idx], images[test_idx], labels[test_idx]
    return train_images, train_labels, test_images, test_labels

In [8]:
# Define the ConvNet architecture
class GalaxyCNN(nn.Module):
    def __init__(self, num_classes):
        super(GalaxyCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(16 * 64 * 64, 512)
        self.fc2 = nn.Linear(512, num_classes)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.pool(self.relu(self.conv1(x)))
        x = x.view(-1, 16 * 64 * 64)
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        x = self.softmax(x)
        return x

In [17]:
if __name__ == "__main__":
    images, labels = load_images_labels()
    images = images.permute(0, 3, 1, 2)
    train_loader = zip(images, labels)
    print("labels", labels.unique())
    print("train_loader", train_loader)

    # Instantiate the model
    num_classes = 10  # Adjusted for 10 classes
    num_epochs = 5  # Define num_epochs
    model = CNN(num_classes)

    # Define loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    # This part can be in another script where you load data, preprocess, and train the model.
    # For brevity, here's a placeholder for training the model using your dataset:

    # Training loop (you need to replace this with your actual data loading and training process)
    for epoch in range(num_epochs):  # Define num_epochs
        print("labels", labels[:outputs.shape[0]].long())
        for images, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(images)
            
            # Ensure the shapes of outputs and labels match
            # Resize labels to match the batch size of the outputs
            labels_resized = labels[:outputs.shape[0]].long()  # Convert labels to torch.long
            
            loss = criterion(outputs, labels_resized)
            loss.backward()
            optimizer.step()

        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item()}")

    # Save the trained model (optional)
    torch.save(model.state_dict(), 'trained_model.pth')

C:\Users\dvirz\.astroNN\datasets\Galaxy10_DECals.h5 was found!
labels tensor([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])
labels tensor([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])
train_loader <zip object at 0x000001FB75C6FDC0>
labels tensor([0])


IndexError: slice() cannot be applied to a 0-dim tensor.

In [16]:
([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])

NameError: name 'pytorch' is not defined