In [1]:
pip install astroNN

Collecting astroNN
  Downloading astroNN-1.1.0-py3-none-any.whl.metadata (5.0 kB)
Collecting astroquery (from astroNN)
  Downloading astroquery-0.4.7-py3-none-any.whl.metadata (7.2 kB)
Collecting keras<2.16,>=2.15.0 (from tensorflow>=2.11.0->astroNN)
  Downloading keras-2.15.0-py3-none-any.whl.metadata (2.4 kB)
Collecting pyvo>=1.1 (from astroquery->astroNN)
  Downloading pyvo-1.5.1-py3-none-any.whl.metadata (4.7 kB)
Downloading astroNN-1.1.0-py3-none-any.whl (9.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.3/9.3 MB[0m [31m19.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading astroquery-0.4.7-py3-none-any.whl (5.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.3/5.3 MB[0m [31m10.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading keras-2.15.0-py3-none-any.whl (1.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m37.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyvo-1.5.1-py3-none-any.wh

In [2]:
from astroNN.datasets import load_galaxy10
from tensorflow.keras import utils
import numpy as np
from sklearn.model_selection import StratifiedShuffleSplit

# Load images and labels (will download automatically the first time)
images, labels = load_galaxy10()

# Convert the labels to categorical 10 classes
labels_categorical = utils.to_categorical(labels, 10)

# Convert to desirable type
labels_categorical = labels_categorical.astype(np.float32)
images = images.astype(np.float32)

# Split the dataset into training and testing sets while maintaining class distribution
sss = StratifiedShuffleSplit(n_splits=1, test_size=0.1, random_state=0)
for train_idx, test_idx in sss.split(images, labels):
    train_images, train_labels = images[train_idx], labels_categorical[train_idx]
    test_images, test_labels = images[test_idx], labels_categorical[test_idx]

# Import Image from PIL for image processing
from PIL import Image
import os

# Function to save images remains the same
def save_images(images, labels, base_folder="Galaxy10_images"):
    # Ensure base directory exists
    if not os.path.exists(base_folder):
        os.makedirs(base_folder)

    # Loop through each image and its corresponding label
    for idx, (image, label) in enumerate(zip(images, np.argmax(labels, axis=1))):
        # Define the directory name based on the label
        directory = os.path.join(base_folder, str(label))
        # Ensure this directory exists
        if not os.path.exists(directory):
            os.makedirs(directory)

        # Normalize and scale the image to uint8
        normalized_image = ((image - np.min(image)) / (np.max(image) - np.min(image))) * 255
        image_uint8 = normalized_image.astype('uint8')

        # Create an image from the numpy array
        image_pil = Image.fromarray(image_uint8)

        # Define the file name
        file_name = f"image_{idx}.png"

        # Save the image in the corresponding labeled folder
        image_pil.save(os.path.join(directory, file_name))

# Usage of save_images function remains the same
save_images(train_images, train_labels, base_folder="Galaxy10_train_images")
save_images(test_images, test_labels, base_folder="Galaxy10_test_images")

# Function to count images in folders remains the same
import os

def count_images_in_folders(base_folder):
    # Dictionary to hold the count of images per label
    label_counts = {}
    
    # List all items in the base folder
    for label_folder in os.listdir(base_folder):
        label_folder_path = os.path.join(base_folder, label_folder)
        
        # Check if the item is a directory
        if os.path.isdir(label_folder_path):
            # Count the number of image files in this directory
            count = len([name for name in os.listdir(label_folder_path) if os.path.isfile(os.path.join(label_folder_path, name))])
            label_counts[label_folder] = count
    
    return label_counts

# Usage
train_counts = count_images_in_folders("Galaxy10_train_images")
test_counts = count_images_in_folders("Galaxy10_test_images")

print("Training set counts per class:", train_counts)
print("Test set counts per class:", test_counts)

# Usage of count_images_in_folders function remains the same

# Now you can continue to save your images and count them as before
save_images(train_images, train_labels, base_folder="Galaxy10_train_images")
save_images(test_images, test_labels, base_folder="Galaxy10_test_images")

# After saving, you can count the images in each folder to verify the stratified distribution
train_counts = count_images_in_folders("Galaxy10_train_images")
test_counts = count_images_in_folders("Galaxy10_test_images")

print("Training set counts per class:", train_counts)
print("Test set counts per class:", test_counts)

2024-04-02 19:50:17.337094: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-02 19:50:17.337237: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-02 19:50:17.521110: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
Galaxy10_DECals.h5: 100%|█████████▉| 2.73G/2.74G [03:55<00:00, 12.7MB/s]

Downloaded Galaxy10 successfully to /root/.astroNN/datasets/Galaxy10_DECals.h5


Galaxy10_DECals.h5: 2.74GB [04:04, 11.2MB/s]                            


Training set counts per class: {'2': 2380, '0': 973, '7': 2365, '3': 1824, '5': 1839, '6': 1646, '9': 1686, '8': 1281, '1': 1668, '4': 300}
Test set counts per class: {'2': 265, '0': 108, '7': 263, '3': 203, '5': 204, '6': 183, '9': 187, '8': 142, '1': 185, '4': 34}
Training set counts per class: {'2': 2380, '0': 973, '7': 2365, '3': 1824, '5': 1839, '6': 1646, '9': 1686, '8': 1281, '1': 1668, '4': 300}
Test set counts per class: {'2': 265, '0': 108, '7': 263, '3': 203, '5': 204, '6': 183, '9': 187, '8': 142, '1': 185, '4': 34}


In [3]:
!zip -r Galaxy10_train_images.zip Galaxy10_train_images
!zip -r Galaxy10_test_images.zip Galaxy10_test_images




  adding: Galaxy10_train_images/ (stored 0%)
  adding: Galaxy10_train_images/2/ (stored 0%)
  adding: Galaxy10_train_images/2/image_10958.png (deflated 0%)
  adding: Galaxy10_train_images/2/image_8704.png (deflated 0%)
  adding: Galaxy10_train_images/2/image_11062.png (deflated 0%)
  adding: Galaxy10_train_images/2/image_13549.png (deflated 0%)
  adding: Galaxy10_train_images/2/image_1120.png (deflated 0%)
  adding: Galaxy10_train_images/2/image_3179.png (deflated 0%)
  adding: Galaxy10_train_images/2/image_6610.png (deflated 0%)
  adding: Galaxy10_train_images/2/image_10654.png (deflated 0%)
  adding: Galaxy10_train_images/2/image_10093.png (deflated 0%)
  adding: Galaxy10_train_images/2/image_3035.png (deflated 0%)
  adding: Galaxy10_train_images/2/image_13902.png (deflated 0%)
  adding: Galaxy10_train_images/2/image_6728.png (deflated 0%)
  adding: Galaxy10_train_images/2/image_7849.png (deflated 0%)
  adding: Galaxy10_train_images/2/image_12692.png (deflated 0%)
  a