In [None]:
import os
import sys
import torch
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from google.colab import drive
# Mount Google Drive into the Colab filesystem so the project files are reachable.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Project root; the notebook changes into it so later paths resolve from here.
ROOT_DIR = "proj_dir"

# Resolve the source directory BEFORE changing directory: a relative ROOT_DIR
# joined after the chdir would be interpreted relative to the new working
# directory and point at a non-existent nested folder.
SRC_DIR = os.path.abspath(os.path.join(ROOT_DIR, "src"))

os.chdir(ROOT_DIR)

# Guard the append so re-running this cell does not pile up duplicate entries.
if SRC_DIR not in sys.path:
    sys.path.append(SRC_DIR)

In [None]:
# Smoke test: confirm the model's output size/dim for a single grayscale input.
from model import ResNet20
import torch

model = ResNet20()
sample_input = torch.randn(1, 1, 150, 150)  # Batch size 1, grayscale image (1,150,150)

# Only the output shape matters here, so skip autograd bookkeeping for this forward pass.
with torch.no_grad():
    output_shape = model(sample_input).shape

print(output_shape)  # Expected : torch.Size([1, 3])

# Fail loudly on a mismatch instead of relying on someone reading the printed shape.
assert output_shape == torch.Size([1, 3]), f"Unexpected model output shape: {output_shape}"


torch.Size([1, 3])


In [None]:
from npy import create_single_npy

In [None]:
# To speed up data loading, the many per-sample .npy files of each split are merged
# into a single .npy file. See ./src/npy.py for the definition of create_single_npy.
for source_dir, merged_file in (
    ('proj_dir/dataset/train', 'proj_dir/dataset/train_data.npy'),
    ('proj_dir/dataset/val', 'proj_dir/dataset/val_data.npy'),
):
    create_single_npy(source_dir, merged_file)

In [None]:
# NOTE(review): allow_pickle=True unpickles arbitrary objects; only use it on files
# produced by this project, never on untrusted downloads.
train_bundle = np.load("proj_dir/dataset/train_data.npy", allow_pickle=True)

# The loaded object is expected to unpack into exactly two parts: samples and labels.
data, labels = train_bundle
print(data.shape, labels.shape)

In [None]:
import numpy as np
import os

def check_npy_structure(base_path, classes=("no", "vort", "sphere")):
    """Print the type and shape of one sample .npy file per class folder.

    For every class name, looks in ``<base_path>/<class>`` and loads the
    alphabetically first ``.npy`` file, so the inspected sample is the same on
    every run regardless of the directory listing order.

    Args:
        base_path: Directory that contains one sub-folder per class.
        classes: Names of the class sub-folders to inspect. Defaults to the
            three classes used by this notebook.

    Returns:
        None. The findings are printed; missing folders and folders without
        ``.npy`` files are reported and skipped rather than raising.
    """
    for cls in classes:
        folder_path = os.path.join(base_path, cls)

        # Report a missing class folder instead of crashing inside os.listdir.
        if not os.path.isdir(folder_path):
            print(f"Class folder not found: {folder_path}")
            continue

        # os.listdir returns entries in arbitrary order; sort for a stable sample.
        npy_files = sorted(
            os.path.join(folder_path, f)
            for f in os.listdir(folder_path)
            if f.endswith('.npy')
        )

        if not npy_files:
            print(f"No .npy files found in {folder_path}")
            continue

        sample_file = npy_files[0]  # Alphabetically first file in the class folder
        # NOTE(review): allow_pickle=True can execute code embedded in a crafted file;
        # acceptable for project-generated data, not for untrusted sources.
        sample_data = np.load(sample_file, allow_pickle=True)

        print(f"Class: {cls}")
        print(f"  - File: {os.path.basename(sample_file)}")
        print(f"  - Type: {type(sample_data)}")
        print(f"  - Shape: {sample_data.shape if isinstance(sample_data, np.ndarray) else 'Not a NumPy array'}")
        print("-" * 50)

train_path = "dir/Classification/dataset/train"
val_path = "dir/Classification/dataset/val"

# Inspect one sample per class for each split, announcing the split first.
for heading, split_path in (
    ("Checking TRAIN dataset structure:", train_path),
    ("\nChecking VALIDATION dataset structure:", val_path),
):
    print(heading)
    check_npy_structure(split_path)


Checking TRAIN dataset structure:
Class: no
  - File: 8707.npy
  - Type: <class 'numpy.ndarray'>
  - Shape: (1, 150, 150)
--------------------------------------------------
Class: vort
  - File: 8961.npy
  - Type: <class 'numpy.ndarray'>
  - Shape: (1, 150, 150)
--------------------------------------------------
Class: sphere
  - File: 9299.npy
  - Type: <class 'numpy.ndarray'>
  - Shape: (1, 150, 150)
--------------------------------------------------

Checking VALIDATION dataset structure:
Class: no
  - File: 1475.npy
  - Type: <class 'numpy.ndarray'>
  - Shape: (1, 150, 150)
--------------------------------------------------
Class: vort
  - File: 1310.npy
  - Type: <class 'numpy.ndarray'>
  - Shape: (1, 150, 150)
--------------------------------------------------
Class: sphere
  - File: 1419.npy
  - Type: <class 'numpy.ndarray'>
  - Shape: (1, 150, 150)
--------------------------------------------------


In [None]:
import numpy as np
import os

def create_npz(input_dir, output_file):
    """Bundle every per-sample .npy file of a dataset split into one compressed .npz.

    Reads ``<input_dir>/<class>/*.npy`` for the classes "no", "vort" and
    "sphere" (in that order), stacks the arrays along a new leading axis and
    stores them together with integer labels via ``np.savez_compressed`` under
    the keys ``images`` and ``labels``.

    Args:
        input_dir: Directory containing one sub-folder per class.
        output_file: Destination path of the .npz archive.

    Raises:
        ValueError: If no .npy file is found in any class folder.
        FileNotFoundError: If a class folder does not exist (raised by
            ``os.listdir``); a missing class is treated as a hard error so an
            incomplete dataset is never written silently.
    """
    # Single source of truth for class names and their integer labels;
    # dict insertion order fixes the order in which classes are written.
    label_map = {"no": 0, "vort": 1, "sphere": 2}

    all_data = []
    all_labels = []

    for cls, label in label_map.items():
        folder_path = os.path.join(input_dir, cls)
        # Sort so the sample order in the archive is reproducible; os.listdir
        # gives no ordering guarantee.
        npy_files = sorted(
            os.path.join(folder_path, f)
            for f in os.listdir(folder_path)
            if f.endswith('.npy')
        )

        for file in npy_files:
            all_data.append(np.load(file))  # Shape: (1, 150, 150)
            all_labels.append(label)

    # np.stack on an empty list fails with an opaque message; be explicit.
    if not all_data:
        raise ValueError(f"No .npy files found under {input_dir}")

    all_data = np.stack(all_data)  # Shape: (N, 1, 150, 150)
    all_labels = np.array(all_labels)  # Shape: (N,)

    np.savez_compressed(output_file, images=all_data, labels=all_labels)
    print(f"Saved {output_file} with shape {all_data.shape}, Labels shape {all_labels.shape}")

# Build one compressed archive per split (train and validation).
train_dir = "dir/Classification/dataset/train"
val_dir = "dir/Classification/dataset/val"

for split_dir, archive_path in (
    (train_dir, "dir/Classification/dataset/train_data.npz"),
    (val_dir, "dir/Classification/dataset/val_data.npz"),
):
    create_npz(split_dir, archive_path)


Saved /content/drive/MyDrive/Hari/Evaluation/Classification/dataset/train_data.npz with shape (30000, 1, 150, 150), Labels shape (30000,)
Saved /content/drive/MyDrive/Hari/Evaluation/Classification/dataset/val_data.npz with shape (7500, 1, 150, 150), Labels shape (7500,)


In [None]:
# np.load on a .npz returns an NpzFile that keeps the archive open until closed.
# Index it inside a `with` block so the arrays are read into memory and the file
# handle is released; train_data / val_data are closed after their blocks.
with np.load("dir/Classification/dataset/train_data.npz") as train_data:
    X_train = train_data["images"]  # Shape: (N, 1, 150, 150)
    y_train = train_data["labels"]  # Shape: (N,)

with np.load("dir/Classification/dataset/val_data.npz") as val_data:
    X_val = val_data["images"]
    y_val = val_data["labels"]

# Every image must have exactly one label; catch a corrupt archive early.
assert len(X_train) == len(y_train), "Train images/labels count mismatch"
assert len(X_val) == len(y_val), "Val images/labels count mismatch"

print(f"Train set: {X_train.shape}, Labels: {y_train.shape}")
print(f"Val set: {X_val.shape}, Labels: {y_val.shape}")


✅ Training set: (30000, 1, 150, 150), Labels: (30000,)
✅ Validation set: (7500, 1, 150, 150), Labels: (7500,)
