In [2]:
pip install pyradiomics





In [3]:
import os
import nibabel as nib
import pandas as pd
import pyradiomics
# from pyradiomics import featureextractor
import logging

# Optional: suppress verbose logging
logging.getLogger('radiomics').setLevel(logging.ERROR)

# Path to your segmented tumor masks
base_dir = r"C:\PKG - UPENN-GBM-NIfTI\UPENN-GBM\NIfTI-files\automated_segm"
output_csv = "tumor_features.csv"

# Configure feature extractor
extractor = featureextractor.RadiomicsFeatureExtractor()
extractor.enableAllFeatures()

data = []

for file in os.listdir(base_dir):
    if file.endswith('.nii.gz'):
        patient_id = file.split('_')[2] if '_' in file else file.split('.')[0]
        path = os.path.join(base_dir, file)

        # Since it's a binary tumor mask, use the same file as both image and mask
        try:
            result = extractor.execute(imageFilepath=path, maskFilepath=path)
            filtered_result = {k: v for k, v in result.items() if "diagnostics" not in k}
            filtered_result['PatientID'] = patient_id
            data.append(filtered_result)
        except Exception as e:
            print(f"Failed for {file}: {e}")

# Create DataFrame and save to CSV
df = pd.DataFrame(data)
df.set_index("PatientID", inplace=True)
df.to_csv(output_csv)

print(f"Saved {len(df)} feature vectors to {output_csv}")

ModuleNotFoundError: No module named 'pyradiomics'

In [8]:
import os
import torch
import torch.nn as nn
import nibabel as nib
import numpy as np
import pandas as pd
from scipy.ndimage import zoom

# ======= 1. CNN Model (Simple 3D CNN as feature extractor) ============
class TumorFeatureCNN(nn.Module):
    def __init__(self):
        super(TumorFeatureCNN, self).__init__()
        self.conv_layers = nn.Sequential(
            nn.Conv3d(1, 8, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool3d(2),
            nn.Conv3d(8, 16, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.AdaptiveAvgPool3d((4, 4, 4))  # Output shape: (16, 4, 4, 4)
        )
    def forward(self, x):
        x = self.conv_layers(x)
        return x.view(x.size(0), -1)  # Flatten features (batch_size, 1024)

# ========= 2. NIfTI loader and preprocessing ===============
def load_nifti(path, target_shape=(64, 64, 64)):
    data = nib.load(path).get_fdata()
    data = (data - data.min()) / (data.max() - data.min() + 1e-6)  # Normalize to [0, 1]
    zoom_factors = [t / s for t, s in zip(target_shape, data.shape)]
    data = zoom(data, zoom_factors, order=1)  # Resize
    data = np.expand_dims(data, axis=0)  # Add channel dim (1, D, H, W)
    return torch.tensor(data, dtype=torch.float32)

# ========== 3. Feature Extraction Pipeline ================
def extract_features_from_folder(folder_path):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = TumorFeatureCNN().to(device).eval()

    features = []
    patient_ids = []

    for fname in os.listdir(folder_path):
        if fname.endswith('.nii.gz'):
            path = os.path.join(folder_path, fname)
            # Extract everything before "_automated_approx_segm.nii.gz"
            patient_id = fname.replace('_automated_approx_segm.nii.gz', '')

            try:
                tensor = load_nifti(path).unsqueeze(0).to(device)
                with torch.no_grad():
                    feat = model(tensor).cpu().numpy().flatten()
                features.append(feat)
                patient_ids.append(patient_id)
            except Exception as e:
                print(f"[ERROR] {fname}: {e}")


    df = pd.DataFrame(features)
    df.insert(0, "PatientID", patient_ids)
    return df

# ========== 4. Run and Save to CSV ========================
if __name__ == "__main__":
    input_folder = r"C:\PKG - UPENN-GBM-NIfTI\UPENN-GBM\NIfTI-files\automated_segm"
    output_csv = "cnn_tumor_features.csv"

    df = extract_features_from_folder(input_folder)
    df.to_csv(output_csv, index=False)
    print(f"✅ Features extracted and saved to {output_csv}")

✅ Features extracted and saved to cnn_tumor_features.csv


In [None]:
import pandas as pd

# File paths
cnn_features_path = r'D:\mlpr data\Glioblastoma-ML-model\cnn_tumor_features.csv'
clinical_info_path = r'D:\mlpr data\Glioblastoma-ML-model\UPENN-GBM_clinical_info_v2.1.csv'
output_path = r'D:\mlpr data\Glioblastoma-ML-model\cnn_stacked.csv'

# Load data
cnn_df = pd.read_csv(cnn_features_path)
clinical_df = pd.read_csv(clinical_info_path)

# Merge on patient ID (assuming both columns are named 'ID' in each CSV)
merged_df = cnn_df.merge(
    clinical_df[['ID', 'Survival_from_surgery_days_UPDATED']],
    on='ID',
    how='left'  # keeps all cnn features and fills with NaN if no clinical match
)

# Save to output
merged_df.to_csv(output_path, index=False)

print("✅ cnn_stacked.csv saved at:", output_path)


KeyError: 'PatientID'