In [1]:
# This script prepares the lesion segment metadata table (segments_metadata.csv) from the exported segmentation.csv.

import os
import sys
import ast
import pandas as pd
import numpy as np 

sys.path.append('/home/oleksii/projects/ohif-orthanc-postgres-docker/')
from utils.utils import get_orthanc_client

# Orthanc client; get_oid_from_uid() uses it for UID -> Orthanc ID lookups.
orthanc_client = get_orthanc_client()

# Load the exported segmentation table and keep only the rows whose
# segmentType is 'lesion'.
segmentation_path = '/data/oleksii/alta-ai.com/alta-ai-orthanc-backup/export20240814/mongo_csv_dump/aggregatedData/segmentation.csv'
df = pd.read_csv(segmentation_path).loc[lambda table: table["segmentType"] == 'lesion']

# Function to extract and join information from the 'segments' column
def extract_segment_info(segments):
    """Flatten one serialized ``segments`` cell into comma-joined text columns.

    Parameters
    ----------
    segments : str
        Python-literal text (parsed with ``ast.literal_eval``) of a list of
        segment dicts. Each dict is expected to carry ``generalInfo`` and
        ``detailInfo`` sub-dicts and, optionally, a ``trackingUID``.

    Returns
    -------
    pandas.Series
        Keys ``segment_id``, ``volume``, ``zone``, ``region``, ``class``,
        ``laterality`` and ``trackingUID``. Every value is a string holding
        one comma-separated entry per segment; ``segment_id`` is the 1-based
        position of the segment in the list. Missing or ``None`` fields
        contribute an empty entry. If the cell cannot be parsed or does not
        have the expected structure, an error line is printed and every
        value is the empty string.
    """
    columns = ('segment_id', 'volume', 'zone', 'region', 'class', 'laterality', 'trackingUID')

    def as_text(value):
        # str.join only accepts strings: coerce numbers, and map None to an
        # empty entry so it behaves like a missing key.
        return '' if value is None else str(value)

    try:
        segments_list = ast.literal_eval(segments)
        info = {
            'segment_id': ','.join(str(i) for i in range(1, len(segments_list) + 1)),
            'volume': ','.join(as_text(segment['generalInfo'].get('voxelVolume', '')) for segment in segments_list),
            'zone': ','.join(as_text(segment['detailInfo'].get('zone', '')) for segment in segments_list),
            'region': ','.join(as_text(segment['detailInfo'].get('region', '')) for segment in segments_list),
            'class': ','.join(as_text(segment['detailInfo'].get('diagnosticClass', '')) for segment in segments_list),
            'laterality': ','.join(as_text(segment['detailInfo'].get('laterality', '')) for segment in segments_list),
            'trackingUID': ','.join(as_text(segment.get('trackingUID', '')) for segment in segments_list)
        }
        return pd.Series(info)
    except (ValueError, SyntaxError, KeyError, TypeError, AttributeError) as e:
        # TypeError/AttributeError cover cells that parse but are not a list
        # of dicts with dict-valued generalInfo/detailInfo; without them a
        # single malformed row would abort the whole DataFrame.apply.
        print(f"Error parsing segments: {e}")
        return pd.Series(dict.fromkeys(columns, ''))

# Expand every row's 'segments' cell into the joined per-segment columns and
# attach them to the original rows (DataFrame.join matches on the index).
segment_columns = df['segments'].apply(extract_segment_info)
result_df = df.join(segment_columns)

# Rename columns to match segments_copy.csv format
uid_column_names = {
    'studyInstanceUID': 'study_uid',
    'seriesInstanceUID': 'series_uid',
}
result_df = result_df.rename(columns=uid_column_names)

# Function to safely extract confidenceLevel
def get_confidence_level(editing_info):
    """Return the ``confidenceLevel`` stored in a serialized ``editingInfo`` cell.

    Parameters
    ----------
    editing_info : str
        Python-literal text of a dict, parsed with ``ast.literal_eval``.

    Returns
    -------
    The value stored under ``'confidenceLevel'``, or ``''`` when the key is
    absent, the cell cannot be parsed (e.g. NaN or an empty string), or the
    parsed literal is not a dict.
    """
    try:
        info = ast.literal_eval(editing_info)
    except (ValueError, SyntaxError, TypeError):
        return ''
    # Literals such as "None" or "[...]" parse successfully but have no
    # .get(); treat them like a cell without a confidence level.
    if not isinstance(info, dict):
        return ''
    return info.get('confidenceLevel', '')

# Keep only the rows whose editingInfo confidence level is 'perfect'.
result_df['confidence'] = df['editingInfo'].apply(get_confidence_level)
# .copy() detaches the filtered rows from the parent frame, so the column
# assignments that follow operate on an independent DataFrame instead of
# raising SettingWithCopyWarning on a slice.
result_df = result_df[result_df["confidence"] == 'perfect'].copy()

# Add comment column
result_df['comment'] = ''

# Normalize 'class' values
def normalize_class(class_string):
    """Translate a comma-separated list of class labels into short codes.

    Each label is stripped of surrounding whitespace and looked up (exact,
    case-sensitive match) in the abbreviation table; labels without an entry
    are kept as they are, stripped. The result is joined back with commas.
    """
    abbreviations = {
        'Chronic inflammation': 'CI',
        'chronische Entzündung': 'CI',
        'prostatitis + high grade pin': 'CI',
        'prostatitis': 'CI',
        'No tumor': 'N/A',
        'tumorfreies prostatagewebe': 'N/A',
        'High-grade PIN': 'HighPIN',
        'high grade pin': 'HighPIN',
        'Gleason 6': 'GS6',
        'Gleason 7a': 'GS7a',
        'Gleason 7b': 'GS7b',
        'Gleason 8': 'GS8',
        'Gleason 9': 'GS9',
        'Gleason 10': 'GS10',
    }
    normalized = []
    for raw_label in class_string.split(','):
        label = raw_label.strip()
        normalized.append(abbreviations.get(label, label))
    return ','.join(normalized)

# Select and reorder columns to match segments_copy.csv
column_order = ['study_uid', 'series_uid', 'segment_id', 'comment', 'confidence', 'zone', 'volume', 'region', 'class', 'laterality', 'trackingUID']

# Normalize the class labels and keep only the output columns. assign()
# builds a new frame ('class' is a Python keyword, hence the dict unpacking)
# and the final .copy() makes the column subset independent, so the column
# assignments in later cells do not raise SettingWithCopyWarning.
result_df = (
    result_df
    .assign(**{'class': result_df['class'].apply(normalize_class)})
    [column_order]
    .copy()
)

In [7]:
def is_csPCa(x):
    """Return True when ``x`` contains any of GS7a, GS7b, GS8, GS9 or GS10.

    The check uses the ``in`` operator, so for a string ``x`` it is a
    substring test against the whole comma-joined class text.
    """
    for grade in ('GS7a', 'GS7b', 'GS8', 'GS9', 'GS10'):
        if grade in x:
            return True
    return False

# Flag every row whose class text contains a csPCa grade; null class values
# are passed through unchanged.
result_df['case_csPCa'] = result_df['class'].apply(
    lambda labels: labels if pd.isnull(labels) else is_csPCa(labels)
)

def max_class(x):
    """Return the highest-ranked label in a comma-separated class string.

    Labels are ranked by their position in ``ordered_classes`` (later means
    higher). Whitespace around each label is ignored.

    Parameters
    ----------
    x : str
        Comma-separated class labels, e.g. ``'GS6,GS7b'``.

    Returns
    -------
    str
        The label with the highest rank; ``''`` for an empty string.

    Raises
    ------
    ValueError
        If any label is not part of the ranking. The message names the
        offending label(s).
    """
    ordered_classes = ['', 'N/A', 'HighPIN', 'CI', 'GS6', 'GS7a', 'GS7b', 'GS8', 'GS9', 'GS10']
    labels = [label.strip() for label in x.split(',')]
    unknown = [label for label in labels if label not in ordered_classes]
    if unknown:
        # list.index would raise a bare "... is not in list"; say which
        # label of which input is the problem instead.
        raise ValueError(
            f"Unknown class label(s) {unknown!r} in {x!r}; expected one of {ordered_classes!r}"
        )
    return max(labels, key=ordered_classes.index)

# Per row, keep the highest-ranked class label; null class values are passed
# through unchanged.
result_df['case_class'] = result_df['class'].apply(
    lambda labels: labels if pd.isnull(labels) else max_class(labels)
)

In [9]:
def get_oid_from_uid(uid):
    """Return the Orthanc ID of the study matching ``uid``.

    Sends ``uid`` to ``orthanc_client.post_tools_lookup`` and returns the
    ``'ID'`` of the first returned entry whose ``'Type'`` is ``'Study'``, or
    ``None`` when there is no such entry.
    """
    matches = orthanc_client.post_tools_lookup(data=uid)
    study_ids = (match['ID'] for match in matches if match['Type'] == 'Study')
    return next(study_ids, None)
                   
# Resolve every distinct study UID once (each lookup is a call to the Orthanc
# client, and several rows can share a study), then map the results back onto
# the rows. Unlike a row-by-row loop, this also creates the column when
# result_df is empty. UIDs without a matching study end up as missing values.
unique_study_uids = result_df['study_uid'].dropna().unique()
oid_by_study_uid = {uid: get_oid_from_uid(uid) for uid in unique_study_uids}
result_df['study_orthanc_id'] = result_df['study_uid'].map(oid_by_study_uid)


In [10]:
# Load the study table and keep the studies whose 'diagnoses' cell is exactly
# the serialized "healthy" prostate-cancer diagnosis.
study_csv_path = "/data/oleksii/alta-ai.com/alta-ai-orthanc-backup/export20240814/mongo_csv_dump/aggregatedData/study.csv"
healthy_diagnosis = "{'prostateCancer': {'diagnosis': 'healthy'}}"

df_study = pd.read_csv(study_csv_path)
healthy = df_study[df_study['diagnoses'].eq(healthy_diagnosis)]

In [11]:
# Flag the rows whose study_uid appears among the healthy studies of study.csv.
result_df['healthy'] = result_df['study_uid'].isin(healthy['studyInstanceUID'])

# Blank the lesion-describing columns for the rows flagged as healthy.
columns_to_empty = ['segment_id', 'comment', 'zone', 'volume', 'region', 'laterality', 'class', 'case_class']

healthy_rows = result_df['healthy']
for col in columns_to_empty:
    result_df.loc[healthy_rows, col] = ''

In [14]:
# Write the table next to segmentation.csv as a semicolon-separated CSV
# without the index column.
output_dir = os.path.dirname(segmentation_path)
output_path = os.path.join(output_dir, 'segments_metadata.csv')
result_df.to_csv(output_path, sep=';', index=False)

In [None]:
# Remote path: sftp://141.44.17.135:2200/data/oleksii/Prostate-Lesion-Datasets-NRRDS/ALTA-Lesion-Dataset-alta_ai-export20240814-preprocessed

In [18]:

# merge cases: move every entry of `target` (the source folder) into
# `destination`.
import shutil
# target = "/data/oleksii/Prostate-Lesion-Datasets-NRRDS/ALTA-Lesion-Dataset-alta_ai-export20240814-seg-IDS-fresh"
# destination = "/data/oleksii/Prostate-Lesion-Datasets-NRRDS/ALTA-Lesion-Dataset-alta_ai-export20240809"

target = "/data/oleksii/Prostate-Lesion-Datasets-NRRDS/ALTA-Lesion-Dataset-alta_ai-export20240814-preprocessed"
destination = "/data/oleksii/Prostate-Lesion-Datasets-NRRDS/train/ALTA-Lesion-Dataset-preprocessed"

# target = "/data/oleksii/Prostate-Lesion-Datasets-NRRDS/ALTA-Lesion-Dataset-alta_ai-export20240814-seg-IDS-fresh-seg"
# destination = "/data/oleksii/Prostate-Lesion-Datasets-NRRDS/ALTA-Lesion-Dataset-alta_ai-export20240809-seg"

# Make sure the destination folder exists before moving anything into it.
os.makedirs(destination, exist_ok=True)

for soid in os.listdir(target):
    target_path = os.path.join(target, soid)
    destination_path = os.path.join(destination, soid)
    # shutil.move() into an existing directory does not merge or overwrite:
    # it places the source *inside* it (destination/soid/soid). Skip and
    # report such cases instead of silently nesting them.
    if os.path.exists(destination_path):
        print(f"Skipping {target_path}: {destination_path} already exists")
        continue
    print(target_path, destination_path)
    shutil.move(target_path, destination_path)

/data/oleksii/Prostate-Lesion-Datasets-NRRDS/ALTA-Lesion-Dataset-alta_ai-export20240814-preprocessed/76d45785-ec9fbd19-8e6b2467-238dbe6b-c5ea18f4 /data/oleksii/Prostate-Lesion-Datasets-NRRDS/train/ALTA-Lesion-Dataset-preprocessed/76d45785-ec9fbd19-8e6b2467-238dbe6b-c5ea18f4
/data/oleksii/Prostate-Lesion-Datasets-NRRDS/ALTA-Lesion-Dataset-alta_ai-export20240814-preprocessed/da0a601e-840f4712-ed9aa459-a861cfc0-22235948 /data/oleksii/Prostate-Lesion-Datasets-NRRDS/train/ALTA-Lesion-Dataset-preprocessed/da0a601e-840f4712-ed9aa459-a861cfc0-22235948
/data/oleksii/Prostate-Lesion-Datasets-NRRDS/ALTA-Lesion-Dataset-alta_ai-export20240814-preprocessed/9be2122f-a42c5e34-6fb3b5a8-646cb445-0a945a70 /data/oleksii/Prostate-Lesion-Datasets-NRRDS/train/ALTA-Lesion-Dataset-preprocessed/9be2122f-a42c5e34-6fb3b5a8-646cb445-0a945a70
/data/oleksii/Prostate-Lesion-Datasets-NRRDS/ALTA-Lesion-Dataset-alta_ai-export20240814-preprocessed/a3873275-221760f9-157505d0-eaff5e57-abb146dd /data/oleksii/Prostate-Lesion

In [37]:
# Print, for every exported seg.dcm, the trackingUID values listed in the
# table next to the tracking identifiers stored in the DICOM file, so the two
# can be compared by eye.
# The name of the folder that contains seg.dcm is matched against 'dicom_oid'.
# NOTE(review): result_df_oid is not defined in any visible cell and the
# 'dicom_oid' column is not created above -- this cell appears to depend on
# state from a removed cell; confirm before relying on it.
import glob
import pydicom

lesions = glob.glob("/data/oleksii/alta-ai.com/alta-ai-orthanc-backup/export20240809/prostate_lesion/ProcessingState.PERFECT/*/seg.dcm")

for seg_path in lesions:
    orthancID = seg_path.rsplit("/", 2)[-2]
    table_rows = result_df_oid[result_df_oid['dicom_oid'] == orthancID]
    print(table_rows['trackingUID'].values)

    dataset = pydicom.dcmread(seg_path)
    # DICOM tags: (0062,0002) Segment Sequence, (0062,0020) Tracking ID,
    # (0062,0021) Tracking UID.
    for segment in dataset[(0x0062, 0x0002)]:
        print(segment[(0x0062, 0x0020)])
        print(segment[(0x0062, 0x0021)])

In [2]:

# merge tables: append the new segment metadata to the existing lesion
# dataset table and save the result under a name carrying the row count and
# today's date.
df1 = pd.read_csv("/data/oleksii/Prostate-Lesion-Datasets-NRRDS/train/lesion_dataset_1909_20240131_OB.csv", sep=';')
df2 = pd.read_csv("/data/oleksii/alta-ai.com/alta-ai-orthanc-backup/export20240814/mongo_csv_dump/aggregatedData/segments_metadata_preprocessed.csv", sep=';')

current_sequence_map = pd.read_csv(
    "/home/oleksii/projects/ohif-orthanc-postgres-docker/sequence_mapping/sequence_mapping_13681_studies_20240807.csv",
    sep=';',
)
# NOTE(review): alta_ai_lesions_seq is computed but not used below -- the
# concatenation takes df2, not this merged frame. Confirm which one is meant.
alta_ai_lesions_seq = df2.merge(current_sequence_map, on='study_orthanc_id', how='left')

from datetime import datetime

# Today's date formatted as yyyymmdd, used in the output file name.
today = datetime.today().strftime('%Y%m%d')

df = pd.concat([df1, df2])
merged_csv_path = f"/data/oleksii/Prostate-Lesion-Datasets-NRRDS/train/lesion_dataset_{len(df)}_{today}_OB.csv"
df.to_csv(merged_csv_path, sep=';', index=False)

