In [None]:
import numpy as np  
import pandas as pd 
import seaborn as sns 
import matplotlib.pyplot as plt 
import pandas.api.types 
import sklearn.metrics 
import nibabel as nib
import os 
import pydicom
from glob import glob
from tqdm import tqdm, trange

In [None]:
# Competition data root and the patient-level label table.
file_path = "/kaggle/input/rsna-2023-abdominal-trauma-detection"
train_csv = file_path + "/train.csv"
train = pd.read_csv(train_csv)
# Preview the first five rows (head() default).
train.head(5)

In [None]:
# Number of missing values in each column.
print(train.isnull().sum())

# (rows, columns) of the label table.
print(train.shape)

# DataFrame.info() writes its report itself and returns None, so it is called
# directly; wrapping it in print() appended a stray "None" line to the output.
train.info()

In [None]:
# Summary statistics; the 'mean' row is the second row of describe() for numeric data.
f = train.describe()
mean_values = f.loc['mean']
# Column-wise maximum of the label table.
train.max()

In [None]:
# Pairwise correlation between the per-organ "healthy" indicator columns.
organs_healthy = [
    f'{organ}_healthy'
    for organ in ('bowel', 'extravasation', 'kidney', 'liver', 'spleen')
]
corr_matrix1 = train.loc[:, organs_healthy].corr()
sns.heatmap(data=corr_matrix1, annot=True)
plt.title('Correlation Heatmap for healthy organs')

In [None]:
# Pairwise correlation between the injury indicator columns (plus any_injury).
low_high = [
    'bowel_injury', 'extravasation_injury',
    'kidney_low', 'kidney_high',
    'liver_high', 'liver_low',
    'spleen_low', 'spleen_high',
    'any_injury',
]
corr_matrix2 = train.loc[:, low_high].corr()
sns.heatmap(data=corr_matrix2, annot=True, linewidths=1)
plt.title('Correlation Heatmap for Injury Organs')


In [None]:
# Series-level metadata for the training scans.
train_series_meta = pd.read_csv('/kaggle/input/rsna-2023-abdominal-trauma-detection/train_series_meta.csv')

# Only the last expression of a cell is rendered, so the preview has to be
# printed explicitly; a bare head() in the middle of the cell showed nothing.
print(train_series_meta.head())

# info() prints its own report and returns None - do not wrap it in print().
train_series_meta.info()
print(train_series_meta.isnull().sum())

# Column means, selected by label rather than by row position.
f = train_series_meta.describe()
f.loc['mean']

In [None]:
# Series-level metadata for the test scans.
test_series_meta = pd.read_csv(
    "/kaggle/input/rsna-2023-abdominal-trauma-detection/test_series_meta.csv"
)
test_series_meta.head(5)

In [None]:
# Missing values per column, then the column means of the test metadata.
print(test_series_meta.isna().sum())

f = test_series_meta.describe()
f.loc['mean']

In [None]:
# Image-level label table.
image_labels = pd.read_csv(
    "/kaggle/input/rsna-2023-abdominal-trauma-detection/image_level_labels.csv"
)
image_labels.head(5)

In [None]:
# info() prints its own report and returns None, so it is not wrapped in
# print() (that added a stray "None" line to the output).
image_labels.info()
print(image_labels.isnull().sum())

# Means of the numeric columns, selected by label rather than by row position.
f = image_labels.describe()
f.loc['mean']

In [None]:
# `Target_cols` is used throughout the notebook but was never assigned, so a
# fresh Restart & Run All stopped here with a NameError.
# NOTE(review): defined as every label column of train.csv, i.e. all columns
# except the patient identifier - confirm this matches the original intent.
Target_cols = [col for col in train.columns if col != 'patient_id']
train[Target_cols].describe()

In [None]:
class ParticipantVisibleError(Exception):
    """Exception raised by the metric's input validation checks."""

def normalize_probabilities_to_one(df: pd.DataFrame, group_columns: list) -> pd.DataFrame:
    """Rescale a group of columns so that, per row, the group sums to 1.

    Examples (two-column group): 0.75, 0.75 => 0.5, 0.5 and 0.1, 0.1 => 0.5, 0.5.

    Args:
        df: frame holding the columns; it is modified in place.
        group_columns: names of the columns that together form one group.

    Returns:
        The same ``df`` object, with ``group_columns`` divided by their row sum.

    Raises:
        ParticipantVisibleError: if the smallest row sum over the group is 0.
    """
    totals = df[group_columns].sum(axis=1)
    smallest_total = totals.min()
    if smallest_total == 0:
        raise ParticipantVisibleError('All rows must contain at least one non-zero prediction')
    # Divide each member of the group by the per-row total.
    for name in group_columns:
        df[name] /= totals
    return df

def score(solution: pd.DataFrame, submission: pd.DataFrame, row_id_column_name: str) -> float:
    '''
    Mean of the sample-weighted log losses over all label groups.

    Pseudocode:
    1. For every label group (liver, bowel, etc):
        - Normalize the sum of each row's probabilities to 100%.
        - Calculate the sample weighted log loss.
    2. Derive a new any_injury label by taking the max of 1 - p(healthy) for each label group
    3. Calculate the sample weighted log loss for the new label group
    4. Return the average of all of the label group log losses as the final score.

    Args:
        solution: ground-truth frame. Must contain the row id column, every
            label column of each group, one ``<category>_weight`` column per
            group and an ``any_injury_weight`` column.
        submission: prediction frame. Must contain the row id column and every
            label column of each group. The any_injury prediction is derived
            from the ``*_healthy`` columns; an ``any_injury`` column, if
            present, is only subject to the QC checks.
        row_id_column_name: name of the id column, dropped before scoring.

    Returns:
        The average of the five label-group losses and the any_injury loss.

    Raises:
        KeyError: if ``row_id_column_name`` is missing from either frame.
        ParticipantVisibleError: if the submission is non-numeric or
            non-finite, if either frame holds negative values, if a label
            column is missing from the submission, or if a label group sums
            to zero in some row.
    '''
    # Work on private copies. The id column is deleted and the label groups
    # are renormalised in place below; doing that on the caller's frames made
    # a second call with the same objects fail with KeyError.
    solution = solution.copy()
    submission = submission.copy()

    del solution[row_id_column_name]
    del submission[row_id_column_name]
    # Run basic QC checks on the inputs
    if not pandas.api.types.is_numeric_dtype(submission.values):
        raise ParticipantVisibleError('All submission values must be numeric')

    if not np.isfinite(submission.values).all():
        raise ParticipantVisibleError('All submission values must be finite')

    if solution.min().min() < 0:
        raise ParticipantVisibleError('All labels must be at least zero')
    if submission.min().min() < 0:
        raise ParticipantVisibleError('All predictions must be at least zero')

    # Calculate the label group log losses
    binary_targets = ['bowel', 'extravasation']
    triple_level_targets = ['kidney', 'liver', 'spleen']
    all_target_categories = binary_targets + triple_level_targets

    label_group_losses = []
    for category in all_target_categories:
        if category in binary_targets:
            col_group = [f'{category}_healthy', f'{category}_injury']
        else:
            col_group = [f'{category}_healthy', f'{category}_low', f'{category}_high']

        solution = normalize_probabilities_to_one(solution, col_group)

        for col in col_group:
            if col not in submission.columns:
                raise ParticipantVisibleError(f'Missing submission column {col}')
        submission = normalize_probabilities_to_one(submission, col_group)
        label_group_losses.append(
            sklearn.metrics.log_loss(
                y_true=solution[col_group].values,
                y_pred=submission[col_group].values,
                sample_weight=solution[f'{category}_weight'].values
            )
        )

    # Derive a new any_injury label by taking the max of 1 - p(healthy) for each label group
    healthy_cols = [x + '_healthy' for x in all_target_categories]
    any_injury_labels = (1 - solution[healthy_cols]).max(axis=1)
    any_injury_predictions = (1 - submission[healthy_cols]).max(axis=1)
    any_injury_loss = sklearn.metrics.log_loss(
        y_true=any_injury_labels.values,
        y_pred=any_injury_predictions.values,
        sample_weight=solution['any_injury_weight'].values
    )

    label_group_losses.append(any_injury_loss)
    return np.mean(label_group_losses)

In [None]:
def create_training_solution(y_train):
    """Return a copy of ``y_train`` with one sample-weight column per label group.

    Weights added (healthy | injured levels):
        bowel_weight          1 | 2
        extravasation_weight  1 | 6
        kidney/liver/spleen   1 | 2 (low) | 4 (high); low is checked first
        any_injury_weight     1 | 6

    ``y_train`` itself is left untouched.
    """
    sol_train = y_train.copy()

    # Two-level groups: weight is `injured_weight` where the injury flag is 1.
    for group, injured_weight in (('bowel', 2), ('extravasation', 6)):
        injured = sol_train[f'{group}_injury'] == 1
        sol_train[f'{group}_weight'] = np.where(injured, injured_weight, 1)

    # Three-level groups: low -> 2, otherwise high -> 4, otherwise 1.
    for organ in ('kidney', 'liver', 'spleen'):
        is_low = sol_train[f'{organ}_low'] == 1
        is_high = sol_train[f'{organ}_high'] == 1
        sol_train[f'{organ}_weight'] = np.where(is_low, 2, np.where(is_high, 4, 1))

    # Overall any-injury flag.
    sol_train['any_injury_weight'] = np.where(sol_train['any_injury'] == 1, 6, 1)
    return sol_train

In [None]:
# Ground truth with sample weights attached.
solution_train = create_training_solution(train)

# Baseline prediction: every row gets the training mean of each target column.
y_pred = train.copy()
y_pred[Target_cols] = train[Target_cols].mean().tolist()

no_scale_score = score(
    solution_train,
    y_pred,
    'patient_id',
)
print(f'Training score without scaling: {no_scale_score}')

In [None]:
# Target columns grouped by the multiplier applied to their mean prediction.
scale_by_2 = ['kidney_low', 'liver_low', 'spleen_low', 'spleen_high']
scale_by_4 = ['bowel_injury', 'kidney_high', 'liver_high']
scale_by_6 = ['extravasation_injury', 'any_injury']
scale_healthy = [
    'bowel_healthy',
    'extravasation_healthy',
    'kidney_healthy',
    'liver_healthy',
    'spleen_healthy',
]

# Multipliers for each group above.
sf_2 = 2.8461531332
sf_4 = 4.841531
sf_6 = 20.81635153
scale_h = 0.99519515313

# Start this experiment from fresh solution and prediction frames.
solution_train = create_training_solution(train)
y_pred = train.copy()
y_pred[Target_cols] = train[Target_cols].mean().tolist()

# Scale each group of targets by its factor.
for columns, factor in (
    (scale_by_2, sf_2),
    (scale_by_4, sf_4),
    (scale_by_6, sf_6),
    (scale_healthy, scale_h),
):
    y_pred[columns] *= factor

weight_scale_score = score(solution_train, y_pred, 'patient_id')
print(f'Training score with weight scaling: {weight_scale_score}')

In [None]:
# NOTE(review): this cell applies the same sf_2 / sf_4 / sf_6 / scale_h values
# to the same mean prediction as the weight-scaling cell before it, so the
# "better scaling" score is expected to equal that one - confirm whether
# different factors were intended here.
solution_train = create_training_solution(train)

# Mean prediction, rebuilt from scratch.
y_pred = train.copy()
y_pred[Target_cols] = train[Target_cols].mean().tolist()

# Apply the per-group multipliers.
for columns, factor in (
    (scale_by_2, sf_2),
    (scale_by_4, sf_4),
    (scale_by_6, sf_6),
    (scale_healthy, scale_h),
):
    y_pred[columns] *= factor

improved_scale_score = score(solution_train, y_pred, 'patient_id')
print(f'Training score with better scaling: {improved_scale_score}')

In [None]:
import pandas as pd
import numpy as np
import seaborn as sb
import matplotlib.pyplot as plt
import nibabel as nib

import os
import pydicom
from glob import glob
from tqdm import tqdm, trange

# Root directory of the competition data; the cells below build the
# segmentation, CSV and train_images paths from it.
PATH = '/kaggle/input/rsna-2023-abdominal-trauma-detection'

In [None]:
# Input series_id. The name `id` shadows the builtin but is kept because the
# following cells read it.
id = 21057

# Check that a segmentation file exists for this series.
# A direct membership test replaces the index loop, which printed both 'ok'
# and the not-found message when the match was the last entry, and raised
# NameError on `i` when the glob returned nothing.
a = glob(f'{PATH}/segmentations/*.nii')
if f'{PATH}/segmentations/{id}.nii' in a:
    print('ok')
else:
    print("The id doesn't exist")

In [None]:
# Reload the label table and series metadata, then join them on the patient.
train = pd.read_csv(f'{PATH}/train.csv')
train_series_meta = pd.read_csv(f'{PATH}/train_series_meta.csv')
df = pd.merge(left=train_series_meta, right=train, on='patient_id', how='inner')

# Segmentation file for the selected series.
seg_filepath = f'{PATH}/segmentations/{id}.nii'

# Folder of DICOM slices: train_images/<patient_id>/<series_id>.
patient_id = df.loc[df['series_id'] == id, 'patient_id'].iloc[0]
filepath = f'{PATH}/train_images/{patient_id}/{id}'

In [None]:
def create_3D_scans(folder, downsample_rate=1):
    """Read a folder of DICOM slices into one windowed 3-D array.

    Slices are read in ascending numeric order of their file stem
    (``<n>.dcm``), rescaled with RescaleSlope / RescaleIntercept when both
    tags are present, clipped to the window given by WindowCenter /
    WindowWidth and mapped linearly so that the window spans 0..255.

    Args:
        folder: directory containing the ``.dcm`` files of one series.
        downsample_rate: keep every n-th slice and every n-th row and column.

    Returns:
        ``np.int16`` array with the slices stacked along axis 0.
        Assumes each ``pixel_array`` is 2-D - TODO confirm for this dataset.

    Raises:
        ValueError: if the folder contains no ``.dcm`` files.
    """
    def _first_int(value):
        # WindowCenter / WindowWidth may hold several values; int() raises
        # TypeError on such a sequence, in which case the first entry is used.
        try:
            return int(value)
        except TypeError:
            return int(value[0])

    # Sort the real file names numerically (10.dcm after 9.dcm) instead of
    # rebuilding names from ints, and ignore anything that is not a .dcm file.
    filenames = sorted(
        (name for name in os.listdir(folder) if name.endswith('.dcm')),
        key=lambda name: int(name.split('.')[0]),
    )

    volume = []
    for filename in tqdm(filenames[::downsample_rate]):
        ds = pydicom.dcmread(os.path.join(folder, filename))
        image = ds.pixel_array

        # Rescale params: fall back to the identity transform when the tags
        # are absent, rather than hitting an undefined name on the first slice
        # or silently reusing the previous slice's values.
        slope, intercept = 1.0, 0.0
        if ("RescaleIntercept" in ds) and ("RescaleSlope" in ds):
            intercept = float(ds.RescaleIntercept)
            slope = float(ds.RescaleSlope)

        # Clipping params.
        center = _first_int(ds.WindowCenter)
        width = _first_int(ds.WindowWidth)
        low = center - width / 2
        high = center + width / 2
        image = (image * slope) + intercept
        image = np.clip(image, low, high)

        # Map [low, high] onto [0, 255]. Dividing by the slice maximum left
        # negative values for windows with low < 0 and broke on slices whose
        # maximum was <= 0.
        span = max(high - low, 1)
        image = ((image - low) / span * 255).astype(np.int16)
        image = image[::downsample_rate, ::downsample_rate]
        volume.append(image)

    if not volume:
        raise ValueError(f'No .dcm files found in {folder}')

    volume = np.stack(volume, axis=0)
    return volume

In [None]:
def create_3D_segmentations(filepath, downsample_rate=1):
    """Load a NIfTI segmentation and reorient it as a 3-D array.

    The array from ``get_fdata()`` has its first and last axes swapped, is
    rotated by ``k=-1`` in the plane of axes (1, 2), reversed along axis 0,
    and has its first and last axes swapped back. Presumably this aligns the
    mask with the transposed volume from ``create_3D_scans`` - TODO confirm.

    Args:
        filepath: path of the ``.nii`` file.
        downsample_rate: keep every n-th element along all three axes.

    Returns:
        The reoriented, downsampled array.
    """
    step = downsample_rate
    seg = nib.load(filepath).get_fdata()
    seg = seg.transpose(2, 1, 0)
    seg = np.rot90(seg, k=-1, axes=(1, 2))
    seg = seg[::-1]
    seg = seg.transpose(2, 1, 0)
    return seg[::step, ::step, ::step]

In [None]:
# CT volume with the slice axis moved from first to last position.
volume = np.transpose(create_3D_scans(filepath), (1, 2, 0))
# Segmentation labels for the same series.
volume_seg = create_3D_segmentations(seg_filepath)
volume.shape

In [None]:
z = 500  # slice number
fig = plt.figure(figsize=(16, 16))
ax1, ax2, ax3, ax4, ax5 = [fig.add_subplot(1, 5, column) for column in range(1, 6)]

# One binary mask per segmentation label:
# 1 liver, 2 spleen, 3 right kidney, 4 left kidney, 5 bowel.
for ax, label in zip((ax1, ax2, ax3, ax4, ax5), (1, 2, 3, 4, 5)):
    ax.imshow(np.where(volume_seg[:, :, z] == label, 1, 0), cmap='gray')
plt.show()

In [None]:
# CT intensities restricted to each segmentation label
# (1 liver, 2 spleen, 3 right kidney, 4 left kidney, 5 bowel); voxels outside
# the label are multiplied by 0.
liver_ct, spleen_ct, r_kidney_ct, l_kidney_ct, bowel_ct = (
    volume * np.where(volume_seg == organ_label, 1, 0)
    for organ_label in (1, 2, 3, 4, 5)
)

In [None]:
z = 500  # slice number
fig = plt.figure(figsize=(16, 16))
ax1, ax2, ax3, ax4, ax5 = [fig.add_subplot(1, 5, column) for column in range(1, 6)]

# (axes, masked volume, slice index) in the order
# liver, spleen, right kidney, left kidney, bowel.
# NOTE(review): the spleen panel shows slice 380 instead of z - confirm this is intended.
panels = (
    (ax1, liver_ct, z),
    (ax2, spleen_ct, 380),
    (ax3, r_kidney_ct, z),
    (ax4, l_kidney_ct, z),
    (ax5, bowel_ct, z),
)
for ax, organ_volume, slice_index in panels:
    ax.imshow(organ_volume[:, :, slice_index], cmap='gray')
plt.show()

In [None]:
# Fill the sample submission with the training means of the target columns.
submission = pd.read_csv(
    '/kaggle/input/rsna-2023-abdominal-trauma-detection/sample_submission.csv'
)
submission[Target_cols] = train[Target_cols].mean().tolist()

# Apply the per-group multipliers.
for columns, factor in (
    (scale_by_2, sf_2),
    (scale_by_4, sf_4),
    (scale_by_6, sf_6),
    (scale_healthy, scale_h),
):
    submission[columns] *= factor

submission.to_csv('submission.csv', index=False)