# Unetのセグメンテーションモデル

In [None]:
pip install segmentation_models_pytorch

In [None]:
pip install pydicom

In [None]:
pip install timm

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import cv2
import pydicom
import numpy as np
import os
import glob
from tqdm import tqdm
import gc

import torchvision
import torch
import torch.nn as nn
from torch.utils.data import Dataset
from fastai.vision.all import *
import segmentation_models_pytorch as smp

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
CV = 5  # number of cross-validation folds used when training the image classifier
SEED = 777  # random seed
fold = 1  # fold value held out as validation data
PATCH_SIZE = 512  # final side length of the full image
patch_size = 64  # side length of the patch cut out after segmentation
TH = .5  # threshold applied to the mask values that mark the region supporting the disease decision
SEG_TRAIN = False  # set to True to train the segmentation model
SEG = {
    'BS':16,  # segmentation model batch size
    'LR':5e-4,  # segmentation model learning rate
    'EPOCHS':10  # segmentation model number of epochs
}
INF = {
    'BS':64,  # disease classifier batch size
    'LR':1e-5,  # disease classifier learning rate
    'EPOCHS':10  # disease classifier number of epochs
}

In [None]:
!unzip /content/drive/MyDrive/rsna-2024-lumbar-spine-degenerative-classification.zip -d /content #RSNA2024を今のディレクトリに展開する

In [None]:
# Load the training label table.
train = pd.read_csv('/content/train.csv')
# Preview the last rows (the cell used to repeat this call twice).
train.tail()

In [None]:
# Columns whose name mentions 'foraminal'.
diagnosis = [column for column in train.columns if 'foraminal' in column]
# Keep only the rows where every one of those columns is filled in.
train = train[train[diagnosis].notna().all(axis=1)].reset_index(drop=True)
train.tail()

In [None]:
# Mapping from the severity strings to integer class ids.
labels = {
    'Normal/Mild':0,
    'Moderate':1,
    'Severe':2
}

In [None]:
# Series description table; it is joined to the coordinates on study_id/series_id below.
df_meta_f = pd.read_csv('/content/drive/MyDrive/RSNA_csv/train_series_descriptions.csv')
df_meta_f.tail()

In [None]:
# Rename in place so the description column is called 'series_description_2' after the merge.
df_meta_f.rename(columns={'series_description':'series_description_2'},inplace=True)

In [None]:
# Annotated label coordinates (the code below reads study_id, series_id, instance_number, level, condition, x, y).
df_coor = pd.read_csv('/content/train_label_coordinates.csv')
df_coor.tail()

In [None]:
# Attach the series description to every coordinate row (inner join, the pd.merge default).
merged_df = pd.merge(df_coor, df_meta_f, on=['study_id', 'series_id'])

In [None]:
# Keep only the left/right subarticular stenosis annotations.
SS=merged_df[merged_df['condition'].isin(['Right Subarticular Stenosis','Left Subarticular Stenosis'])]

In [None]:
# Give every subarticular-stenosis row the left AND right annotation
# coordinates of the image it belongs to, where an image is identified by
# (study_id, series_id, instance_number).

# SS was produced by boolean indexing; take an explicit copy so the writes
# below modify SS itself instead of triggering SettingWithCopyWarning.
SS = SS.copy()

# Initialise the new columns; images that lack one side keep NaN there.
for column in ['x_left', 'y_left', 'x_right', 'y_right']:
    SS[column] = np.nan

side_columns = {
    'Left Subarticular Stenosis': ['x_left', 'y_left'],
    'Right Subarticular Stenosis': ['x_right', 'y_right'],
}

for _, group in SS.groupby(['study_id', 'series_id', 'instance_number']):
    for condition, columns in side_columns.items():
        matches = group[group['condition'] == condition]
        if not matches.empty:
            # group.index already lists every row of this image, so there is
            # no need to rebuild a boolean mask over the whole frame.
            # The first annotation found for that side is used.
            SS.loc[group.index, columns] = matches[['x', 'y']].values[0]

# Sanity check
print(SS.head())


In [None]:
# Drop every row that has a missing value in any column.
SS2=SS.dropna()

In [None]:
# Coordinate rows annotated as spinal canal stenosis.
SCS=merged_df[merged_df['condition']=='Spinal Canal Stenosis']

In [None]:
# Left/right subarticular stenosis coordinates, ordered by image
# (study_id, series_id, instance_number), with 'level' placed before 'instance_number'.
F = (
    df_coor.loc[
        df_coor['condition'].isin(['Left Subarticular Stenosis','Right Subarticular Stenosis']),
        ['study_id', 'series_id', 'level', 'instance_number', 'condition', 'x', 'y'],
    ]
    .sort_values(['study_id', 'series_id', 'instance_number'])
)
F.tail()

In [None]:
# Five column labels of F: positions -6 up to, but not including, the last one.
# NOTE(review): with F's column order this selects series_id, level,
# instance_number, condition and x — confirm this is what was intended.
target = F.columns[-6:-1]
target

In [None]:
# Mapping from the severity strings to integer class ids.
labels = {
    'Normal/Mild':0,
    'Moderate':1,
    'Severe':2
}

# Coordinate columns read by T1Dataset, in (x, y) order for left then right.
coor = [
    'x_left',
    'y_left',
    'x_right',
    'y_right'
]

In [None]:
# Subarticular stenosis column names, one per disc level from L1/L2 to L5/S1.
target2=['subarticular_stenosis_l1_l2', 'subarticular_stenosis_l2_l3',
       'subarticular_stenosis_l3_l4', 'subarticular_stenosis_l4_l5',
       'subarticular_stenosis_l5_s1']

In [None]:
def augment_image_and_centers(image,centers,alpha):
    """Rotate an image by a random angle and rotate its key points to match.

    Args:
        image: image tensor handed to ``torchvision.transforms.functional.rotate``.
        centers: tensor whose rows are (x, y) points; it is moved to the CPU
            and rotated about (PATCH_SIZE//2, PATCH_SIZE//2).
        alpha: augmentation strength; the angle is drawn uniformly from
            [-180, 180] degrees and multiplied by ``alpha`` (0 means no rotation).

    Returns:
        The tuple ``(rotated_image, rotated_centers)``.
    """
    # NOTE(review): torchvision documents `rotate` as counter-clockwise; confirm
    # that the matrix below turns the points in the same direction as the image.
    degrees = torch.as_tensor(random.uniform(-180, 180)*alpha)
    image = torchvision.transforms.functional.rotate(image,degrees.item())

    # Rotation matrix, see https://discuss.pytorch.org/t/rotation-matrix/128260
    radians = degrees*math.pi/180
    sin_a = torch.sin(radians)
    cos_a = torch.cos(radians)
    rot = torch.stack([
        torch.stack([cos_a, sin_a]),
        torch.stack([-sin_a, cos_a])
      ])

    pivot = PATCH_SIZE//2
    centers = ((centers.cpu() - pivot) @ rot) + pivot
    return image,centers

In [None]:
class T1Dataset(Dataset):
    """Dataset yielding a square, normalised image and its two annotated points.

    Each item is ``(image, [label, centers])`` where ``image`` is a
    ``(1, PATCH_SIZE, PATCH_SIZE)`` float tensor scaled by its maximum and
    ``centers`` is a ``(2, 2)`` tensor of (x, y) points built from the columns
    listed in ``coor``, expressed in the resized image's pixel coordinates.

    Args:
        df: DataFrame with ``study_id``, ``series_id``, ``instance_number``
            and the ``coor`` columns.
        VALID: when True the crop is centred and no rotation is applied.
        alpha: augmentation strength (random crop offset and rotation range).
    """

    def __init__(self, df, VALID=False, alpha=0):
        self.data = df
        self.VALID = VALID
        self.alpha = alpha

    def __len__(self):
        return len(self.data)

    def _crop_offset(self, slack):
        """Return where to start cropping an axis that has `slack` surplus pixels."""
        if self.VALID:
            return slack//2
        # Centred crop, shifted by up to +/- alpha/2 of the surplus.
        return int(slack*(.5 + self.alpha*(.5 - np.random.rand())))

    def __getitem__(self, index):
        row = self.data.iloc[index]  # the index-th row of the frame

        # Annotated points as rows of (x, y).
        centers = torch.as_tensor([x for x in row[coor]]).view(2,2).float()

        # NOTE(review): this root differs from '/content/train_images/' used
        # elsewhere in the notebook — confirm both hold the same DICOM files.
        sample = '/content/drive/MyDrive/dataset/'
        sample = sample+str(row['study_id'])+'/'+str(row['series_id'])+'/'+str(row['instance_number'])+'.dcm'
        image = pydicom.dcmread(sample).pixel_array
        H,W = image.shape

        # Crop to a square and shift the points by the amount cut away.
        if H > W:
            h = self._crop_offset(H - W)
            image = image[h:h+W]
            centers[:,1] -= h
            H = W
        elif H < W:
            w = self._crop_offset(W - H)
            image = image[:,w:w+H]
            centers[:,0] -= w
            W = H

        image = cv2.resize(image,(PATCH_SIZE,PATCH_SIZE))
        # Scale to [0, 1]; an all-zero image is left as zeros instead of
        # dividing by zero and producing NaNs.
        peak = np.max(image)
        image = torch.as_tensor(image/(peak if peak > 0 else 1)).unsqueeze(0).float()

        label = torch.as_tensor(1)  # placeholder; the label is not used

        # Express the points in the resized image's coordinates.
        centers[:,0] = centers[:,0]*PATCH_SIZE/W
        centers[:,1] = centers[:,1]*PATCH_SIZE/H

        if not self.VALID: image,centers = augment_image_and_centers(image,centers,self.alpha)
        return image.to(device),[label.to(device),centers.to(device)]

In [None]:
def seed_everything(seed):
    """Fix the random seeds so that results can be reproduced.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and CUDA), sets
    ``PYTHONHASHSEED`` and switches cuDNN to deterministic mode.
    """
    os.environ["PYTHONHASHSEED"] = str(seed)
    for seeder in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        seeder(seed)
    # Deterministic cuDNN kernels; auto-tuning is disabled.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

In [None]:
class myUNet2(nn.Module):
    """U-Net that outputs two masks, each min-max scaled to [0, 1]."""

    def __init__(self):
        super(myUNet2, self).__init__()

        self.UNet = smp.Unet(
            encoder_name="resnet18",  # ResNet-18 backbone
            classes=2,  # two output masks
            in_channels=1  # grayscale input
        ).to(device)

    def forward(self,X):
        """Return the U-Net output rescaled per sample and per channel.

        Args:
            X: input batch of shape (batch, 1, H, W).

        Returns:
            Tensor with the same shape as the U-Net output whose values lie in
            [0, 1] for every (sample, channel) map.
        """
        x = self.UNet(X)
        # Per-(sample, channel) extrema over all pixels, shaped (B, C, 1, 1) so
        # they broadcast against x. Flattening the spatial dims avoids relying
        # on the input being exactly PATCH_SIZE x PATCH_SIZE.
        flat = x.flatten(2)
        min_values = flat.min(-1)[0][..., None, None]
        max_values = flat.max(-1)[0][..., None, None]
        # A constant map has zero range; clamp so it becomes all zeros, not NaN.
        value_range = (max_values - min_values).clamp_min(torch.finfo(x.dtype).eps)
        x = (x - min_values)/value_range

        return x

In [None]:
# Coordinate grid of shape (1, 1, 2, PATCH_SIZE, PATCH_SIZE): for pixel
# (row r, column c), channel 0 holds c (the x coordinate) and channel 1 holds
# r (the y coordinate).
idx_map = torch.stack([torch.arange(PATCH_SIZE)]*PATCH_SIZE).to(device)
idx_map = torch.stack([idx_map,idx_map.T]).view(1,1,2,PATCH_SIZE,PATCH_SIZE)
class myLoss2(nn.Module):
    """Loss comparing predicted masks with Gaussian bumps around the annotated points.

    The target for each of the two masks is an isotropic 2-D Gaussian
    (covariance sigma^2 * I) centred on the annotated point. The loss is
    ``1 - cos^2`` between prediction and target, with all elements of the
    batch treated as one flat vector.

    Args:
        alpha: stored on the instance; not used by ``forward``.
    """

    def __init__(
            self,
            alpha=.5
        ):
        super().__init__()
        self.alpha = alpha

    def clone(self):
        """Return a new loss object configured with the same ``alpha``."""
        return myLoss2(self.alpha)

    def forward(
            self,
            y,  # predicted mask values (0..1), shape (batch, 2, PATCH_SIZE, PATCH_SIZE)
            t   # pair (label, centers); centers are the annotated (x, y) points
        ):
        mask_pred = y
        _,mask_true = t
        # Variance (sigma^2) of the target Gaussian, one entry per mask.
        s2 = torch.as_tensor([PATCH_SIZE/8]*2)
        A = -1/(2*s2).to(device)  # coefficient inside the exponential
        K = 1/torch.sqrt(2*math.pi*s2).to(device)  # normalising factor in front of the exponential
        # Scale the prediction so its maximum (1) matches the target's peak K.
        mask_pred = mask_pred*K.view(1,2,1,1)
        # Per-pixel x and y offsets from the annotated point of each mask.
        mask = idx_map - mask_true.view(-1,2,2,1,1)
        # Gaussian of the squared distance: K * exp(A * (dx^2 + dy^2)).
        mask = torch.exp((A.view(-1,2,1,1,1)*mask*mask).sum(2))*K.view(-1,2,1,1)
        # 1 - squared cosine similarity between target and prediction.
        D = 1 - ((mask*mask_pred).sum())**2/((mask*mask).sum()*(mask_pred*mask_pred).sum())

        return D


In [None]:
import numpy as np

# Number of rows in SS2
n_rows = len(SS2)

# Fold number (1..5) for every row, stored in 'series_description' because
# the training cell filters on that column.
# Folds are assigned per study_id (round-robin in order of first appearance)
# rather than per row: SS2 holds several rows for the same image, so a
# row-wise split would put the same image in both training and validation.
study_codes, _ = pd.factorize(SS2['study_id'])
series_description_values = study_codes % 5 + 1

# Work on an explicit copy before adding the column.
SS2 = SS2.copy()
SS2.loc[:, 'series_description'] = series_description_values

# Check the result
print(SS2.head())


In [None]:
# Turn segmentation training on, overriding the value set in the configuration cell.
SEG_TRAIN=True

In [None]:
import pickle

# Train/validation split on the fold number stored in 'series_description'.
tdf = SS2[SS2['series_description'] != fold]
vdf = SS2[SS2['series_description'] == fold]

# T1Dataset's default alpha is 0, so the training set gets a centred crop and
# a zero-degree rotation (no effective augmentation).
tds = T1Dataset(tdf)
vds = T1Dataset(vdf,VALID=True)
tdl = torch.utils.data.DataLoader(tds, batch_size=SEG['BS'], shuffle=True, drop_last=True)
vdl = torch.utils.data.DataLoader(vds, batch_size=SEG['BS'], shuffle=False)


if SEG_TRAIN:
    seed_everything(SEED)

    # Wrap the loaders for fastai. The original line had a stray closing
    # parenthesis, which was a syntax error.
    dls = DataLoaders(tdl,vdl)

    n_iter = len(tds)//SEG['BS']  # iterations per epoch; not used in this cell

    model = myUNet2()
    learn = Learner(
        dls,
        model,
        lr=SEG['LR'],
        loss_func=myLoss2(alpha=0.5),
    )
    learn.fit_one_cycle(SEG['EPOCHS'])

    # The whole model object is pickled; a later cell reads it back with pickle.load.
    with open('/content/drive/MyDrive/RSNA_csv/'+"SEG_"+str(fold)+"_SS"+".pkl", 'wb') as f:
      pickle.dump(model, f)

    # Release the loaders and model before the next stage.
    del tdl,vdl,dls,model,learn
    gc.collect()

# axialT2画像の前処理

In [None]:
import os
import pandas as pd

# Dataset root, laid out as <study_id>/<series_id>/<instance_number>.dcm
base_dir = '/content/train_images/'

# One [study_id, series_id, instance_number] entry per DICOM file found.
data = []

for study_id in os.listdir(base_dir):
    study_path = os.path.join(base_dir, study_id)
    if not os.path.isdir(study_path):
        continue
    for series_id in os.listdir(study_path):
        series_path = os.path.join(study_path, series_id)
        if not os.path.isdir(series_path):
            continue
        # The instance number is the file name up to its first dot.
        data.extend(
            [study_id, series_id, filename.split('.')[0]]
            for filename in os.listdir(series_path)
            if filename.endswith('.dcm')
        )

df = pd.DataFrame(data, columns=['study_id', 'series_id', 'instance_number'])

print(df)

In [None]:
# Series descriptions, keyed by study_id/series_id.
df_meta_f = pd.read_csv('/content/train_series_descriptions.csv')

# The ids were collected from folder/file names as strings; make them integers.
df = df.astype('int64')

# Every DICOM file together with the description of its series.
all_df = df.merge(df_meta_f, on=['study_id', 'series_id'], how='inner')

# One frame per series type.
s_all_df, a_all_df, s2_all_df = (
    all_df[all_df['series_description'] == description]
    for description in ('Sagittal T1', 'Axial T2', 'Sagittal T2/STIR')
)

In [None]:
import os
import numpy as np

# Work on a copy of the axial frame.
a_all_df_copy = a_all_df.copy()

# Create the position/spacing columns up front, filled with None.
for column in ('x_pos', 'y_pos', 'z_pos', 'pixel_sp_z', 'pixel_sp_y'):
    a_all_df_copy[column] = None

for idx, row in a_all_df_copy.iterrows():
    dicom_file_path = f"/content/train_images/{row['study_id']}/{row['series_id']}/{row['instance_number']}.dcm"

    dicom_data = pydicom.dcmread(dicom_file_path)
    H,W = dicom_data.pixel_array.shape

    position = dicom_data.ImagePositionPatient
    spacing = dicom_data.PixelSpacing
    orientation = dicom_data.ImageOrientationPatient

    # Values to store for this image, written in this order.
    record = {
        'x_pos': position[0],
        'y_pos': position[1],
        'z_pos': position[2],
        'pixel_sp_z': spacing[0],
        'pixel_sp_y': spacing[1],
        'H': H,
        'W': W,
        'mean_bfr_cut_y': H/2,  # image centre, in pixels
        'mean_bfr_cut_x': W/2,
        **{f'o{k}': orientation[k] for k in range(6)},
    }
    for column, value in record.items():
        a_all_df_copy.loc[idx, column] = value

print(a_all_df_copy)

In [None]:
# Renumber the rows 0..n-1; later cells address rows with .loc[j] for j in range(len(...)).
a_all_df_copy = a_all_df_copy.reset_index(drop=True)


a_all_df=a_all_df_copy


In [None]:
# Load the pickled segmentation model from the "SEG_1_SS.pkl" file.
# NOTE(review): pickle.load can execute arbitrary code; only load files you trust.
with open('/content/drive/MyDrive/RSNA_csv/'+"SEG_"+"1"+"_SS"+".pkl", 'rb') as f:
      model2=pickle.load(f)

In [None]:
import torch

# Run the segmentation model on every axial image and store, for each of the
# two masks, the mean (y, x) position of the pixels above 0.5, in the
# coordinates of the PATCH_SIZE x PATCH_SIZE network input.
# Rows are addressed with .loc[j], which relies on the index being 0..n-1.
a_all_df_copy = a_all_df.copy()

# Inference mode: use the stored batch-norm statistics.
model2.eval()

for j in range(len(a_all_df)):
    record = a_all_df_copy.iloc[j]
    dicom_path = "/content/train_images/"+str(record["study_id"])+"/"+str(record["series_id"])+"/"+str(record["instance_number"])+".dcm"
    # Read the file once and take both the header and the pixels from it.
    dicom = pydicom.dcmread(dicom_path)
    image = dicom.pixel_array

    H,W = image.shape
    a_all_df_copy.loc[j, 'H'] = H
    a_all_df_copy.loc[j, 'W'] = W
    for k in range(6):
        a_all_df_copy.loc[j, f'o{k}'] = dicom.ImageOrientationPatient[k]

    # Number of rows/columns removed before the square crop (0 when nothing is cut).
    a_all_df_copy.loc[j, 'H_cut'] = 0
    a_all_df_copy.loc[j, 'W_cut'] = 0
    if H > W:
        h = (H - W)//2
        image = image[h:h+W]
        a_all_df_copy.loc[j, 'H_cut'] = h
        H = W
    elif H < W:
        w = (W - H)//2
        image = image[:,w:w+H]
        a_all_df_copy.loc[j, 'W_cut'] = w
        W = H

    image = cv2.resize(image,(PATCH_SIZE,PATCH_SIZE))
    # Scale to [0, 1]; an all-zero image stays zero instead of dividing by zero.
    peak = np.max(image)
    image = torch.as_tensor(image/(peak if peak > 0 else 1)).unsqueeze(0).float()
    image = image.to(device)

    # No gradients are needed for prediction.
    with torch.no_grad():
        OUT = model2(image.unsqueeze(0)).cpu()

    for i in range(2):
        mask = (OUT > 0.5)[0][i]

        # (row, column) indices of the pixels above the threshold.
        true_pixels = torch.nonzero(mask)

        # Their mean position; NaN when no pixel is above the threshold.
        mean_yx = true_pixels.float().mean(dim=0)
        mean_y, mean_x = mean_yx[0], mean_yx[1]

        a_all_df_copy.loc[j, f'mean_y_{i}']=mean_y.item()
        a_all_df_copy.loc[j, f'mean_x_{i}']=mean_x.item()
        print(f"Mean coordinates: (y, x) = ({0-mean_y.item()}, {mean_x.item()})")


In [None]:
a_all_df=a_all_df_copy

import torch

# Convert the mask centroids from the resized network input back to the
# resolution of the square-cropped original image.
# The square crop has side min(H, W) and was resized to PATCH_SIZE, so one
# factor applies to both axes. PATCH_SIZE replaces a hard-coded 512 so this
# stays in step with the resize used for inference.
resized_size = PATCH_SIZE
scale = a_all_df[['H', 'W']].min(axis=1) / resized_size

for i in range(2):
    a_all_df[f'mean_bfr_y_{i}'] = a_all_df[f'mean_y_{i}'] * scale
    a_all_df[f'mean_bfr_x_{i}'] = a_all_df[f'mean_x_{i}'] * scale


In [None]:
import torch

# Shift the centroids back into the coordinates of the uncropped image by
# adding the number of rows/columns that were cut away (a missing value counts as 0).
rows_cut = a_all_df['H_cut'].fillna(0)
cols_cut = a_all_df['W_cut'].fillna(0)

for i in range(2):
    a_all_df[f'mean_bfr_cut_y_{i}'] = a_all_df[f'mean_bfr_y_{i}'] + rows_cut
    a_all_df[f'mean_bfr_cut_x_{i}'] = a_all_df[f'mean_bfr_x_{i}'] + cols_cut

In [None]:
import numpy as np

# Compute a 3-D position for each mask centroid and store it in new columns.
for j in range(len(a_all_df)):
    # Image origin; these columns were filled from ImagePositionPatient.
    x_pos = a_all_df.loc[j, 'x_pos']
    y_pos = a_all_df.loc[j, 'y_pos']
    z_pos = a_all_df.loc[j, 'z_pos']

    o0, o1, o2 = a_all_df.loc[j, ['o0', 'o1', 'o2']]  # ImageOrientationPatient[0:3]
    o3, o4, o5 = a_all_df.loc[j, ['o3', 'o4', 'o5']]  # ImageOrientationPatient[3:6]

    pixel_sp_y = a_all_df.loc[j, 'pixel_sp_y']  # filled from PixelSpacing[1]
    pixel_sp_z = a_all_df.loc[j, 'pixel_sp_z']  # filled from PixelSpacing[0]

    # Direction vector that is multiplied by the x (column) pixel index below
    row_direction = np.array([o0, o1, o2])
    # Direction vector that is multiplied by the y (row) pixel index below
    col_direction = np.array([o3, o4, o5])
    for i in range(2):
        # Centroid of mask i in pixel coordinates of the uncropped image
        mean_y = a_all_df.loc[j, f'mean_bfr_cut_y_{i}']  # row index
        mean_x = a_all_df.loc[j, f'mean_bfr_cut_x_{i}']  # column index

        # origin + (spacing * column index) * row_direction + (spacing * row index) * col_direction
        patient_pos = np.array([x_pos, y_pos, z_pos]) + \
                      (pixel_sp_y * mean_x) * row_direction + \
                      (pixel_sp_z * mean_y) * col_direction

        # Store the three components
        a_all_df.loc[j, f'xx_{i}'] = patient_pos[0]
        a_all_df.loc[j, f'yy_{i}'] = patient_pos[1]
        a_all_df.loc[j, f'zz_{i}'] = patient_pos[2]


In [None]:
# Suffix the axial 3-D coordinates with '_axial' so they do not clash with the
# zz_0..zz_4 columns merged in below.
a_all_df = a_all_df.rename(columns={
    'xx_0': 'xx_0_axial',
    'yy_0': 'yy_0_axial',
    'zz_0': 'zz_0_axial',
    'xx_1': 'xx_1_axial',
    'yy_1': 'yy_1_axial',
    'zz_1': 'zz_1_axial'
})

In [None]:
# Load a previously saved frame; the merge below reads its study_id, zz_0..zz_4
# and z_pos_0..z_pos_4 columns.
# NOTE(review): presumably one row per study with the level positions taken
# from the sagittal series — confirm against the notebook that wrote this file.
with open('/content/drive/MyDrive/RSNA_csv/middle_s_all_df_rev.pkl', 'rb') as f:
  middle_s_all_df=pickle.load(f)

In [None]:
import pandas as pd

# Left-join those columns onto the axial frame by study_id.
a_all_df_2 = pd.merge(a_all_df, middle_s_all_df[['study_id', 'zz_0', 'zz_1', 'zz_2', 'zz_3', 'zz_4','z_pos_0','z_pos_1','z_pos_2','z_pos_3','z_pos_4']], on='study_id', how='left')

# Show the result
print(a_all_df_2.head())

In [None]:
import numpy as np

# levelを設定する関数
def assign_level(row):
    """Return the disc level whose z interval contains ``row['zz_0_axial']``.

    ``row['zz_0']`` .. ``row['zz_4']`` are the z positions of L1/L2 .. L5/S1.
    The boundary between two neighbouring levels is the midpoint between
    them; the first and last intervals extend outwards by half the gap to
    their only neighbour. The levels may run in increasing or decreasing z;
    the direction is decided by majority over the four gaps.

    The original used the L2/L3-L3/L4 gap for the L1/L2 interval (leaving
    ``z0_mid`` unused), which misplaced the L1/L2-L2/L3 boundary whenever
    the first two gaps differ. The L1/L2-L2/L3 gap is used here.

    Args:
        row: mapping (dict or pandas Series) with keys ``zz_0`` .. ``zz_4``
            and ``zz_0_axial``.

    Returns:
        One of 'L1/L2', 'L2/L3', 'L3/L4', 'L4/L5', 'L5/S1', or 'OUT' when the
        value lies in no interval (including when a value is NaN).
    """
    level_names = ['L1/L2', 'L2/L3', 'L3/L4', 'L4/L5', 'L5/S1']
    z = [row[f'zz_{k}'] for k in range(5)]
    target = row['zz_0_axial']

    # Signed gaps between consecutive levels and half of each gap.
    gaps = [upper - lower for lower, upper in zip(z, z[1:])]
    half = [gap / 2 for gap in gaps]

    # Six interval edges: outer edge, four midpoints, outer edge.
    edges = [z[0] - half[0]]
    edges.extend(z[k] + half[k] for k in range(4))
    edges.append(z[4] + half[3])

    # Increasing z when at least two of the four gaps are non-negative.
    ascending = sum(gap >= 0 for gap in gaps) >= 2

    # The first matching interval wins, so a value exactly on a boundary
    # belongs to the level listed first.
    for name, start, end in zip(level_names, edges, edges[1:]):
        low, high = (start, end) if ascending else (end, start)
        if low <= target <= high:
            return name
    return 'OUT'

# Label every axial image with the disc level returned by assign_level.
a_all_df_2['level'] = a_all_df_2.apply(assign_level, axis=1)

# Check the result
print(a_all_df_2[['zz_0_axial', 'level']].head())


In [None]:
import pandas as pd
import numpy as np

# 関数：グループごとに均等にデータを選び、足りない場合は補完
def select_five_from_group(group):
    """Return exactly five rows of ``group``, ordered by ``zz_0_axial``.

    Groups with five or more rows are sampled at evenly spaced positions.
    Smaller groups are padded by repeating rows according to a fixed pattern.
    A single row is repeated five times and re-indexed 0..4. An empty group
    returns None.
    """
    ordered = group.sort_values(by='zz_0_axial').reset_index(drop=True)
    count = len(ordered)

    if count == 1:
        return pd.concat([ordered] * 5, ignore_index=True)

    # Positions taken when the group is too small to sample.
    padded_picks = {
        2: [0, 1, 1, 1, 0],
        3: [0, 1, 2, 2, 1],
        4: [0, 1, 2, 3, 3],
    }
    if count in padded_picks:
        return ordered.iloc[padded_picks[count]]

    if count >= 5:
        stride = count / 5
        return ordered.iloc[[int(k * stride) for k in range(5)]]

    return None


def process_dataframe(df):
    """Reduce or pad every (study_id, series_id, level) group of ``df`` to five rows.

    Returns the concatenated result with a fresh 0..n-1 index.
    """
    per_level = df.groupby(['study_id', 'series_id', 'level'], group_keys=False)
    return per_level.apply(select_five_from_group).reset_index(drop=True)

# Five axial images per study, series and level.
sampled_df = process_dataframe(a_all_df_2)

# Check the result
print(sampled_df)



In [None]:
# Attach the columns of train to every sampled image (left join on study_id).
merged_df = pd.merge(sampled_df,train, on='study_id', how='left')

In [None]:
import pandas as pd

# Two copies of merged_df, one tagged 'left' and one tagged 'right'.
merged_df_left = merged_df.assign(direction='left')
merged_df_right = merged_df.assign(direction='right')

# Stack them: all 'left' rows first, then all 'right' rows.
merged_df_doubled = pd.concat([merged_df_left, merged_df_right], ignore_index=True)

# Show the result
print(merged_df_doubled)

In [None]:
# Encode the side as an integer: 'left' -> 0, 'right' -> 1.
merged_df_doubled['direction'] = merged_df_doubled['direction'].replace({'left': 0, 'right': 1})

In [None]:
# ラベルを設定するための関数
def assign_label(row):
    """Return the subarticular stenosis label for the row's side and level.

    ``row['direction']`` selects the side (0 = left, 1 = right) and
    ``row['level']`` the disc level; the value is read from the column
    ``<side>_subarticular_stenosis_<level>``. Returns None for any other
    direction or level.
    """
    direction = row['direction']
    if direction == 0:
        side = 'left'
    elif direction == 1:
        side = 'right'
    else:
        return None

    column_suffix = {
        'L1/L2': 'l1_l2',
        'L2/L3': 'l2_l3',
        'L3/L4': 'l3_l4',
        'L4/L5': 'l4_l5',
        'L5/S1': 'l5_s1',
    }.get(row['level'])
    if column_suffix is None:
        return None
    return row[f'{side}_subarticular_stenosis_{column_suffix}']

# apply関数を使ってlabelカラムを作成
# Build the 'label' column by applying assign_label to every row.
merged_df_doubled['label'] = merged_df_doubled.apply(assign_label, axis=1)

In [None]:
def group_rows(df, group_size):
    """Collapse every ``group_size`` consecutive rows of ``df`` into one row.

    Each output row carries all columns of the first row in its chunk plus
    ``instance_number_0`` .. ``instance_number_{k-1}``, the instance numbers
    of the chunk's rows in order. The last chunk may be shorter.
    """
    collapsed = []
    for start in range(0, len(df), group_size):
        chunk = df.iloc[start:start + group_size]

        # Start from the first row's values, then add one key per member.
        record = chunk.iloc[0].to_dict()
        for j in range(len(chunk)):
            record[f'instance_number_{j}'] = chunk.iloc[j]['instance_number']

        collapsed.append(record)

    return pd.DataFrame(collapsed)

# グループサイズを設定（5つの行を1行にまとめる）
# Collapse every 5 consecutive rows into a single row.
group_size = 5
grouped_df_axial = group_rows(merged_df_doubled, group_size)
print(grouped_df_axial.head())

In [None]:
# Drop the groups whose level was marked 'OUT'.
grouped_df_axial=grouped_df_axial[grouped_df_axial['level']!='OUT']

In [None]:
# 'study_id' と 'direction' 列で grouped_df_axial をグループ分けし、その後 'level' の重複を削除する関数
def remove_duplicates(grouped_df_axial):
    """Keep the first row per level within every (study_id, direction) group.

    The result is ordered by group key and re-indexed 0..n-1.
    """
    deduplicated = [
        members.drop_duplicates(subset='level')
        for _, members in grouped_df_axial.groupby(['study_id', 'direction'])
    ]
    return pd.concat(deduplicated, ignore_index=True)

# 関数を grouped_df_axial に適用
# Apply remove_duplicates to grouped_df_axial.
grouped_df_axial = remove_duplicates(grouped_df_axial)


In [None]:
import pandas as pd

# 全てのレベルを定義
all_levels = ['L1/L2', 'L2/L3', 'L3/L4', 'L4/L5', 'L5/S1']

def fill_missing_levels(grouped_df_axial, all_df):
    """Add a row for every disc level missing from a (study_id, direction) group.

    For each group, the images of one of its series are used: the first
    series with more than one image, otherwise the last series of the group.
    A missing level is given the image at a fixed fraction of that series
    (1/5 .. 4/5 of the way through; the last image for L5/S1), repeated in
    ``instance_number_0`` .. ``instance_number_4``. All other columns are
    copied from the group's first row.

    Fixes relative to the earlier version:
      * the filled row's ``series_id`` is the series the images were taken
        from (it used to stay at the first row's series_id);
      * ``label`` is re-read from ``<side>_subarticular_stenosis_<level>``
        when that column exists, instead of keeping the label of a
        different level;
      * instance numbers are sorted before positions are taken, since their
        order in ``all_df`` follows the directory listing;
      * the position is clamped to the series length, and groups with no
        images are skipped, instead of raising IndexError.

    Args:
        grouped_df_axial: frame with ``study_id``, ``direction``, ``level``,
            ``series_id`` and ``instance_number_0`` .. ``instance_number_4``.
        all_df: frame with ``study_id``, ``series_id``, ``instance_number``.

    Returns:
        ``grouped_df_axial`` followed by the filled rows, re-indexed 0..n-1.
    """
    sides = {0: 'left', 1: 'right'}
    filled_rows = []

    for (study_id, direction), group in grouped_df_axial.groupby(['study_id', 'direction']):
        existing_levels = set(group['level'])
        missing_levels = [lvl for lvl in all_levels if lvl not in existing_levels]
        if not missing_levels:
            continue

        # Choose the series that supplies the images.
        study_rows = all_df[all_df['study_id'] == study_id]
        series_id, instance_numbers = None, []
        for candidate in group['series_id']:
            series_id = candidate
            instance_numbers = sorted(
                study_rows.loc[study_rows['series_id'] == candidate, 'instance_number'].tolist()
            )
            if len(instance_numbers) > 1:
                break

        total_len = len(instance_numbers)
        if total_len == 0:
            # No image is available for this group.
            continue

        for missing_level in missing_levels:
            row = group.iloc[0].copy()
            row['level'] = missing_level
            row['series_id'] = series_id

            # Position within the series for this level.
            if missing_level == all_levels[-1]:
                position = total_len - 1
            else:
                position = max(total_len * (all_levels.index(missing_level) + 1) // 5, 1)
            position = min(position, total_len - 1)

            picked = instance_numbers[position]
            for k in range(5):
                row[f'instance_number_{k}'] = picked

            # Re-read the label for the level being filled, when available.
            side = sides.get(direction)
            if side is not None:
                label_col = f"{side}_subarticular_stenosis_{missing_level.lower().replace('/', '_')}"
                if label_col in row.index:
                    row['label'] = row[label_col]

            filled_rows.append(row)

    # Append the filled rows to the original frame.
    filled_df = pd.concat([grouped_df_axial, pd.DataFrame(filled_rows)], ignore_index=True)
    return filled_df

# 関数を grouped_df_axial に適用
# Add rows for the levels missing from each study/direction.
grouped_df_axial = fill_missing_levels(grouped_df_axial, all_df)


In [None]:
# Keep only the rows that have a label.
grouped_df_axial=grouped_df_axial[grouped_df_axial['label'].notna()]

In [None]:
# Renumber the rows 0..n-1 in place.
grouped_df_axial.reset_index(drop=True,inplace=True)

In [None]:
# DataFrameを保存するパス
# Destination path for the DataFrame
save_path = '/content/drive/MyDrive/RSNA_csv/grouped_df_axial_rev.pkl'

# Save the DataFrame in pickle format
grouped_df_axial.to_pickle(save_path)

print(f"DataFrame has been saved to {save_path}")

# 画像認識モデル学習部分

In [None]:
# Reload the grouped axial frame saved earlier.
# NOTE(review): pickle.load can execute arbitrary code; only load files you trust.
with open("/content/drive/MyDrive/RSNA_csv/grouped_df_axial_rev.pkl", 'rb') as f:
  grouped_df_axial=pickle.load(f)

In [None]:
# Give each study a fold number 1..5, round-robin in order of first
# appearance, so that all rows of a study fall in the same fold.
unique_studies = grouped_df_axial['study_id'].unique()
study_mapping = dict(
    zip(unique_studies, (position % 5 + 1 for position in range(len(unique_studies))))
)

# The fold number is stored in 'series_description2'.
grouped_df_axial['series_description2'] = grouped_df_axial['study_id'].map(study_mapping)


In [None]:
# Split by side: direction 0 is left, direction 1 is right.
grouped_df_axial_left=grouped_df_axial[grouped_df_axial['direction']==0]
grouped_df_axial_right=grouped_df_axial[grouped_df_axial['direction']==1]

In [None]:
tdf2 = grouped_df_axial_right[grouped_df_axial_right['series_description2'] != fold]
vdf2 = grouped_df_axial_right[grouped_df_axial_right['series_description2'] == fold]

In [None]:
tdf2 = grouped_df_axial_left[grouped_df_axial_left['series_description2'] != fold]
vdf2 = grouped_df_axial_left[grouped_df_axial_left['series_description2'] == fold]

In [None]:
vdf2.to_pickle('/content/drive/MyDrive/RSNA_csv/grouped_df_axial_right_vdf2.pkl')

In [None]:
vdf2.to_pickle('/content/drive/MyDrive/RSNA_csv/grouped_df_axial_vdf2.pkl')

In [None]:
import albumentations as A #データ拡張の処理をする

# Training-time augmentation pipeline (albumentations).
AUG_PROB = 0.75 # probability with which each augmentation stage below is applied
transforms_train = A.Compose([
    # Random brightness / contrast jitter of up to +-20 %.
    A.RandomBrightnessContrast(brightness_limit=(-0.2, 0.2), contrast_limit=(-0.2, 0.2), p=AUG_PROB),
    # Exactly one of the blur / noise transforms is picked when this stage fires.
    A.OneOf([
        A.MotionBlur(blur_limit=5),# motion blur
        # A.MedianBlur(blur_limit=5),
        A.GaussianBlur(blur_limit=5),# Gaussian-filter blur
        # NOTE(review): var_limit=(5.0, 30.0) looks tuned for 0-255 intensities while the
        # dataset feeds patches scaled by their maximum -- confirm how the installed
        # albumentations version interprets var_limit for float images.
        A.GaussNoise(var_limit=(5.0, 30.0)),# random Gaussian noise
    ], p=AUG_PROB),

    # Exactly one of the geometric distortions is picked when this stage fires.
    A.OneOf([
        A.OpticalDistortion(distort_limit=1.0),# optical (lens-like) distortion
        A.GridDistortion(num_steps=5, distort_limit=1.),# per-grid-cell distortion
        A.ElasticTransform(alpha=3),# elastic deformation
    ], p=AUG_PROB),

    A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.1, rotate_limit=15, border_mode=0, p=AUG_PROB),# translation, scaling and rotation
  #  A.CoarseDropout(max_holes=15, max_height=30, max_width=30, min_holes=1, min_height=8, min_width=8, p=AUG_PROB),
])

In [None]:
patch_size=224# side length of the cropped patch; replaces the value 64 set in the config cell at the top of the notebook

In [None]:
import torch.nn.functional as F
class ViT_T1_Dataset(Dataset):
    """Dataset producing a 5-slice stack of UNet-localised patches per table row.

    For each row of ``df`` the five DICOM slices named by the columns
    ``instance_number_0`` .. ``instance_number_4`` are read, centre-cropped to a
    square, resized to ``PATCH_SIZE`` and normalised by their maximum. The
    segmentation network ``UNet`` is run on every slice; the centroid of the
    thresholded mask of the channel selected by the row's ``direction`` is used
    as the centre of a ``P`` x ``P`` crop.

    Args:
        df: table with ``study_id``, ``series_id``, ``instance_number_0..4``,
            ``direction`` and ``label`` columns.
        UNet: segmentation model; its output for one image must hold two mask
            channels of ``PATCH_SIZE`` x ``PATCH_SIZE`` (required by the
            ``view`` calls in ``_locate_centers``).
        VALID: stored but not used by this class.
        P: side length of the cropped patch.
        alpha: stored but not used by this class.
        transform: optional albumentations transform applied to the patch stack.

    Each item is ``([x, m], [label, m])`` where ``x`` is a float tensor of shape
    ``(5, P, P)``, ``m`` is an all-True boolean tensor of length 3 and ``label``
    is the integer class from the module-level ``labels`` mapping. All tensors
    are moved to the module-level ``device``.

    Relies on the module-level names ``PATCH_SIZE``, ``TH``, ``idx_map``,
    ``labels`` and ``device``.
    """

    NUM_SLICES = 5  # number of instance_number_<i> columns read per row

    def __init__(self, df, UNet, VALID=False, P=patch_size, alpha=0,transform=None):
        self.data = df
        self.UNet = UNet
        self.VALID = VALID
        self.P = P
        self.alpha = alpha
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def _load_square_image(self, path):
        """Read a DICOM file and return it as a (1, 1, PATCH_SIZE, PATCH_SIZE) float tensor on ``device``."""
        image = pydicom.dcmread(path).pixel_array
        H, W = image.shape
        # Centre-crop the longer side so that the image becomes square.
        if H > W:
            top = (H - W) // 2
            image = image[top:top + W]
        elif H < W:
            left = (W - H) // 2
            image = image[:, left:left + H]
        image = cv2.resize(image, (PATCH_SIZE, PATCH_SIZE))
        # Normalise by the maximum; an all-zero slice would otherwise yield 0/0 = NaN.
        peak = np.max(image)
        scale = peak if peak != 0 else 1
        return torch.as_tensor(image / scale).unsqueeze(0).unsqueeze(0).float().to(device)

    def _locate_centers(self, image):
        """Return one (x, y) centre per mask channel predicted by the UNet."""
        # The UNet is only used as a fixed localiser, so no autograd graph is needed.
        with torch.no_grad():
            OUT = self.UNet(image)
        OUT = (OUT > TH)[0]  # binarise the masks with threshold TH
        # Sum of idx_map[0] over the mask pixels divided by the pixel count gives the centroid
        # (presumably idx_map holds per-pixel coordinates -- it is defined elsewhere in the notebook).
        c = (OUT.unsqueeze(1)*idx_map[0]).view(2,2,PATCH_SIZE*PATCH_SIZE).sum(-1)
        d = OUT.view(2,PATCH_SIZE*PATCH_SIZE).sum(-1)
        m = d > 0
        c[m] = (c[m]/(d[m]).unsqueeze(-1)).long()
        c[~m] = self.P  # empty mask: fall back to a fixed centre (original author: "I have to find a better solution")
        return c

    def _crop_patch(self, image, center):
        """Return the P x P patch around ``center`` or ``None`` if it would leave the image."""
        cx, cy = int(center[0]), int(center[1])
        half = self.P // 2
        y_start = max(0, cy - half)
        y_end = min(PATCH_SIZE, cy + self.P - half)
        x_start = max(0, cx - half)
        x_end = min(PATCH_SIZE, cx + self.P - half)
        if (y_end - y_start == self.P) and (x_end - x_start == self.P):
            return image[0, 0, y_start:y_end, x_start:x_end]
        return None  # the window was clipped by the image border

    def __getitem__(self, index):
        row = self.data.iloc[index]
        direction = int(row['direction'])  # selects the UNet mask channel for this row

        # Slices without a usable patch stay all-zero until the fallback below.
        x = np.zeros((self.NUM_SLICES, self.P, self.P), dtype=np.float32)
        valid_patches = []
        for i in range(self.NUM_SLICES):
            instance = row[f'instance_number_{i}']
            path = f"/content/train_images/{row['study_id']}/{row['series_id']}/{instance}.dcm"
            image = self._load_square_image(path)
            patch = self._crop_patch(image, self._locate_centers(image)[direction])
            if patch is not None:
                patch = patch.cpu().numpy()
                x[i, ...] = patch
                valid_patches.append(patch)

        # Fill empty slices with the first valid patch. Only patches from the requested
        # direction are candidates, so a sample is never padded with the opposite side.
        if valid_patches:
            for i in range(self.NUM_SLICES):
                if x[i, ...].sum() == 0:
                    x[i, ...] = valid_patches[0]

        if self.transform is not None:
            # albumentations expects channel-last (H, W, C) arrays, so the slice axis is
            # moved to the end for the transform and restored afterwards.
            hwc = np.ascontiguousarray(x.transpose(1, 2, 0))
            hwc = self.transform(image=hwc)['image']
            x = np.ascontiguousarray(hwc.transpose(2, 0, 1))
        x = torch.as_tensor(x).float()
        m = torch.ones(3, dtype=torch.bool)
        label = torch.as_tensor(labels[row['label']])

        return [x.to(device),m.to(device)],[label.to(device),m.to(device)]


In [None]:
def myLoss(preds,target):
    """Class-weighted cross-entropy used to train the classifier.

    Args:
        preds: raw class logits of shape (batch, 3).
        target: pair ``(class_indices, mask)`` as produced by the dataset; the
            mask is accepted for interface compatibility but is not used.

    Returns:
        Scalar loss tensor. The class weights [1, 2, 4] mirror the weights of
        the competition metric.
    """
    target, _mask = target
    # Build the weights next to the logits (same device and dtype) instead of relying
    # on the global `device`. No epsilon is added to the logits: cross-entropy works on
    # log-softmax, which is invariant to adding a constant to every logit.
    class_weights = preds.new_tensor([1., 2., 4.])
    return nn.functional.cross_entropy(preds, target, weight=class_weights)

In [None]:
import timm
class ViT(nn.Module):
    """Thin wrapper around a timm EVA-02 classifier that takes 5-channel input.

    Args:
        num_classes: number of output classes of the classification head.
    """

    def __init__(self, num_classes):
        super().__init__()
        # Fetch the backbone from timm. The attribute must stay named `vit`:
        # saved checkpoints store their parameters under that prefix.
        self.vit = timm.create_model('eva02_base_patch14_224', pretrained=True, num_classes=num_classes,in_chans=5,features_only=False,global_pool='avg')

    def forward(self, x):
        """Return class logits.

        ``x`` is either the image tensor itself or the ``[image, mask]`` pair that
        the dataset yields as its input element. fastai passes that pair to the
        model as a single argument, so it is unpacked here and the mask ignored.
        """
        if isinstance(x, (list, tuple)):
            x = x[0]
        return self.vit(x)

In [None]:
from fastai.callback.core import Callback

class SaveModelCallback(Callback):
    """Callback that saves the learner after every epoch.

    NOTE: this definition takes over the name `SaveModelCallback` in the notebook
    namespace from this cell onwards.

    Args:
        every_epoch: when True a checkpoint is written at the end of each epoch;
            when False the callback does nothing.
        path: directory the checkpoint files are written to.
        fname: file-name prefix; ``_ep_<epoch>`` is appended to it.
        with_opt: whether the optimizer state is saved together with the model.
            The inference cells read ``checkpoint['model']``, which matches the
            file layout written with ``with_opt=True``.
    """

    def __init__(self, every_epoch=False, path='models', fname='model',with_opt=False):
        self.every_epoch = every_epoch
        self.path = path
        self.fname = fname
        self.with_opt=with_opt

    def after_epoch(self):
        # Write one checkpoint per epoch, honouring the with_opt setting
        # (it used to be stored but never forwarded to learn.save).
        if self.every_epoch:
            self.learn.save(f'{self.path}/{self.fname}_ep_{self.epoch}', with_opt=self.with_opt)

save_model_cb = SaveModelCallback(with_opt=True,every_epoch=True, path='/content/drive/MyDrive/RSNA_csv', fname=f"eva_axial_p_224_f_{fold}")

In [None]:
# Train the patch classifier on the split currently bound to tdf2 / vdf2.
if 1:
    seed_everything(SEED)
    # Load the pickled segmentation model used to localise the patches.
    # NOTE(review): pickle.load executes arbitrary code from the file -- only load trusted files.
    with open('/content/drive/MyDrive/RSNA_csv/'+"SEG_"+"1"+"_SS"+".pkl", 'rb') as f:
      UNet=pickle.load(f)
    tds = ViT_T1_Dataset(tdf2,UNet,transform=transforms_train)# training dataset, with augmentation
    vds = ViT_T1_Dataset(vdf2,UNet)# validation dataset, no augmentation
    # NOTE(review): batch_size is fixed to 16 here although INF['BS'] exists -- confirm which is intended.
    tdl = torch.utils.data.DataLoader(tds, batch_size=16, shuffle=True, drop_last=True)# training loader
    vdl = torch.utils.data.DataLoader(vds, batch_size=16, shuffle=False)# validation loader

    dls = DataLoaders(tdl,vdl)# wrap both loaders for fastai

    # Not used anywhere else in this cell.
    n_iter = len(tds)//INF['BS']

    model = ViT(num_classes=3)# build the classifier
    model.to(device)
    learn = Learner(# build the fastai Learner
        dls,
        model,
        lr=INF['LR'],
        loss_func=myLoss,
        cbs=[
            save_model_cb,
            GradientClip,
            ShowGraphCallback(),# plot the learning curves
            # alpha_cb
        ]
    )
    # NOTE(review): trains for 40 epochs; INF['EPOCHS'] is not used here -- confirm which is intended.
    learn.fit_one_cycle(40)
    # with open('/content/drive/MyDrive/RSNA_csv/'+"VIT_"+str(fold)+".pkl", 'wb') as f:
      # pickle.dump(model, f)

# 混同行列の算出

In [None]:
# Reload the validation rows stored in the "right" pickle file.
vdf2=pd.read_pickle("/content/drive/MyDrive/RSNA_csv/grouped_df_axial_right_vdf2.pkl")

In [None]:
# Reload the second validation pickle (presumably the left-side rows -- verify against the cell that wrote it).
vdf4=pd.read_pickle("/content/drive/MyDrive/RSNA_csv/grouped_df_axial_vdf2.pkl")

In [None]:
# Stack both validation tables into one frame with a fresh 0..n-1 index.
vdf3 = pd.concat([vdf2, vdf4], axis=0, ignore_index=True)

In [None]:
vdf3

In [None]:
# Condition and level name fragments used to build the row_id strings of the results table.
ALL_CONDITIONS = sorted(["left_subarticular_stenosis", "right_subarticular_stenosis"])
LEVELS = ["l1_l2", "l2_l3", "l3_l4", "l4_l5", "l5_s1"]


In [None]:
# Pre-populate results df
import glob
import os
study_ids = vdf3['study_id'].unique().tolist()

# One row per (study, condition, level), pre-filled with a uniform 1/3 prior that the
# inference cells overwrite. The frame is built in a single constructor call: the
# private DataFrame._append in a loop copies the whole frame on every iteration and
# can leave the probability columns with object dtype.
results_df = pd.DataFrame(
    [
        {"row_id": f"{study_id}_{condition}_{level}", "normal_mild": 1/3, "moderate": 1/3, "severe": 1/3}
        for study_id in study_ids
        for condition in ALL_CONDITIONS
        for level in LEVELS
    ],
    columns=["row_id", "normal_mild", "moderate", "severe"],
)

In [None]:
# Evaluate on the combined validation table.
grouped_df_axial=vdf3
# Load the pickled segmentation model that the dataset uses to localise the patches.
# NOTE(review): pickle.load executes arbitrary code from the file -- only load trusted files.
with open("/content/drive/MyDrive/RSNA_csv/SEG_1_SS.pkl", 'rb') as f:
  model2=pickle.load(f)

In [None]:

# Left-side inference: load the left checkpoint and write its class probabilities
# into the matching rows of results_df.
model = ViT(num_classes=3)

model = model.to(device)

# map_location lets the checkpoint load on a CPU-only runtime as well.
checkpoint = torch.load('/content/drive/MyDrive/RSNA_csv/eva_axial_p_224_f_1_ep_15.pth', map_location=device)
model.load_state_dict(checkpoint['model'])

model.eval()

grouped_df_axial = grouped_df_axial.reset_index(drop=True)

test_a=ViT_T1_Dataset(grouped_df_axial,model2)

# Level label in the table -> suffix used in results_df['row_id'].
level_suffix = {'L1/L2': 'l1_l2', 'L2/L3': 'l2_l3', 'L3/L4': 'l3_l4', 'L4/L5': 'l4_l5', 'L5/S1': 'l5_s1'}
prob_columns = ['normal_mild', 'moderate', 'severe']

with torch.no_grad():
    for i in tqdm(range(len(test_a)), desc="Processing predictions"):
        row = grouped_df_axial.iloc[i]
        # This checkpoint only covers direction 0 (left); skip other rows before
        # paying for the DICOM reads and both forward passes.
        if row['direction'] != 0 or row['level'] not in level_suffix:
            continue
        try:
            x = test_a[i][0][0].unsqueeze(0)
            probabilities = F.softmax(model(x), dim=1)[0].tolist()
            row_id = f"{row['study_id']}_left_subarticular_stenosis_{level_suffix[row['level']]}"
            target_rows = results_df['row_id'] == row_id
            if target_rows.any():
                results_df.loc[target_rows, prob_columns] = probabilities
        except Exception as e:
            # Best effort: one unreadable sample must not abort the whole pass.
            print(f"An error occurred: {e}")


In [None]:

# Right-side inference: load the right checkpoint and write its class probabilities
# into the matching rows of results_df. Reuses the dataset `test_a` built earlier.
model = ViT(num_classes=3)

model = model.to(device)

# map_location lets the checkpoint load on a CPU-only runtime as well.
checkpoint = torch.load('/content/drive/MyDrive/RSNA_csv/eva_axial_r_p_224_f_1_ep_18.pth', map_location=device)
model.load_state_dict(checkpoint['model'])

model.eval()

# Level label in the table -> suffix used in results_df['row_id'].
level_suffix = {'L1/L2': 'l1_l2', 'L2/L3': 'l2_l3', 'L3/L4': 'l3_l4', 'L4/L5': 'l4_l5', 'L5/S1': 'l5_s1'}
prob_columns = ['normal_mild', 'moderate', 'severe']

with torch.no_grad():
    for i in tqdm(range(len(test_a)), desc="Processing predictions"):
        row = grouped_df_axial.iloc[i]
        # This checkpoint only covers direction 1 (right); skip other rows before
        # paying for the DICOM reads and both forward passes.
        if row['direction'] != 1 or row['level'] not in level_suffix:
            continue
        try:
            x = test_a[i][0][0].unsqueeze(0)
            probabilities = F.softmax(model(x), dim=1)[0].tolist()
            row_id = f"{row['study_id']}_right_subarticular_stenosis_{level_suffix[row['level']]}"
            target_rows = results_df['row_id'] == row_id
            if target_rows.any():
                results_df.loc[target_rows, prob_columns] = probabilities
        except Exception as e:
            # Best effort: one unreadable sample must not abort the whole pass.
            print(f"An error occurred: {e}")

In [None]:
vdf3

In [None]:
results_df

In [None]:
import pandas as pd
from sklearn.metrics import confusion_matrix, classification_report

# Lookup tables that translate the validation table's values into the
# name fragments used by results_df.
direction_to_condition = {
    0: "left_subarticular_stenosis",
    1: "right_subarticular_stenosis",
}

level_to_level_t = {
    "L1/L2": "l1_l2",
    "L2/L3": "l2_l3",
    "L3/L4": "l3_l4",
    "L4/L5": "l4_l5",
    "L5/S1": "l5_s1",
}

true_label_to_true_label_t={
    "Normal/Mild":"normal_mild",
    "Moderate":"moderate",
    "Severe":"severe"
}

# Fixed class order shared by the confusion matrix and the report.
class_names = ["normal_mild", "moderate", "severe"]

# row_id -> predicted class (column with the highest probability). Computed once
# instead of scanning results_df for every validation row; the cast guards against
# object-dtype probability columns. The first row wins if a row_id is duplicated.
predicted_by_row_id = (
    results_df.drop_duplicates('row_id')
    .set_index('row_id')[class_names]
    .astype(float)
    .idxmax(axis=1)
)

# Pair every ground-truth row of vdf3 with its prediction.
true_labels = []
predicted_labels = []

for _, row in vdf3.iterrows():
    condition = direction_to_condition[row['direction']]
    level_t = level_to_level_t[row['level']]

    # Ground-truth label, converted to the results_df column naming
    true_labels.append(true_label_to_true_label_t[row['label']])

    row_id = f"{row['study_id']}_{condition}_{level_t}"
    predicted_label = predicted_by_row_id.get(row_id)
    if predicted_label is None:
        print(f"Warning: No prediction found for row_id {row_id}")
    predicted_labels.append(predicted_label)  # None marks a missing prediction

# Drop the pairs that have no prediction
matched_pairs = [(t, p) for t, p in zip(true_labels, predicted_labels) if p is not None]
true_labels_filtered = [t for t, _ in matched_pairs]
predicted_labels_filtered = [p for _, p in matched_pairs]

# Confusion matrix (rows: true class, columns: predicted class, in class_names order)
conf_matrix = confusion_matrix(true_labels_filtered, predicted_labels_filtered, labels=class_names)

# Show the result
print("Confusion Matrix:")
print(conf_matrix)

# Detailed report. `labels` must be passed together with `target_names`: without it
# the names are attached to the alphabetically sorted classes (moderate, normal_mild,
# severe), which swaps the normal_mild and moderate rows of the report.
print("\nClassification Report:")
print(classification_report(true_labels_filtered, predicted_labels_filtered, labels=class_names, target_names=class_names))
