Strip AI Kaggle

In [1]:
import time
import os
import glob
import gc
from tqdm.notebook import tqdm

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle

import pandas as pd
import cv2 as cv
import tifffile as tifi

import torch
import torch.nn as nn
import torchvision.transforms as T
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

import skimage
from skimage.filters import sobel
from skimage import segmentation
from skimage.transform import resize
from skimage.measure import regionprops_table

from scipy import ndimage as ndi

from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import RidgeClassifierCV
from sklearn.ensemble import RandomForestClassifier

# from PIL import Image
# Image.MAX_IMAGE_PIXELS = None

# Disabling the benchmarking feature with torch.backends.cudnn.benchmark = False
# causes cuDNN to deterministically select an algorithm, possibly at the cost of reduced performance.
# https://pytorch.org/docs/stable/notes/randomness.html
torch.backends.cudnn.benchmark = False

# Seed NumPy's and PyTorch's global random number generators with the same fixed value.
np.random.seed(42)
torch.manual_seed(42)

<torch._C.Generator at 0x7f23b4766d90>

In [2]:
# Presumably the width of the feature vector produced by the model (the feature
# tables in this notebook have columns 0-4095); not referenced by the visible cells.
num_features = 4096
# Run on the GPU when one is available, otherwise on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Load the TorchScript model, switch it to eval mode and move it to the chosen device.
model = torch.jit.load('/kaggle/input/ptmodel/model_scripted.pt').eval().to(device)

In [3]:
# List every entry of the model's state_dict together with the size of its tensor.
print("Model's state_dict:")
for entry_name, entry_tensor in model.state_dict().items():
    print(entry_name, "\t", entry_tensor.size())

Model's state_dict:
conv1.weight 	 torch.Size([2048, 3, 3, 3])
conv1.bias 	 torch.Size([2048])


Read in metadata

In [4]:
# Load the competition's train.csv, test.csv and other.csv metadata files into DataFrames.
train_meta = pd.read_csv('/kaggle/input/mayo-clinic-strip-ai/train.csv')
test_meta = pd.read_csv('/kaggle/input/mayo-clinic-strip-ai/test.csv')
other_meta = pd.read_csv('/kaggle/input/mayo-clinic-strip-ai/other.csv')

In [5]:
# try:
#     os.mkdir("../train/")
# except:
#     pass
# for i in tqdm(range(train_meta.shape[0])):
#     img_id = train_meta.iloc[i].image_id
#     try:
#         img = cv.resize(tifi.imread('/kaggle/input/mayo-clinic-strip-ai/train/' + img_id + ".tif"), (512, 512))
#     except:
#         img = np.zeros((512,512,3), np.uint8)
#     cv.imwrite(f"../train/{img_id}.jpg", img)
#     del img
#     gc.collect()

In [6]:
# Convert every test image listed in test_meta into a 512x512 JPEG under ../test/.
# makedirs(exist_ok=True) tolerates an already-existing directory without hiding
# real failures (e.g. permission errors) the way a bare `except: pass` does.
os.makedirs("../test/", exist_ok=True)
for img_id in tqdm(test_meta.image_id):
    try:
        img = cv.resize(tifi.imread('/kaggle/input/mayo-clinic-strip-ai/test/' + img_id + ".tif"), (512, 512))
    except Exception as exc:
        # Best effort: carry on with an all-black placeholder, but report which image
        # failed and why. Catching Exception (not a bare except) lets Ctrl-C through.
        print(f"Could not read/resize {img_id}: {exc!r}; writing a black placeholder instead")
        img = np.zeros((512,512,3), np.uint8)
    # cv.imwrite signals failure through its boolean return value rather than raising.
    if not cv.imwrite(f"../test/{img_id}.jpg", img):
        print(f"Failed to write ../test/{img_id}.jpg")
    # Release the image before loading the next one to keep peak memory low.
    del img
    gc.collect()

  0%|          | 0/4 [00:00<?, ?it/s]

List images for training and testing

In [7]:
# train_path = "../train/*.tif"
# train_images = sorted(list(glob.glob(train_path)))

# Collect the JPEGs written to ../test/ in sorted (lexicographic) order.
test_path = "../test/*.jpg"
test_images = sorted(glob.glob(test_path))

In [8]:
test_images

['../test/006388_0.jpg',
 '../test/008e5c_0.jpg',
 '../test/00c058_0.jpg',
 '../test/01adc5_0.jpg']

Define helper functions

In [9]:
img_to_tensor = T.ToTensor()

def read_tiff(path):
    """Load an image with OpenCV and derive its name from the file path.

    Despite the function name, the file is read with ``cv.imread``; the
    ``tifi.imread`` alternative is kept below as a comment.

    Parameters
    ----------
    path : str
        '/'-separated path to the image file.

    Returns
    -------
    tuple
        ``(image, filename)`` where ``image`` is whatever ``cv.imread`` returns
        for ``path`` and ``filename`` is the last path component with a trailing
        ``.jpg`` extension removed (other names are returned unchanged).
    """
    image = cv.imread(path)
#     image = tifi.imread(path)
    basename = path.split('/')[-1]
    # str.rstrip('.jpg') strips any trailing run of the characters '.', 'j', 'p'
    # and 'g' (e.g. "img_g.jpg" -> "img_"), so cut off the exact suffix instead.
    if basename.endswith('.jpg'):
        filename = basename[:-len('.jpg')]
    else:
        filename = basename
    return image, filename


def convert_image_grayscale(image):
    """Collapse a 3-channel image into a single grayscale channel.

    The conversion uses OpenCV's ``COLOR_RGB2GRAY`` code.

    NOTE(review): ``compute_features`` passes images loaded by ``cv.imread``,
    which presumably yields BGR channel order — confirm the RGB code is intended.
    """
    return cv.cvtColor(image, cv.COLOR_RGB2GRAY)


def segment_images(gray_image):
    """Label the connected regions obtained from a watershed segmentation.

    Steps:
      1. The Sobel response of ``gray_image`` is used as the elevation map.
      2. Seed markers are 1 where a pixel is at or above the image mean and
         2 where it is below.
      3. The watershed result minus one (so marker-2 regions are non-zero) has
         its holes filled and is split into connected components.

    Returns the integer label array produced by ``ndi.label``; the component
    count is discarded.
    """
    threshold = gray_image.mean()
    seeds = np.zeros_like(gray_image)
    seeds[gray_image >= threshold] = 1
    seeds[gray_image < threshold] = 2
    flooded = segmentation.watershed(sobel(gray_image), seeds)
    foreground = ndi.binary_fill_holes(flooded - 1)
    return ndi.label(foreground)[0]


def get_object_coordinates(labeled_segments, min_scaled_area=.75):
    """Return the bounding boxes of the largest labelled regions, biggest first.

    Region areas are standardised with ``StandardScaler`` and regions whose
    standardised area is at least ``min_scaled_area`` are kept.

    Parameters
    ----------
    labeled_segments : array
        Label image as accepted by ``regionprops_table``.
    min_scaled_area : float, optional
        Threshold on the standardised area (default 0.75, the value that was
        previously hard-coded).

    Returns
    -------
    list of tuple of int
        ``(bbox-0, bbox-1, bbox-2, bbox-3)`` per kept region, sorted by
        decreasing area. Empty when the label image contains no regions. When
        regions exist but none reaches the threshold, the single largest region
        is returned so the image still contributes a patch.
    """
    # Only area and bbox are consumed below; the extra properties that used to be
    # requested (convex area, axis lengths, eccentricity, ...) were never read.
    df = pd.DataFrame(regionprops_table(labeled_segments, properties=['area', 'bbox']))
    if df.empty:
        # No labelled regions (e.g. a uniform image): StandardScaler would raise
        # on zero samples, so report "no objects" instead of crashing.
        return []
    df['scaled_area'] = StandardScaler().fit_transform(df.area.values.reshape(-1, 1))
    df = df.sort_values(by="scaled_area", ascending=False)
    objects = df[df['scaled_area'] >= min_scaled_area]
    if objects.empty:
        # A single region (or regions of equal area) standardises to 0, so nothing
        # passes the threshold; fall back to the largest region rather than
        # silently dropping the whole image.
        objects = df.head(1)
    bbox_columns = ['bbox-0', 'bbox-1', 'bbox-2', 'bbox-3']
    return [tuple(int(value) for value in row) for row in objects[bbox_columns].to_numpy()]


def _expand_span(start, stop, limit, min_size=3):
    """Widen a [start, stop) span shorter than ``min_size``, staying inside [0, limit]."""
    if stop - start >= min_size:
        return start, stop
    # Pad by one on each side, but never step outside the axis: a start of -1
    # would be read as "last element" by the slice and produce an empty crop.
    start = max(start - 1, 0)
    stop = min(stop + 1, limit)
    if stop - start < min_size:
        # The padding was clipped at an edge; grow on the opposite side as far
        # as the axis allows.
        if start == 0:
            stop = min(limit, start + min_size)
        else:
            start = max(0, stop - min_size)
    return start, stop


def crop_patch(coordinates, image):
    """Cut the region described by ``coordinates`` out of ``image``.

    Parameters
    ----------
    coordinates : tuple
        ``(x1, y1, x2, y2)``; ``x`` indexes the first axis of ``image`` and
        ``y`` the second, each as a half-open ``[start, stop)`` range.
    image : array
        Array with at least two dimensions (its ``shape`` is used for clamping).

    Returns
    -------
    array
        ``image[x1:x2, y1:y2]``. A side shorter than 3 is padded by one element
        on each side; the padding is clamped to the image and shifted inward at
        the borders so the side is at least 3 whenever the image is that large.
        (Presumably 3 is the smallest input the model's 3x3 convolution accepts
        — TODO confirm.)
    """
    x1, y1, x2, y2 = coordinates
    x1, x2 = _expand_span(x1, x2, image.shape[0])
    y1, y2 = _expand_span(y1, y2, image.shape[1])
    return image[x1:x2, y1:y2]


def compute_features(images):
    """Extract model features for every segmented patch of every image.

    For each path in ``images`` the image is read, converted to grayscale,
    segmented and reduced to a list of bounding boxes. Each box is cropped from
    the colour image, turned into a tensor and passed through ``model`` without
    gradient tracking. Progress and the time since the previous report are
    printed roughly every tenth of the input.

    Parameters
    ----------
    images : sequence of str
        Image file paths.

    Returns
    -------
    pandas.DataFrame
        One row per patch: an ``image_patch`` column holding
        ``"<filename>_<patch index>"`` followed by the feature columns.
    """
    started = time.time()
    patch_names = []
    feature_rows = []
    total = len(images)
    report_every = int(total/10 + 1)
    for position, image_path in enumerate(images):
        image, filename = read_tiff(image_path)
        gray_image = convert_image_grayscale(image)
        labeled_segments = segment_images(gray_image)
        object_coordinates = get_object_coordinates(labeled_segments)
        # The intermediate arrays are no longer needed once the boxes are known.
        del labeled_segments, gray_image
        gc.collect()
        for patch_index, coordinates in enumerate(object_coordinates):
            cropped_image = crop_patch(coordinates, image)
            tensor = img_to_tensor(cropped_image).to(device)
            with torch.no_grad():
                feats = model(tensor.unsqueeze(0)).cpu().numpy()
            feature_rows.append(feats)
            patch_names.append(f"{filename}_{patch_index}")
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            # Free per-patch objects eagerly before moving to the next patch.
            del tensor, feats, cropped_image, coordinates
            gc.collect()
        if position % report_every == 0:
            print(f"{position+1}/{total} -- {(position + 1)/ total * 100:0.0f}% -- {time.time()-started:0.2f} seconds")
            started = time.time()
    return pd.DataFrame(feature_rows, index=patch_names).rename_axis("image_patch").reset_index()

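Open each image file (already resized to 512×512 in the conversion cell above), convert it to grayscale, segment it, crop the detected regions, and featurize each crop with the model.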

In [10]:
%%time
# The training features are read from a precomputed CSV rather than recomputed here.
# The two commented lines below are presumably how that CSV was produced
# (compute the features, then save them without the index) — TODO confirm.
# train_features = compute_features(train_images)
# train_features.to_csv("train_features.csv", index=False)
train_features = pd.read_csv('/kaggle/input/feats/train_features.csv')
train_features

CPU times: user 8.06 s, sys: 333 ms, total: 8.39 s
Wall time: 10.1 s


Unnamed: 0,image_patch,0,1,2,3,4,5,6,7,8,...,4086,4087,4088,4089,4090,4091,4092,4093,4094,4095
0,006388_0_0,0.000000,0.000000,0.217632,2.844576,0.000367,0.009552,0.019485,0.042599,0.220534,...,1.127135,0.419115,4.690782,8.019439,1.335905,2.237064,0.872435,0.153166,0.000628,4.448214
1,006388_0_1,0.000096,0.000000,0.185136,3.159552,0.000458,0.015141,0.031187,0.052051,0.291841,...,0.692612,0.390752,5.192822,9.071509,1.464012,2.406117,0.738875,0.144099,0.000244,4.856488
2,006388_0_2,0.000260,0.000017,0.211779,2.974412,0.001423,0.030458,0.060330,0.076941,0.297789,...,0.696017,0.499644,5.030678,8.727257,1.445623,2.368041,0.795384,0.207377,0.000787,4.670062
3,006388_0_3,0.000000,0.000000,0.377585,2.633995,0.000485,0.011898,0.036226,0.058957,0.154289,...,1.712925,0.486500,4.150004,7.016711,1.243597,2.034447,1.093594,0.175204,0.000000,4.070055
4,006388_0_4,0.000000,0.000000,0.200048,3.128776,0.000705,0.008593,0.024372,0.032974,0.268469,...,1.073481,0.325209,4.944738,8.649192,1.415098,2.299126,0.802157,0.116285,0.000066,4.730641
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5720,ffec5c_0_4,0.000000,0.000000,0.154031,3.597332,0.000000,0.000727,0.002501,0.019784,0.151616,...,0.440054,0.506446,5.985297,10.141897,1.301973,2.795910,0.662122,0.077900,0.000000,5.265446
5721,ffec5c_1_0,0.000000,0.000000,0.198047,3.490576,0.001301,0.000411,0.001646,0.016554,0.128559,...,0.825736,0.611351,5.817813,9.577434,1.148142,2.792793,0.799057,0.093610,0.000000,5.026493
5722,ffec5c_1_1,0.000000,0.000000,0.247561,3.375679,0.000264,0.000560,0.003252,0.019890,0.100177,...,1.039665,0.710899,5.635127,9.104147,1.059099,2.746651,0.897168,0.117726,0.000000,4.841364
5723,ffec5c_1_2,0.000000,0.000000,0.531703,3.650389,0.000608,0.000679,0.001732,0.016802,0.104732,...,2.076511,0.368483,5.187261,8.640637,1.146935,2.439773,1.075648,0.055275,0.000000,4.905495


In [11]:
%%time
test_features = compute_features(test_images)
test_features

1/4 -- 25% -- 7.90 seconds
2/4 -- 50% -- 0.55 seconds
3/4 -- 75% -- 1.11 seconds
4/4 -- 100% -- 1.42 seconds
CPU times: user 6.13 s, sys: 836 ms, total: 6.97 s
Wall time: 11.2 s


Unnamed: 0,image_patch,0,1,2,3,4,5,6,7,8,...,4086,4087,4088,4089,4090,4091,4092,4093,4094,4095
0,006388_0_0,0.0,0.0,0.217632,2.844576,0.000367,0.009552,0.019485,0.042599,0.220533,...,1.127135,0.419115,4.690782,8.019439,1.335905,2.237065,0.872436,0.153166,0.000628,4.448214
1,006388_0_1,9.6e-05,0.0,0.185136,3.159551,0.000458,0.015141,0.031187,0.052051,0.291841,...,0.692612,0.390752,5.192822,9.071509,1.464013,2.406117,0.738875,0.144099,0.000244,4.856488
2,006388_0_2,0.00026,1.732702e-05,0.211779,2.974412,0.001423,0.030458,0.06033,0.076941,0.297789,...,0.696017,0.499644,5.030678,8.727257,1.445623,2.368041,0.795384,0.207377,0.000787,4.670062
3,006388_0_3,0.0,0.0,0.377585,2.633995,0.000485,0.011898,0.036226,0.058957,0.154289,...,1.712926,0.4865,4.150003,7.016711,1.243597,2.034446,1.093594,0.175204,0.0,4.070055
4,006388_0_4,0.0,0.0,0.200048,3.128777,0.000705,0.008593,0.024372,0.032974,0.268469,...,1.073481,0.325209,4.944738,8.649191,1.415098,2.299125,0.802157,0.116285,6.6e-05,4.730642
5,006388_0_5,6e-05,0.0,0.216413,3.055943,0.000374,0.007843,0.02154,0.033683,0.254285,...,1.142519,0.328573,4.829774,8.438962,1.404588,2.255395,0.831013,0.12267,0.000221,4.647614
6,006388_0_6,0.000946,2.358556e-05,0.321603,2.627807,0.004313,0.07816,0.159887,0.158461,0.336936,...,0.661681,0.734915,4.745037,8.133711,1.443798,2.294837,0.933246,0.347653,0.003728,4.330872
7,006388_0_7,0.000996,0.0,0.326262,2.785406,0.002583,0.063401,0.12059,0.147762,0.314519,...,0.674295,0.661232,4.846157,8.308887,1.47263,2.317999,0.916644,0.283598,0.001144,4.499739
8,006388_0_8,0.0,0.0,0.181675,2.91374,0.000175,0.006211,0.013608,0.044502,0.233089,...,0.988398,0.431791,4.863936,8.340229,1.340375,2.300873,0.829613,0.159901,0.0,4.538946
9,006388_0_9,0.0,0.0,0.184326,2.938176,0.000256,0.009601,0.017905,0.049976,0.250058,...,0.919022,0.400607,4.880117,8.444126,1.387755,2.295796,0.809777,0.1613,0.0,4.589939


In [12]:
%%time
# Collapse the patch-level features to one row per image and attach the label.
train = train_features.copy()

# image_patch has the form "<patient_id>_<image_num>_<patch_num>".
train[['patient_id', 'image_num', 'patch_num']] = train.image_patch.str.split("_", expand=True)
train["image_id"] = train["patient_id"] + "_" + train["image_num"]

# Remove the string helper columns before averaging: they are not features, and
# GroupBy.mean() raises on object columns in pandas >= 2.0 (older versions
# silently dropped them, which is the result shown in this notebook).
train = train.drop(columns=['image_patch', 'image_num', 'patch_num'])

# Mean feature vector per image, then the label looked up by image_id.
train = train.groupby(['image_id', 'patient_id'], as_index = False).mean()
train = train.set_index('image_id').join(train_meta.set_index('image_id')['label']).reset_index()
train

CPU times: user 444 ms, sys: 199 ms, total: 642 ms
Wall time: 648 ms


Unnamed: 0,image_id,patient_id,0,1,2,3,4,5,6,7,...,4087,4088,4089,4090,4091,4092,4093,4094,4095,label
0,006388_0,006388,0.000197,0.000003,0.242391,2.882362,0.001087,0.024988,0.051070,0.076314,...,0.502290,4.848616,8.337588,1.390352,2.302466,0.860181,0.198950,0.000583,4.529106,CE
1,008e5c_0,008e5c,0.000024,0.000002,0.270909,2.935235,0.000743,0.015018,0.052232,0.079578,...,0.526898,4.944095,8.544483,1.412101,2.333537,0.843052,0.200972,0.001563,4.612858,CE
2,00c058_0,00c058,0.000000,0.000000,0.109065,3.337730,0.000680,0.000261,0.001762,0.016953,...,0.918908,6.030700,9.551153,1.003379,2.986121,0.803884,0.146643,0.000000,4.902574,LAA
3,01adc5_0,01adc5,0.000000,0.000000,0.127736,3.306621,0.000026,0.007784,0.010923,0.033333,...,0.291718,5.485496,9.667834,1.523891,2.505525,0.610795,0.079921,0.000000,5.079644,LAA
4,026c97_0,026c97,0.000000,0.000000,0.302105,3.796331,0.000026,0.000331,0.000797,0.004155,...,0.129143,5.452762,9.725958,1.488028,2.427738,0.744507,0.018328,0.000000,5.317591,CE
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
749,fe9645_0,fe9645,0.000000,0.000000,0.224970,3.166368,0.005562,0.002533,0.010454,0.028663,...,0.803512,5.362029,8.524347,1.034462,2.665213,0.989150,0.156884,0.000014,4.591372,CE
750,fe9bec_0,fe9bec,0.000006,0.000000,0.317139,3.305985,0.001361,0.007623,0.022318,0.056833,...,0.581571,5.315148,8.999295,1.303592,2.517601,0.890511,0.160579,0.000000,4.841062,LAA
751,ff14e0_0,ff14e0,0.000000,0.000000,0.339726,3.088429,0.000083,0.000552,0.002502,0.010256,...,0.421593,4.990200,8.714880,1.414364,2.319745,0.733004,0.064331,0.000000,4.739640,CE
752,ffec5c_0,ffec5c,0.000000,0.000000,0.360300,3.750270,0.000001,0.000272,0.001179,0.012320,...,0.345973,5.578267,9.635439,1.324996,2.558337,0.813334,0.048899,0.000000,5.227541,LAA


In [13]:
# Separate the label vector from the feature matrix; the identifier columns and
# the label are not model inputs.
non_feature_columns = ['image_id', 'patient_id', 'label']
y_train = train['label']
x_train = train.drop(columns=non_feature_columns)
x_train

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,4086,4087,4088,4089,4090,4091,4092,4093,4094,4095
0,0.000197,0.000003,0.242391,2.882362,0.001087,0.024988,0.051070,0.076314,0.261335,0.0,...,0.903439,0.502290,4.848616,8.337588,1.390352,2.302466,0.860181,0.198950,0.000583,4.529106
1,0.000024,0.000002,0.270909,2.935235,0.000743,0.015018,0.052232,0.079578,0.258679,0.0,...,0.702776,0.526898,4.944095,8.544483,1.412101,2.333537,0.843052,0.200972,0.001563,4.612858
2,0.000000,0.000000,0.109065,3.337730,0.000680,0.000261,0.001762,0.016953,0.109361,0.0,...,0.617157,0.918908,6.030700,9.551153,1.003379,2.986121,0.803884,0.146643,0.000000,4.902574
3,0.000000,0.000000,0.127736,3.306621,0.000026,0.007784,0.010923,0.033333,0.273204,0.0,...,0.304865,0.291718,5.485496,9.667834,1.523891,2.505525,0.610795,0.079921,0.000000,5.079644
4,0.000000,0.000000,0.302105,3.796331,0.000026,0.000331,0.000797,0.004155,0.285716,0.0,...,1.224498,0.129143,5.452762,9.725958,1.488028,2.427738,0.744507,0.018328,0.000000,5.317591
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
749,0.000000,0.000000,0.224970,3.166368,0.005562,0.002533,0.010454,0.028663,0.171079,0.0,...,1.292953,0.803512,5.362029,8.524347,1.034462,2.665213,0.989150,0.156884,0.000014,4.591372
750,0.000006,0.000000,0.317139,3.305985,0.001361,0.007623,0.022318,0.056833,0.215332,0.0,...,0.996940,0.581571,5.315148,8.999295,1.303592,2.517601,0.890511,0.160579,0.000000,4.841062
751,0.000000,0.000000,0.339726,3.088429,0.000083,0.000552,0.002502,0.010256,0.213865,0.0,...,1.264893,0.421593,4.990200,8.714880,1.414364,2.319745,0.733004,0.064331,0.000000,4.739640
752,0.000000,0.000000,0.360300,3.750270,0.000001,0.000272,0.001179,0.012320,0.133139,0.0,...,1.182220,0.345973,5.578267,9.635439,1.324996,2.558337,0.813334,0.048899,0.000000,5.227541


In [14]:
# Collapse the patch-level test features to one row per image.
test = test_features.copy()

# image_patch has the form "<patient_id>_<image_num>_<patch_num>".
test[['patient_id', 'image_num', 'patch_num']] = test.image_patch.str.split("_", expand=True)
test["image_id"] = test["patient_id"] + "_" + test["image_num"]

# Remove the string helper columns before averaging: they are not features, and
# GroupBy.mean() raises on object columns in pandas >= 2.0 (older versions
# silently dropped them, which is the result shown in this notebook).
test = test.drop(columns=['image_patch', 'image_num', 'patch_num'])

# Mean feature vector per image.
test = test.groupby(['image_id', 'patient_id'], as_index = False).mean()
test

Unnamed: 0,image_id,patient_id,0,1,2,3,4,5,6,7,...,4086,4087,4088,4089,4090,4091,4092,4093,4094,4095
0,006388_0,006388,0.000197,3e-06,0.242391,2.882362,0.001087,0.024988,0.05107,0.076314,...,0.903439,0.50229,4.848616,8.337587,1.390352,2.302466,0.860181,0.19895,0.000583,4.529106
1,008e5c_0,008e5c,2.4e-05,2e-06,0.270909,2.935235,0.000743,0.015018,0.052232,0.079578,...,0.702776,0.526898,4.944094,8.544481,1.412101,2.333537,0.843052,0.200972,0.001563,4.612859
2,00c058_0,00c058,0.0,0.0,0.109065,3.33773,0.00068,0.000261,0.001762,0.016953,...,0.617157,0.918908,6.0307,9.551151,1.003379,2.986121,0.803884,0.146643,0.0,4.902575
3,01adc5_0,01adc5,0.0,0.0,0.127736,3.306621,2.6e-05,0.007784,0.010923,0.033333,...,0.304865,0.291718,5.485496,9.667833,1.523891,2.505526,0.610795,0.079921,0.0,5.079643


In [15]:
# Keep only the feature columns; the identifier columns are not model inputs.
x_test = test.drop(columns=['image_id', 'patient_id'])
x_test

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,4086,4087,4088,4089,4090,4091,4092,4093,4094,4095
0,0.000197,3e-06,0.242391,2.882362,0.001087,0.024988,0.05107,0.076314,0.261335,0.0,...,0.903439,0.50229,4.848616,8.337587,1.390352,2.302466,0.860181,0.19895,0.000583,4.529106
1,2.4e-05,2e-06,0.270909,2.935235,0.000743,0.015018,0.052232,0.079578,0.258679,0.0,...,0.702776,0.526898,4.944094,8.544481,1.412101,2.333537,0.843052,0.200972,0.001563,4.612859
2,0.0,0.0,0.109065,3.33773,0.00068,0.000261,0.001762,0.016953,0.10936,0.0,...,0.617157,0.918908,6.0307,9.551151,1.003379,2.986121,0.803884,0.146643,0.0,4.902575
3,0.0,0.0,0.127736,3.306621,2.6e-05,0.007784,0.010923,0.033333,0.273204,0.0,...,0.304865,0.291718,5.485496,9.667833,1.523891,2.505526,0.610795,0.079921,0.0,5.079643


In [16]:
# Random forest (3000 trees, depth capped at 15, fixed seed) fitted on the
# per-image mean features. The raw .values arrays are passed, so the column
# labels of x_train play no role in fitting.
rf_class = RandomForestClassifier(max_depth=15, random_state=42, n_estimators=3000)
rf_class.fit(x_train.values, y_train.values)

RandomForestClassifier(max_depth=15, n_estimators=3000, random_state=42)

In [17]:
# Scored on the very rows the forest was fitted on, so this is a training
# score, not a held-out estimate of how the model generalizes.
rf_class.score(x_train.values, y_train.values)

1.0

In [18]:
# Add the predicted label and the class probabilities to the training table;
# the probability columns are named after the forest's first two classes.
train_inputs = x_train.values
train['prediction'] = rf_class.predict(train_inputs)
probability_columns = [rf_class.classes_[0], rf_class.classes_[1]]
train[probability_columns] = rf_class.predict_proba(train_inputs)
train

Unnamed: 0,image_id,patient_id,0,1,2,3,4,5,6,7,...,4090,4091,4092,4093,4094,4095,label,prediction,CE,LAA
0,006388_0,006388,0.000197,0.000003,0.242391,2.882362,0.001087,0.024988,0.051070,0.076314,...,1.390352,2.302466,0.860181,0.198950,0.000583,4.529106,CE,CE,0.845328,0.154672
1,008e5c_0,008e5c,0.000024,0.000002,0.270909,2.935235,0.000743,0.015018,0.052232,0.079578,...,1.412101,2.333537,0.843052,0.200972,0.001563,4.612858,CE,CE,0.828007,0.171993
2,00c058_0,00c058,0.000000,0.000000,0.109065,3.337730,0.000680,0.000261,0.001762,0.016953,...,1.003379,2.986121,0.803884,0.146643,0.000000,4.902574,LAA,LAA,0.336436,0.663564
3,01adc5_0,01adc5,0.000000,0.000000,0.127736,3.306621,0.000026,0.007784,0.010923,0.033333,...,1.523891,2.505525,0.610795,0.079921,0.000000,5.079644,LAA,LAA,0.219727,0.780273
4,026c97_0,026c97,0.000000,0.000000,0.302105,3.796331,0.000026,0.000331,0.000797,0.004155,...,1.488028,2.427738,0.744507,0.018328,0.000000,5.317591,CE,CE,0.916592,0.083408
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
749,fe9645_0,fe9645,0.000000,0.000000,0.224970,3.166368,0.005562,0.002533,0.010454,0.028663,...,1.034462,2.665213,0.989150,0.156884,0.000014,4.591372,CE,CE,0.934212,0.065788
750,fe9bec_0,fe9bec,0.000006,0.000000,0.317139,3.305985,0.001361,0.007623,0.022318,0.056833,...,1.303592,2.517601,0.890511,0.160579,0.000000,4.841062,LAA,LAA,0.276391,0.723609
751,ff14e0_0,ff14e0,0.000000,0.000000,0.339726,3.088429,0.000083,0.000552,0.002502,0.010256,...,1.414364,2.319745,0.733004,0.064331,0.000000,4.739640,CE,CE,0.823946,0.176054
752,ffec5c_0,ffec5c,0.000000,0.000000,0.360300,3.750270,0.000001,0.000272,0.001179,0.012320,...,1.324996,2.558337,0.813334,0.048899,0.000000,5.227541,LAA,LAA,0.266620,0.733380


In [19]:
train.groupby(['patient_id'], as_index = False)[['CE', 'LAA']].mean()

Unnamed: 0,patient_id,CE,LAA
0,006388,0.845328,0.154672
1,008e5c,0.828007,0.171993
2,00c058,0.336436,0.663564
3,01adc5,0.219727,0.780273
4,026c97,0.916592,0.083408
...,...,...,...
627,fe0cca,0.899302,0.100698
628,fe9645,0.934212,0.065788
629,fe9bec,0.276391,0.723609
630,ff14e0,0.823946,0.176054


In [20]:
# test['prediction'] = rf_class.predict(x_test)
# Store the class probabilities in columns named after the forest's first two classes.
probability_columns = [rf_class.classes_[0], rf_class.classes_[1]]
test[probability_columns] = rf_class.predict_proba(x_test.values)

In [21]:
# Average the per-image probabilities into one row per patient.
submission = test.groupby(['patient_id'], as_index = False)[['CE', 'LAA']].mean()

# An image that produced no patches never reaches `test`, so its patient would be
# silently missing from the submission. Patient ids are the part of image_id
# before the first underscore, the same convention used when splitting image_patch.
expected_patients = test_meta.image_id.str.split("_").str[0].unique()
missing_patients = sorted(set(expected_patients) - set(submission.patient_id))
if missing_patients:
    print(f"No features for {len(missing_patients)} patient(s); filling with 0.5/0.5: {missing_patients}")
    # 0.5 / 0.5 is an uninformative placeholder for patients without any features.
    filler = pd.DataFrame({'patient_id': missing_patients, 'CE': 0.5, 'LAA': 0.5})
    submission = (
        pd.concat([submission, filler], ignore_index=True)
        .sort_values('patient_id')
        .reset_index(drop=True)
    )
submission

Unnamed: 0,patient_id,CE,LAA
0,006388,0.845328,0.154672
1,008e5c,0.828007,0.171993
2,00c058,0.336436,0.663564
3,01adc5,0.219727,0.780273


In [22]:
submission.to_csv("submission.csv", index = False)