## Import libraries

In [1]:
import gc
import pickle
import numpy as np
import pandas as pd
from sklearn.metrics import f1_score
from sklearn.model_selection import StratifiedKFold

from sklearn.ensemble import VotingClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

## Prepare data

In [2]:
# Load the preprocessed training frame from the pickled bundle.
# NOTE: unpickling can execute arbitrary code embedded in the file, so only
# load artifacts that come from a trusted source.
with open("../input/he-pgd-turkey-ham/MathCoThon_Ready_Sandwich.txt", 'rb') as handle:
    # Deserialize straight from the file handle so the raw byte buffer is not
    # kept alive in the notebook namespace after the cell finishes.
    processed_data = pickle.load(handle)

# Only the training frame is needed in this notebook.
train_df = processed_data['train_df']

# Drop the rest of the bundle and reclaim its memory.
del processed_data
gc.collect()

0

In [3]:
# Per-class sample counts; the groupby sorts by target value.
temp_df = train_df.groupby(['target']).size().reset_index().rename(columns={0:'count'})
total_count = temp_df['count'].sum()
temp_df['class%'] = (temp_df['count'] / total_count) * 100

# Inverse-frequency weights normalised so the rarest class gets weight 1.0
# and more frequent classes get proportionally smaller weights.
lowest_pct = temp_df['class%'].min()
temp_df['class_weight'] = lowest_pct / temp_df['class%']

# Key the mapping by the actual target label. The previous
# `to_dict()['class_weight']` keyed it by the row position of temp_df, which
# only matches the labels when they happen to be exactly 0..n-1.
class_weight = temp_df.set_index('target')['class_weight'].to_dict()
class_weight

{0: 0.15502555366269166,
 1: 0.019431988041853514,
 2: 0.022916142029715435,
 3: 0.6842105263157895,
 4: 1.0,
 5: 0.05505142165759225,
 6: 0.02893481717011128,
 7: 0.07410423452768729,
 8: 0.03560250391236306}

In [4]:
def _load_meta_features(path, tag):
    """Read the prediction arrays one base model stored in an .npz archive.

    Parameters
    ----------
    path : str
        Location of the .npz archive.
    tag : str
        Model suffix used in the array names, e.g. 'dnn' selects
        'y_pred_meta_dnn' and 'y_pred_final_dnn'.

    Returns
    -------
    tuple
        (y_pred_meta_<tag>, y_pred_final_<tag>) as stored in the archive.
    """
    # The context manager closes the underlying zip file once both arrays
    # have been read into memory.
    # allow_pickle=True is kept from the original call; it lets numpy unpickle
    # object arrays, so these archives must come from a trusted source.
    with np.load(path, allow_pickle=True) as archive:
        return archive['y_pred_meta_' + tag], archive['y_pred_final_' + tag]


y_pred_meta_dnn, y_pred_final_dnn = _load_meta_features(
    '../input/he-pgd-deep-neural/DNN_Meta_Features.npz', 'dnn')

y_pred_meta_xgb, y_pred_final_xgb = _load_meta_features(
    '../input/he-pgd-xgboost/XGB_Meta_Features.npz', 'xgb')

y_pred_meta_lgb, y_pred_final_lgb = _load_meta_features(
    '../input/he-pgd-lightgbm/LGB_Meta_Features.npz', 'lgb')

y_pred_meta_cb, y_pred_final_cb = _load_meta_features(
    '../input/he-pgd-catboost/CB_Meta_Features.npz', 'cb')

In [5]:
# Build the level-2 feature matrices by placing the base models' prediction
# arrays side by side (column-wise).
# NOTE(review): the XGBoost arrays (y_pred_meta_xgb / y_pred_final_xgb) are
# loaded in this notebook but are not stacked here - confirm this is intentional.
train_blocks = (y_pred_meta_dnn, y_pred_meta_lgb, y_pred_meta_cb)
test_blocks = (y_pred_final_dnn, y_pred_final_lgb, y_pred_final_cb)

Xtrain_meta = np.concatenate(train_blocks, axis=1)
Xtest_meta = np.concatenate(test_blocks, axis=1)
Ytrain_meta = train_df['target'].values

# Report the resulting shapes as a quick sanity check.
for template, array in (
    ("Xtrain_meta shape: {}", Xtrain_meta),
    ("Ytrain_meta shape: {}", Ytrain_meta),
    ("Xtest_meta shape: {}", Xtest_meta),
):
    print(template.format(array.shape))

Xtrain_meta shape: (18047, 27)
Ytrain_meta shape: (18047,)
Xtest_meta shape: (9465, 27)


## Logistic Regression

In [6]:
# Repeated stratified K-fold training of a LogisticRegression meta-model.
# Produces:
#   y_pred_meta_lr  - out-of-fold class probabilities, averaged over seeds
#   y_pred_final_lr - test class probabilities, averaged over every fitted model
FOLD = 10
SEEDS = [2020, 2022]

# Width of the probability matrices, derived from the labels instead of a
# hard-coded 9.
n_classes = np.unique(Ytrain_meta).size

counter = 0      # number of models fitted so far (seeds x folds)
oof_score = 0    # running sum of the per-fold macro-F1 scores
y_pred_meta_lr = np.zeros((Ytrain_meta.shape[0], n_classes))
y_pred_final_lr = np.zeros((Xtest_meta.shape[0], n_classes))


for seed in SEEDS:
    seed_score = 0

    kfold = StratifiedKFold(n_splits=FOLD, shuffle=True, random_state=seed)

    for idx, (train, val) in enumerate(kfold.split(Xtrain_meta, Ytrain_meta)):
        counter += 1

        train_x, train_y = Xtrain_meta[train], Ytrain_meta[train]
        val_x, val_y = Xtrain_meta[val], Ytrain_meta[val]

        model = LogisticRegression(
            max_iter=2000,
            class_weight=class_weight,
            random_state=seed+idx
        )

        model.fit(train_x, train_y)

        # Each row is in the validation split once per seed, so these sums are
        # turned into a per-seed average after the loops.
        y_pred = model.predict_proba(val_x)
        y_pred_meta_lr[val] += y_pred
        y_pred_final_lr += model.predict_proba(Xtest_meta)

        # predict_proba columns follow model.classes_, so translate the winning
        # column into its class label (a plain 1-D array) before scoring.
        y_pred = model.classes_[np.argmax(y_pred, axis=1)]
        score = 100 * f1_score(val_y, y_pred, average='macro')
        oof_score += score
        seed_score += score
        print("Seed-{} | Fold-{} | OOF Score: {}".format(seed, idx, score))

    print("\nSeed: {} | Aggregate OOF Score: {}\n\n".format(seed, (seed_score / FOLD)))


# OOF rows received one prediction per seed; test rows one per fitted model.
y_pred_meta_lr = y_pred_meta_lr / float(len(SEEDS))
y_pred_final_lr = y_pred_final_lr / float(counter)
oof_score /= float(counter)
print("Aggregate OOF Score: {}".format(oof_score))

Seed-2020 | Fold-0 | OOF Score: 29.65413440371789
Seed-2020 | Fold-1 | OOF Score: 30.35679427772333
Seed-2020 | Fold-2 | OOF Score: 29.74534817174276
Seed-2020 | Fold-3 | OOF Score: 29.510301390992183
Seed-2020 | Fold-4 | OOF Score: 30.605977137406647
Seed-2020 | Fold-5 | OOF Score: 30.210733953630303
Seed-2020 | Fold-6 | OOF Score: 28.77085713671726
Seed-2020 | Fold-7 | OOF Score: 31.11383917155333
Seed-2020 | Fold-8 | OOF Score: 31.411960466487226
Seed-2020 | Fold-9 | OOF Score: 30.815236123460142

Seed: 2020 | Aggregate OOF Score: 30.21951822334311


Seed-2022 | Fold-0 | OOF Score: 30.458694407545373
Seed-2022 | Fold-1 | OOF Score: 30.70083307132585
Seed-2022 | Fold-2 | OOF Score: 29.492536182632893
Seed-2022 | Fold-3 | OOF Score: 31.80135558730197
Seed-2022 | Fold-4 | OOF Score: 31.228988837982392
Seed-2022 | Fold-5 | OOF Score: 30.067224133561204
Seed-2022 | Fold-6 | OOF Score: 29.751537510523708
Seed-2022 | Fold-7 | OOF Score: 30.587469258004134
Seed-2022 | Fold-8 | OOF Score: 28

In [7]:
# Write the LogisticRegression submission: one predicted disorder / subclass
# pair per test patient.
test_df = pd.read_csv("../input/predict-generic-disorder/dataset/test.csv")

# Class index -> "<genetic disorder>//<disorder subclass>".
label_names = {
    0: "Mitochondrial genetic inheritance disorders//Leber's hereditary optic neuropathy",
    1: 'Mitochondrial genetic inheritance disorders//Leigh syndrome',
    2: 'Mitochondrial genetic inheritance disorders//Mitochondrial myopathy',
    3: "Multifactorial genetic inheritance disorders//Alzheimer's",
    4: 'Multifactorial genetic inheritance disorders//Cancer',
    5: 'Multifactorial genetic inheritance disorders//Diabetes',
    6: 'Single-gene inheritance diseases//Cystic fibrosis',
    7: 'Single-gene inheritance diseases//Hemochromatosis',
    8: 'Single-gene inheritance diseases//Tay-Sachs'
}
# Split every label once, giving one lookup table per output column.
disorder_of = {code: name.split('//')[0] for code, name in label_names.items()}
subclass_of = {code: name.split('//')[1] for code, name in label_names.items()}

# Most probable class per test row, aligned with the test frame's index.
predicted_class = pd.Series(np.argmax(y_pred_final_lr, axis=1), index=test_df.index)

submit_df = pd.DataFrame({
    'Patient Id': test_df['Patient Id'],
    'Genetic Disorder': predicted_class.map(disorder_of),
    'Disorder Subclass': predicted_class.map(subclass_of),
})
submit_df.to_csv("./LR_Submission.csv", index=False)
submit_df.head()

Unnamed: 0,Patient Id,Genetic Disorder,Disorder Subclass
0,PID0x4175,Multifactorial genetic inheritance disorders,Diabetes
1,PID0x21f5,Single-gene inheritance diseases,Hemochromatosis
2,PID0x49b8,Single-gene inheritance diseases,Hemochromatosis
3,PID0x2d97,Mitochondrial genetic inheritance disorders,Leigh syndrome
4,PID0x58da,Single-gene inheritance diseases,Cystic fibrosis


## RandomForest Classifier

In [8]:
# Repeated stratified K-fold training of a RandomForestClassifier meta-model.
# Produces:
#   y_pred_meta_rfc  - out-of-fold class probabilities, averaged over seeds
#   y_pred_final_rfc - test class probabilities, averaged over every fitted model
FOLD = 10
SEEDS = [2020, 2022]

# Width of the probability matrices, derived from the labels instead of a
# hard-coded 9.
n_classes = np.unique(Ytrain_meta).size

counter = 0      # number of models fitted so far (seeds x folds)
oof_score = 0    # running sum of the per-fold macro-F1 scores
y_pred_meta_rfc = np.zeros((Ytrain_meta.shape[0], n_classes))
y_pred_final_rfc = np.zeros((Xtest_meta.shape[0], n_classes))


for seed in SEEDS:
    seed_score = 0

    kfold = StratifiedKFold(n_splits=FOLD, shuffle=True, random_state=seed)

    for idx, (train, val) in enumerate(kfold.split(Xtrain_meta, Ytrain_meta)):
        counter += 1

        train_x, train_y = Xtrain_meta[train], Ytrain_meta[train]
        val_x, val_y = Xtrain_meta[val], Ytrain_meta[val]

        model = RandomForestClassifier(
            max_depth=6,
            class_weight=class_weight,
            random_state=seed+idx
        )

        model.fit(train_x, train_y)

        # Each row is in the validation split once per seed, so these sums are
        # turned into a per-seed average after the loops.
        y_pred = model.predict_proba(val_x)
        y_pred_meta_rfc[val] += y_pred
        y_pred_final_rfc += model.predict_proba(Xtest_meta)

        # predict_proba columns follow model.classes_, so translate the winning
        # column into its class label (a plain 1-D array) before scoring.
        y_pred = model.classes_[np.argmax(y_pred, axis=1)]
        score = 100 * f1_score(val_y, y_pred, average='macro')
        oof_score += score
        seed_score += score
        print("Seed-{} | Fold-{} | OOF Score: {}".format(seed, idx, score))

    print("\nSeed: {} | Aggregate OOF Score: {}\n\n".format(seed, (seed_score / FOLD)))


# OOF rows received one prediction per seed; test rows one per fitted model.
y_pred_meta_rfc = y_pred_meta_rfc / float(len(SEEDS))
y_pred_final_rfc = y_pred_final_rfc / float(counter)
oof_score /= float(counter)
print("Aggregate OOF Score: {}".format(oof_score))

Seed-2020 | Fold-0 | OOF Score: 30.519664400400252
Seed-2020 | Fold-1 | OOF Score: 31.787670238319404
Seed-2020 | Fold-2 | OOF Score: 31.78132616874037
Seed-2020 | Fold-3 | OOF Score: 30.746176571453216
Seed-2020 | Fold-4 | OOF Score: 31.35915344585007
Seed-2020 | Fold-5 | OOF Score: 32.79611337244707
Seed-2020 | Fold-6 | OOF Score: 29.77961796608295
Seed-2020 | Fold-7 | OOF Score: 33.93239815619239
Seed-2020 | Fold-8 | OOF Score: 32.33700234053873
Seed-2020 | Fold-9 | OOF Score: 33.707819164729734

Seed: 2020 | Aggregate OOF Score: 31.874694182475416


Seed-2022 | Fold-0 | OOF Score: 31.3886443997786
Seed-2022 | Fold-1 | OOF Score: 33.69699289872547
Seed-2022 | Fold-2 | OOF Score: 32.129990315646694
Seed-2022 | Fold-3 | OOF Score: 31.960558664829513
Seed-2022 | Fold-4 | OOF Score: 34.428922489465194
Seed-2022 | Fold-5 | OOF Score: 30.855717260036723
Seed-2022 | Fold-6 | OOF Score: 31.227813230671725
Seed-2022 | Fold-7 | OOF Score: 33.398012310958805
Seed-2022 | Fold-8 | OOF Score: 28.

In [9]:
# Write the RandomForest submission: one predicted disorder / subclass pair
# per test patient.
test_df = pd.read_csv("../input/predict-generic-disorder/dataset/test.csv")

# Class index -> "<genetic disorder>//<disorder subclass>".
label_names = {
    0: "Mitochondrial genetic inheritance disorders//Leber's hereditary optic neuropathy",
    1: 'Mitochondrial genetic inheritance disorders//Leigh syndrome',
    2: 'Mitochondrial genetic inheritance disorders//Mitochondrial myopathy',
    3: "Multifactorial genetic inheritance disorders//Alzheimer's",
    4: 'Multifactorial genetic inheritance disorders//Cancer',
    5: 'Multifactorial genetic inheritance disorders//Diabetes',
    6: 'Single-gene inheritance diseases//Cystic fibrosis',
    7: 'Single-gene inheritance diseases//Hemochromatosis',
    8: 'Single-gene inheritance diseases//Tay-Sachs'
}
# Split every label once, giving one lookup table per output column.
disorder_of = {code: name.split('//')[0] for code, name in label_names.items()}
subclass_of = {code: name.split('//')[1] for code, name in label_names.items()}

# Most probable class per test row, aligned with the test frame's index.
predicted_class = pd.Series(np.argmax(y_pred_final_rfc, axis=1), index=test_df.index)

submit_df = pd.DataFrame({
    'Patient Id': test_df['Patient Id'],
    'Genetic Disorder': predicted_class.map(disorder_of),
    'Disorder Subclass': predicted_class.map(subclass_of),
})
submit_df.to_csv("./RFC_Submission.csv", index=False)
submit_df.head()

Unnamed: 0,Patient Id,Genetic Disorder,Disorder Subclass
0,PID0x4175,Multifactorial genetic inheritance disorders,Diabetes
1,PID0x21f5,Single-gene inheritance diseases,Hemochromatosis
2,PID0x49b8,Single-gene inheritance diseases,Tay-Sachs
3,PID0x2d97,Mitochondrial genetic inheritance disorders,Leigh syndrome
4,PID0x58da,Single-gene inheritance diseases,Cystic fibrosis


## MLP Classifier

In [10]:
# Repeated stratified K-fold training of an MLPClassifier meta-model.
# Produces:
#   y_pred_meta_mlp  - out-of-fold class probabilities, averaged over seeds
#   y_pred_final_mlp - test class probabilities, averaged over every fitted model
FOLD = 10
SEEDS = [2020, 2022]

# Width of the probability matrices, derived from the labels instead of a
# hard-coded 9.
n_classes = np.unique(Ytrain_meta).size

counter = 0      # number of models fitted so far (seeds x folds)
oof_score = 0    # running sum of the per-fold macro-F1 scores
y_pred_meta_mlp = np.zeros((Ytrain_meta.shape[0], n_classes))
y_pred_final_mlp = np.zeros((Xtest_meta.shape[0], n_classes))


for seed in SEEDS:
    seed_score = 0

    kfold = StratifiedKFold(n_splits=FOLD, shuffle=True, random_state=seed)

    for idx, (train, val) in enumerate(kfold.split(Xtrain_meta, Ytrain_meta)):
        counter += 1

        train_x, train_y = Xtrain_meta[train], Ytrain_meta[train]
        val_x, val_y = Xtrain_meta[val], Ytrain_meta[val]

        # Unlike the LR / RF cells, no class_weight is passed to this model.
        model = MLPClassifier(
            #hidden_layer_sizes=(64, 32),
            learning_rate='adaptive',
            early_stopping=True,
            n_iter_no_change=30,
            random_state=seed+idx,
            max_iter=1000,
            verbose=False
        )

        model.fit(train_x, train_y)

        # Each row is in the validation split once per seed, so these sums are
        # turned into a per-seed average after the loops.
        y_pred = model.predict_proba(val_x)
        y_pred_meta_mlp[val] += y_pred
        y_pred_final_mlp += model.predict_proba(Xtest_meta)

        # predict_proba columns follow model.classes_, so translate the winning
        # column into its class label (a plain 1-D array) before scoring.
        y_pred = model.classes_[np.argmax(y_pred, axis=1)]
        score = 100 * f1_score(val_y, y_pred, average='macro')
        oof_score += score
        seed_score += score
        print("Seed-{} | Fold-{} | OOF Score: {}".format(seed, idx, score))

    print("\nSeed: {} | Aggregate OOF Score: {}\n\n".format(seed, (seed_score / FOLD)))


# OOF rows received one prediction per seed; test rows one per fitted model.
y_pred_meta_mlp = y_pred_meta_mlp / float(len(SEEDS))
y_pred_final_mlp = y_pred_final_mlp / float(counter)
oof_score /= float(counter)
print("Aggregate OOF Score: {}".format(oof_score))

Seed-2020 | Fold-0 | OOF Score: 31.166992847353303
Seed-2020 | Fold-1 | OOF Score: 29.142757239501098
Seed-2020 | Fold-2 | OOF Score: 31.915232720436048
Seed-2020 | Fold-3 | OOF Score: 28.103258917497275
Seed-2020 | Fold-4 | OOF Score: 27.959852322377166
Seed-2020 | Fold-5 | OOF Score: 29.434728287080347
Seed-2020 | Fold-6 | OOF Score: 28.938343130557577
Seed-2020 | Fold-7 | OOF Score: 27.47965573342881
Seed-2020 | Fold-8 | OOF Score: 31.525196462618677
Seed-2020 | Fold-9 | OOF Score: 31.710410510111313

Seed: 2020 | Aggregate OOF Score: 29.737642817096162


Seed-2022 | Fold-0 | OOF Score: 30.759824537363567
Seed-2022 | Fold-1 | OOF Score: 26.423333134068482
Seed-2022 | Fold-2 | OOF Score: 30.444281690311424
Seed-2022 | Fold-3 | OOF Score: 30.895202744492956
Seed-2022 | Fold-4 | OOF Score: 30.8211062239709
Seed-2022 | Fold-5 | OOF Score: 33.899643727674004
Seed-2022 | Fold-6 | OOF Score: 29.794382895925963
Seed-2022 | Fold-7 | OOF Score: 27.216958729202084
Seed-2022 | Fold-8 | OOF Scor

In [11]:
# Write the MLP submission: one predicted disorder / subclass pair per test
# patient.
test_df = pd.read_csv("../input/predict-generic-disorder/dataset/test.csv")

# Class index -> "<genetic disorder>//<disorder subclass>".
label_names = {
    0: "Mitochondrial genetic inheritance disorders//Leber's hereditary optic neuropathy",
    1: 'Mitochondrial genetic inheritance disorders//Leigh syndrome',
    2: 'Mitochondrial genetic inheritance disorders//Mitochondrial myopathy',
    3: "Multifactorial genetic inheritance disorders//Alzheimer's",
    4: 'Multifactorial genetic inheritance disorders//Cancer',
    5: 'Multifactorial genetic inheritance disorders//Diabetes',
    6: 'Single-gene inheritance diseases//Cystic fibrosis',
    7: 'Single-gene inheritance diseases//Hemochromatosis',
    8: 'Single-gene inheritance diseases//Tay-Sachs'
}
# Split every label once, giving one lookup table per output column.
disorder_of = {code: name.split('//')[0] for code, name in label_names.items()}
subclass_of = {code: name.split('//')[1] for code, name in label_names.items()}

# Most probable class per test row, aligned with the test frame's index.
predicted_class = pd.Series(np.argmax(y_pred_final_mlp, axis=1), index=test_df.index)

submit_df = pd.DataFrame({
    'Patient Id': test_df['Patient Id'],
    'Genetic Disorder': predicted_class.map(disorder_of),
    'Disorder Subclass': predicted_class.map(subclass_of),
})
submit_df.to_csv("./MLP_Submission.csv", index=False)
submit_df.head()

Unnamed: 0,Patient Id,Genetic Disorder,Disorder Subclass
0,PID0x4175,Single-gene inheritance diseases,Cystic fibrosis
1,PID0x21f5,Single-gene inheritance diseases,Tay-Sachs
2,PID0x49b8,Single-gene inheritance diseases,Tay-Sachs
3,PID0x2d97,Mitochondrial genetic inheritance disorders,Leigh syndrome
4,PID0x58da,Single-gene inheritance diseases,Cystic fibrosis


## Voting Classifier

In [12]:
# Repeated stratified K-fold training of a soft-voting ensemble
# (LogisticRegression + RandomForest + MLP) as the meta-model.
# Produces:
#   y_pred_meta_vc  - out-of-fold class probabilities, averaged over seeds
#   y_pred_final_vc - test class probabilities, averaged over every fitted model
FOLD = 10
SEEDS = [2020, 2022]

# Width of the probability matrices, derived from the labels instead of a
# hard-coded 9.
n_classes = np.unique(Ytrain_meta).size

counter = 0      # number of ensembles fitted so far (seeds x folds)
oof_score = 0    # running sum of the per-fold macro-F1 scores
y_pred_meta_vc = np.zeros((Ytrain_meta.shape[0], n_classes))
y_pred_final_vc = np.zeros((Xtest_meta.shape[0], n_classes))


for seed in SEEDS:
    seed_score = 0

    kfold = StratifiedKFold(n_splits=FOLD, shuffle=True, random_state=seed)

    for idx, (train, val) in enumerate(kfold.split(Xtrain_meta, Ytrain_meta)):
        counter += 1

        train_x, train_y = Xtrain_meta[train], Ytrain_meta[train]
        val_x, val_y = Xtrain_meta[val], Ytrain_meta[val]

        # Same three estimator configurations as the standalone cells.
        m1 = LogisticRegression(
            max_iter=2000,
            class_weight=class_weight,
            random_state=seed+idx
        )
        m2 = RandomForestClassifier(
            max_depth=6,
            class_weight=class_weight,
            random_state=seed+idx
        )
        m3 = MLPClassifier(
            #hidden_layer_sizes=(64, 32),
            learning_rate='adaptive',
            early_stopping=True,
            n_iter_no_change=30,
            random_state=seed+idx,
            max_iter=1000,
            verbose=False
        )

        # Soft voting with the random forest weighted 3x relative to the
        # other two estimators.
        model = VotingClassifier([('lr', m1), ('rfc', m2), ('mlp', m3)],
                                 voting='soft', weights=[1,3,1])

        model.fit(train_x, train_y)

        # Each row is in the validation split once per seed, so these sums are
        # turned into a per-seed average after the loops.
        y_pred = model.predict_proba(val_x)
        y_pred_meta_vc[val] += y_pred
        y_pred_final_vc += model.predict_proba(Xtest_meta)

        # predict_proba columns follow model.classes_, so translate the winning
        # column into its class label (a plain 1-D array) before scoring.
        y_pred = model.classes_[np.argmax(y_pred, axis=1)]
        score = 100 * f1_score(val_y, y_pred, average='macro')
        oof_score += score
        seed_score += score
        print("Seed-{} | Fold-{} | OOF Score: {}".format(seed, idx, score))

    print("\nSeed: {} | Aggregate OOF Score: {}\n\n".format(seed, (seed_score / FOLD)))


# OOF rows received one prediction per seed; test rows one per fitted model.
y_pred_meta_vc = y_pred_meta_vc / float(len(SEEDS))
y_pred_final_vc = y_pred_final_vc / float(counter)
oof_score /= float(counter)
print("Aggregate OOF Score: {}".format(oof_score))

Seed-2020 | Fold-0 | OOF Score: 32.128077573744214
Seed-2020 | Fold-1 | OOF Score: 33.70375023484866
Seed-2020 | Fold-2 | OOF Score: 32.91977360295026
Seed-2020 | Fold-3 | OOF Score: 32.53283587263253
Seed-2020 | Fold-4 | OOF Score: 33.99153365529575
Seed-2020 | Fold-5 | OOF Score: 33.92071394572671
Seed-2020 | Fold-6 | OOF Score: 30.322924686034348
Seed-2020 | Fold-7 | OOF Score: 35.78377449771109
Seed-2020 | Fold-8 | OOF Score: 34.75058823422075
Seed-2020 | Fold-9 | OOF Score: 34.436327421873095

Seed: 2020 | Aggregate OOF Score: 33.44902997250374


Seed-2022 | Fold-0 | OOF Score: 34.398318604338165
Seed-2022 | Fold-1 | OOF Score: 34.77879393769285
Seed-2022 | Fold-2 | OOF Score: 34.70379482477941
Seed-2022 | Fold-3 | OOF Score: 33.926004868256804
Seed-2022 | Fold-4 | OOF Score: 36.701164242674125
Seed-2022 | Fold-5 | OOF Score: 32.816322508867835
Seed-2022 | Fold-6 | OOF Score: 32.79577338455168
Seed-2022 | Fold-7 | OOF Score: 33.86724113105243
Seed-2022 | Fold-8 | OOF Score: 28.885

In [13]:
# Write the VotingClassifier submission: one predicted disorder / subclass
# pair per test patient.
test_df = pd.read_csv("../input/predict-generic-disorder/dataset/test.csv")

# Class index -> "<genetic disorder>//<disorder subclass>".
label_names = {
    0: "Mitochondrial genetic inheritance disorders//Leber's hereditary optic neuropathy",
    1: 'Mitochondrial genetic inheritance disorders//Leigh syndrome',
    2: 'Mitochondrial genetic inheritance disorders//Mitochondrial myopathy',
    3: "Multifactorial genetic inheritance disorders//Alzheimer's",
    4: 'Multifactorial genetic inheritance disorders//Cancer',
    5: 'Multifactorial genetic inheritance disorders//Diabetes',
    6: 'Single-gene inheritance diseases//Cystic fibrosis',
    7: 'Single-gene inheritance diseases//Hemochromatosis',
    8: 'Single-gene inheritance diseases//Tay-Sachs'
}
# Split every label once, giving one lookup table per output column.
disorder_of = {code: name.split('//')[0] for code, name in label_names.items()}
subclass_of = {code: name.split('//')[1] for code, name in label_names.items()}

# Most probable class per test row, aligned with the test frame's index.
predicted_class = pd.Series(np.argmax(y_pred_final_vc, axis=1), index=test_df.index)

submit_df = pd.DataFrame({
    'Patient Id': test_df['Patient Id'],
    'Genetic Disorder': predicted_class.map(disorder_of),
    'Disorder Subclass': predicted_class.map(subclass_of),
})
submit_df.to_csv("./VC_Submission.csv", index=False)
submit_df.head()

Unnamed: 0,Patient Id,Genetic Disorder,Disorder Subclass
0,PID0x4175,Multifactorial genetic inheritance disorders,Diabetes
1,PID0x21f5,Single-gene inheritance diseases,Hemochromatosis
2,PID0x49b8,Single-gene inheritance diseases,Tay-Sachs
3,PID0x2d97,Mitochondrial genetic inheritance disorders,Leigh syndrome
4,PID0x58da,Single-gene inheritance diseases,Cystic fibrosis


In [14]:
# Blend the four meta-models' test probabilities with fixed weights
# (LR 0.1, RF 0.35, MLP 0.05, Voting 0.5) and write the final submission.
blend_terms = (
    (y_pred_final_lr, 0.1),
    (y_pred_final_rfc, 0.35),
    (y_pred_final_mlp, 0.05),
    (y_pred_final_vc, 0.5),
)
weighted = [probs * weight for probs, weight in blend_terms]
# Added left to right in the order listed above.
y_pred_final = weighted[0] + weighted[1] + weighted[2] + weighted[3]

test_df = pd.read_csv("../input/predict-generic-disorder/dataset/test.csv")

# Class index -> "<genetic disorder>//<disorder subclass>".
label_names = {
    0: "Mitochondrial genetic inheritance disorders//Leber's hereditary optic neuropathy",
    1: 'Mitochondrial genetic inheritance disorders//Leigh syndrome',
    2: 'Mitochondrial genetic inheritance disorders//Mitochondrial myopathy',
    3: "Multifactorial genetic inheritance disorders//Alzheimer's",
    4: 'Multifactorial genetic inheritance disorders//Cancer',
    5: 'Multifactorial genetic inheritance disorders//Diabetes',
    6: 'Single-gene inheritance diseases//Cystic fibrosis',
    7: 'Single-gene inheritance diseases//Hemochromatosis',
    8: 'Single-gene inheritance diseases//Tay-Sachs'
}
# Split every label once, giving one lookup table per output column.
disorder_of = {code: name.split('//')[0] for code, name in label_names.items()}
subclass_of = {code: name.split('//')[1] for code, name in label_names.items()}

# Most probable class per test row, aligned with the test frame's index.
predicted_class = pd.Series(np.argmax(y_pred_final, axis=1), index=test_df.index)

submit_df = pd.DataFrame({
    'Patient Id': test_df['Patient Id'],
    'Genetic Disorder': predicted_class.map(disorder_of),
    'Disorder Subclass': predicted_class.map(subclass_of),
})
submit_df.to_csv("./BLEND_Submission.csv", index=False)
submit_df.head()

Unnamed: 0,Patient Id,Genetic Disorder,Disorder Subclass
0,PID0x4175,Multifactorial genetic inheritance disorders,Diabetes
1,PID0x21f5,Single-gene inheritance diseases,Hemochromatosis
2,PID0x49b8,Single-gene inheritance diseases,Tay-Sachs
3,PID0x2d97,Mitochondrial genetic inheritance disorders,Leigh syndrome
4,PID0x58da,Single-gene inheritance diseases,Cystic fibrosis
