In [1]:
!pip install unidecode
!pip install rapidfuzz

Collecting unidecode
  Downloading Unidecode-1.4.0-py3-none-any.whl.metadata (13 kB)
Downloading Unidecode-1.4.0-py3-none-any.whl (235 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m235.8/235.8 kB[0m [31m11.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: unidecode
Successfully installed unidecode-1.4.0
Collecting rapidfuzz
  Downloading rapidfuzz-3.14.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (12 kB)
Downloading rapidfuzz-3.14.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (3.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.3/3.3 MB[0m [31m64.2 MB/s[0m eta [36m0:00:00[0m:00:01[0m
[?25hInstalling collected packages: rapidfuzz
Successfully installed rapidfuzz-3.14.0


In [443]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import unicodedata
import pickle
import joblib
import re

from unidecode import unidecode
from rapidfuzz import fuzz, process
from sklearn.impute import KNNImputer

from sklearn.preprocessing import OneHotEncoder, LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split


from sentence_transformers import SentenceTransformer
from sklearn.decomposition import PCA

In [335]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.float_format', lambda  x: '%.3f' % x)
pd.set_option('display.width', 500)

In [416]:
data = pd.read_excel("/kaggle/input/talent-academy-case/Talent_Academy_Case_DT_2025.xlsx")

In [417]:
def data_colletion_patient_split(dataframe, test_size=0.3, random_state=42):
    unique_patients = dataframe['HastaNo'].unique()
    train_patients, test_patients = train_test_split(
        unique_patients, test_size=test_size, random_state=random_state
    )
    
    train = dataframe[dataframe['HastaNo'].isin(train_patients)].reset_index(drop=True)
    test = dataframe[dataframe['HastaNo'].isin(test_patients)].reset_index(drop=True)
    
    return train, test

train, test = data_colletion_patient_split(data)

In [418]:
def normalize_text(text):
    if pd.isnull(text):
        return text 
    text = unicodedata.normalize('NFKD', text)
    text = "".join([c for c in text if not unicodedata.combining(c)])
    return text.lower().strip()

In [419]:
Allergies = ["polen", "toz", "arveles", "coraspin", "sucuk", "novalgin", "yer fistigi", "voltaren", "gripin"]

def fuzzy_replace(text, mapping, threshold=85):
    if pd.isna(text):
        return text
    words = text.lower().split()
    new_words = []
    for w in words:
        replaced = False
        for allergi in Allergies:
            if fuzz.ratio(w.lower(), allergi) >= threshold:
                new_words.append(allergi)
                replaced = True
                break
        if not replaced:
            new_words.append(w)
    return " ".join(new_words)

train["Alerji"] = train["Alerji"].apply(lambda x: fuzzy_replace(x, Allergies))
test["Alerji"] = test["Alerji"].apply(lambda x: fuzzy_replace(x, Allergies))

train["Alerji"] = train["Alerji"].apply(normalize_text)
test["Alerji"] = test["Alerji"].apply(normalize_text)

In [420]:
cols_to_fix = ['KronikHastalik', 'Alerji']

def col_fix(df, cols_to_fix):
    for col in cols_to_fix:
        merged = (df.groupby('HastaNo')[col].apply(lambda x: ','.join(x.dropna().unique()) if x.notna().any() else np.nan).reset_index())
        df = df.drop(columns=[col]).merge(merged, on='HastaNo', how='left')
    return df

train = col_fix(train, cols_to_fix)
test = col_fix(test, cols_to_fix)

In [421]:
def create_referance_dataframe(df):
    ref_df = df[['HastaNo', 'Cinsiyet', 'KanGrubu', 'KronikHastalik', 'Alerji']].copy()
    
    ref_df = ref_df.groupby('HastaNo').agg({
        'Cinsiyet': 'first',
        'KanGrubu': 'first',
        'KronikHastalik': 'first',
        'Alerji': 'first'
    }).reset_index()
    return ref_df

ref_df = create_referance_dataframe(data)

In [422]:
def fill_missing_with_ref(df, ref_df, cols):

    ref_indexed = ref_df.set_index('HastaNo')
    
    for col in cols:
        if col in df.columns and col in ref_df.columns:
            df[col] = df[col].fillna(df['HastaNo'].map(ref_indexed[col]))
    return df

cols_to_fill = ['Cinsiyet', 'KanGrubu', 'KronikHastalik', 'Alerji']
train = fill_missing_with_ref(train, ref_df, cols_to_fill)
test = fill_missing_with_ref(test, ref_df, cols_to_fill)

In [423]:
def replace_nan_yok(df, cols):
    for col in cols:
        df[col] = df[col].fillna("Yok")
    return df

cols = ["KronikHastalik", "Alerji"]
train = replace_nan_yok(train, cols)
test = replace_nan_yok(test, cols)

In [424]:
def fill_tanilar_with_mode(row):
    if pd.isna(row["Tanilar"]):
        key = (row["TedaviAdi"])
        return mode_map.get(key, row["Tanilar"])
    return row["Tanilar"]

mode_map = (data.groupby(["TedaviAdi"])["Tanilar"].agg(lambda x: x.mode().iloc[0] if not x.mode().empty else None))
train["Tanilar"] = train.apply(fill_tanilar_with_mode, axis=1)
test["Tanilar"] = test.apply(fill_tanilar_with_mode, axis=1)

In [425]:
def fill_bolum_with_mode(row):
    if pd.isna(row["Bolum"]):
        key = (row["TedaviAdi"])
        return mode_map.get(key, row["Bolum"])
    return row["Bolum"]

mode_map = (data.groupby(["TedaviAdi"])["Bolum"].agg(lambda x: x.mode().iloc[0] if not x.mode().empty else None))
train["Bolum"] = train.apply(fill_bolum_with_mode, axis=1)
test["Bolum"] = test.apply(fill_bolum_with_mode, axis=1)

In [426]:
def fill_uygulama_yeri_with_mode(row):
    if pd.isna(row["UygulamaYerleri"]):
        key = (row["Tanilar"], row["TedaviAdi"])
        return mode_map.get(key, row["UygulamaYerleri"])
    return row["UygulamaYerleri"]


mode_map = (data.groupby(["Tanilar", "TedaviAdi"])["UygulamaYerleri"].agg(lambda x: x.mode().iloc[0] if not x.mode().empty else None))
train["UygulamaYerleri"] = train.apply(fill_uygulama_yeri_with_mode, axis=1)
test["UygulamaYerleri"] = test.apply(fill_uygulama_yeri_with_mode, axis=1)

In [427]:
def fill_uygulama_yeri_2_with_mode(row):
    if pd.isna(row["UygulamaYerleri"]):
        key = (row["HastaNo"], row["TedaviAdi"])
        return mode_map.get(key, row["UygulamaYerleri"])
    return row["UygulamaYerleri"]

mode_map = (data.groupby(["HastaNo", "TedaviAdi"])["UygulamaYerleri"].agg(lambda x: x.mode().iloc[0] if not x.mode().empty else None))
train["UygulamaYerleri"] = train.apply(fill_uygulama_yeri_2_with_mode, axis=1)
test["UygulamaYerleri"] = test.apply(fill_uygulama_yeri_2_with_mode, axis=1)

In [428]:
def select_row(df):
    df = df[~(df["UygulamaYerleri"].isna() & df["Tanilar"].isna())]
    return df

train = select_row(train)
test = select_row(test)

In [429]:
def replace_nan_diğer(df, cols):
    for col in cols:
        df[col] = df[col].fillna("diğer")
    return df

cols = ["Bolum", "UygulamaYerleri", "Cinsiyet", "KanGrubu"]
train = replace_nan_diğer(train, cols)
test = replace_nan_diğer(test, cols)

In [430]:
def replace_nan_eksik_tanı(df, cols):
    for col in cols:
        df[col] = df[col].fillna("eksik_tanı")
    return df

cols = ["Tanilar"]
train = replace_nan_eksik_tanı(train, cols)
test = replace_nan_eksik_tanı(test, cols)

In [431]:
def uygulama_suresi_replace(df):
    df["UygulamaSuresi"] = df["UygulamaSuresi"].str.replace("Dakika", "", regex=False).astype(int)
    return df

train =  uygulama_suresi_replace(train)
test =  uygulama_suresi_replace(test)

In [432]:
def drop_duplicate(df):
    df = df.drop(["HastaNo"], axis=1)
    unique_df = df.drop_duplicates().reset_index(drop=True)
    return df

unique_train = drop_duplicate(train)
unique_test = drop_duplicate(test)

In [433]:
def outlier_threshold(df, col_name, q1=0.15, q3=0.85):
    quartile1 = df[col_name].quantile(q1)
    quartile3 = df[col_name].quantile(q3)
    interquantile = quartile3 - quartile1
    low_limit = quartile1 - 1.5 * interquantile
    up_limit = quartile3 + 1.5 * interquantile
    
    return low_limit, up_limit

In [434]:
def check_outlier(df, col_name):
    low_limit, up_limit = outlier_threshold(df, col_name)
    if df[(df[col_name] < low_limit) | (df[col_name] > up_limit)].any(axis=None):
        return True
    else:
        return False
    
for col in ["Yas"]:
    print(f"Train verisinde {col} sütununda aykırı değer durumu: ", check_outlier(unique_train, col))

Train verisinde Yas sütununda aykırı değer durumu:  False


In [435]:
def onehotencoder(dataframe, train=True):
    one_hot_cat_cols = ["Cinsiyet", "KanGrubu", "Uyruk", "Bolum"]
    
    drop_map = {
        "Cinsiyet": ["diğer"], 
        "KanGrubu": ["diğer"],
        "Uyruk": ["Tokelau"],
        "Bolum": ["diğer"]
    }

    if train:
        ohe = OneHotEncoder(handle_unknown='ignore', sparse_output=False,drop=[drop_map.get(col, None) for col in one_hot_cat_cols])
        encoded_cols = ohe.fit_transform(dataframe[one_hot_cat_cols])
        joblib.dump(ohe, 'one_hot_encoder.pkl')
        new_columns = ohe.get_feature_names_out(one_hot_cat_cols)
        encoded_df = pd.DataFrame(encoded_cols, columns=new_columns, index=dataframe.index)
        dataframe = pd.concat([dataframe, encoded_df], axis=1)
        dataframe.drop(columns=one_hot_cat_cols, inplace=True)
    else:
        loaded_ohe = joblib.load('one_hot_encoder.pkl')
        encoded_test_data = loaded_ohe.transform(dataframe[one_hot_cat_cols])
        new_columns = loaded_ohe.get_feature_names_out(one_hot_cat_cols)
        encoded_test_df = pd.DataFrame(encoded_test_data, columns=new_columns, index=dataframe.index)
        dataframe = pd.concat([dataframe, encoded_test_df], axis=1)
        dataframe.drop(columns=one_hot_cat_cols, inplace=True)
    
    return dataframe

unique_train = onehotencoder(unique_train, train=True)
unique_test = onehotencoder(unique_test, train=False)



In [437]:
from sklearn.preprocessing import MultiLabelBinarizer
import pandas as pd
import joblib

def multilabel_binarize(dataframe, multilabel_cols, train=True):
    mlb_dict = {}

    for col in multilabel_cols:
        dataframe[col] = dataframe[col].fillna('').apply(
            lambda x: [v.strip().lower() for v in str(x).split(',') if v.strip()]
        )

        if train:
            mlb = MultiLabelBinarizer()
            encoded = mlb.fit_transform(dataframe[col])
            joblib.dump(mlb, f'mlb_{col}.pkl')  # Binarizer kaydet
        else:
            mlb = joblib.load(f'mlb_{col}.pkl')
            encoded = mlb.transform(dataframe[col])

        encoded_df = pd.DataFrame(
            encoded,
            columns=[f"{col}_{cls}" for cls in mlb.classes_],
            index=dataframe.index
        )

        dataframe = pd.concat([dataframe.drop(columns=[col]), encoded_df], axis=1)

        mlb_dict[col] = mlb

    return dataframe, mlb_dict


multilabel_cols = ["Alerji", "KronikHastalik"]
unique_train, mlb_models = multilabel_binarize(unique_train, multilabel_cols, train=True)
unique_test, _ = multilabel_binarize(unique_test, multilabel_cols, train=False)

In [441]:
def replace_cols(df):
    df["Alerji_voltaren"] = df["Alerji_voltaren"] + df["Alerji_volteren"]
    df["KronikHastalik_hipotirodizm"] = df["KronikHastalik_hipotirodizm"] + df["KronikHastalik_hiportiroidizm"]
    df = df.drop(["Alerji_volteren", "KronikHastalik_hiportiroidizm"], axis=1)
    return df

unique_train = replace_cols(unique_train)
unique_test = replace_cols(unique_test)

In [444]:
def preprocess_text(text):
    if not isinstance(text, str):
        text = str(text)
    text = text.lower()  
    text = re.sub(r'[^\w\s]', ' ', text)  
    text = re.sub(r'\s+', ' ', text).strip() 
    return text

def embed_text_columns(df, text_cols, model_name="dbmdz/bert-base-turkish-cased", reduced_dim=15):
    model = SentenceTransformer(model_name)
    
    for col in text_cols:
        texts = df[col].fillna("").apply(preprocess_text).tolist()
        embeddings = model.encode(texts, convert_to_numpy=True)
        
        pca = PCA(n_components=reduced_dim)
        reduced_embeddings = pca.fit_transform(embeddings)
        
        emb_df = pd.DataFrame(
            reduced_embeddings, 
            columns=[f"{col}_emb_{i}" for i in range(reduced_dim)],
            index=df.index
        )
        df = pd.concat([df.drop(columns=[col]), emb_df], axis=1)
        
    return df


text_cols = ["Tanilar", "TedaviAdi", "UygulamaYerleri"]
unique_train = embed_text_columns(unique_train, text_cols, reduced_dim=15)
unique_test = embed_text_columns(unique_test, text_cols, reduced_dim=15)


Batches:   0%|          | 0/49 [00:00<?, ?it/s]

Batches:   0%|          | 0/49 [00:00<?, ?it/s]

Batches:   0%|          | 0/49 [00:00<?, ?it/s]

Batches:   0%|          | 0/22 [00:00<?, ?it/s]

Batches:   0%|          | 0/22 [00:00<?, ?it/s]

Batches:   0%|          | 0/22 [00:00<?, ?it/s]

In [445]:
def labelencoding_tedavisuresi(dataframe, train=True):
    column_to_encode = "TedaviSuresi"

    if train:
        enc = LabelEncoder()
        dataframe[column_to_encode] = enc.fit_transform(dataframe[column_to_encode].astype(str))
        joblib.dump(enc, 'tedavisuresi_labelencoder.pkl')
    else:
        enc = joblib.load('tedavisuresi_labelencoder.pkl')
        dataframe[column_to_encode] = enc.transform(dataframe[column_to_encode].astype(str))
    
    return dataframe

unique_train = labelencoding_tedavisuresi(unique_train, train=True)
unique_test = labelencoding_tedavisuresi(unique_test, train=True)

In [446]:
def normalization(dataframe, num_cols, train=True):
    if train:
        scaler = StandardScaler()
        dataframe[num_cols] = scaler.fit_transform(dataframe[num_cols])
        joblib.dump(scaler, 'standardscaler.pkl')
    else:
        loaded_scaler = joblib.load('standardscaler.pkl')
        dataframe[num_cols] = loaded_scaler.transform(dataframe[num_cols])
        
    return dataframe

num_cols = ["Yas", "UygulamaSuresi"]
unique_train = normalization(unique_train, num_cols, train=True)
unique_test = normalization(unique_test, num_cols, train=False)

In [447]:
unique_train.head()

Unnamed: 0,Yas,TedaviSuresi,UygulamaSuresi,Cinsiyet_Erkek,Cinsiyet_Kadın,KanGrubu_0 Rh+,KanGrubu_0 Rh-,KanGrubu_A Rh+,KanGrubu_A Rh-,KanGrubu_AB Rh+,KanGrubu_B Rh+,KanGrubu_B Rh-,Uyruk_Arnavutluk,Uyruk_Azerbaycan,Uyruk_Libya,Uyruk_Türkiye,"Bolum_Fiziksel Tıp Ve Rehabilitasyon,Solunum Merkezi",Bolum_Genel Cerrahi,Bolum_Göğüs Hastalıkları,Bolum_Kalp Ve Damar Cerrahisi,Bolum_Kardiyoloji,Bolum_Laboratuar,Bolum_Nöroloji,Bolum_Ortopedi Ve Travmatoloji,Bolum_Tıbbi Onkoloji,Bolum_İç Hastalıkları,Alerji_arveles,Alerji_coraspin,Alerji_gripin,Alerji_novalgin,Alerji_polen,Alerji_sucuk,Alerji_toz,Alerji_voltaren,Alerji_yer fıstıgı,Alerji_yok,KronikHastalik_aritmi,KronikHastalik_astım,KronikHastalik_becker musküler distrofisi,KronikHastalik_diyabet,KronikHastalik_duchenne musküler distrofisi,KronikHastalik_fascioscapulohumeral distrofi,KronikHastalik_guatr,KronikHastalik_hipertansiyon,KronikHastalik_hipertiroidizm,KronikHastalik_hipotirodizm,KronikHastalik_kalp yetmezliği,KronikHastalik_limb-girdle musküler distrofi,KronikHastalik_myastenia gravis,KronikHastalik_polimiyozit,KronikHastalik_yok,Tanilar_emb_0,Tanilar_emb_1,Tanilar_emb_2,Tanilar_emb_3,Tanilar_emb_4,Tanilar_emb_5,Tanilar_emb_6,Tanilar_emb_7,Tanilar_emb_8,Tanilar_emb_9,Tanilar_emb_10,Tanilar_emb_11,Tanilar_emb_12,Tanilar_emb_13,Tanilar_emb_14,TedaviAdi_emb_0,TedaviAdi_emb_1,TedaviAdi_emb_2,TedaviAdi_emb_3,TedaviAdi_emb_4,TedaviAdi_emb_5,TedaviAdi_emb_6,TedaviAdi_emb_7,TedaviAdi_emb_8,TedaviAdi_emb_9,TedaviAdi_emb_10,TedaviAdi_emb_11,TedaviAdi_emb_12,TedaviAdi_emb_13,TedaviAdi_emb_14,UygulamaYerleri_emb_0,UygulamaYerleri_emb_1,UygulamaYerleri_emb_2,UygulamaYerleri_emb_3,UygulamaYerleri_emb_4,UygulamaYerleri_emb_5,UygulamaYerleri_emb_6,UygulamaYerleri_emb_7,UygulamaYerleri_emb_8,UygulamaYerleri_emb_9,UygulamaYerleri_emb_10,UygulamaYerleri_emb_11,UygulamaYerleri_emb_12,UygulamaYerleri_emb_13,UygulamaYerleri_emb_14
0,-1.216,4,0.541,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,-2.586,-4.504,1.327,3.092,-0.316,-0.927,-2.345,-0.845,0.396,-0.003,-0.378,0.1,-0.258,0.409,0.171,-4.158,-1.422,-3.079,1.545,0.254,-1.451,-1.512,-1.218,-0.934,0.229,0.511,0.563,-0.186,0.485,-1.005,-4.92,1.481,4.349,-0.721,-0.228,-0.407,-0.066,-0.309,-0.027,0.025,-0.164,-0.011,0.054,-0.033,-0.076
1,-1.216,4,0.541,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,-2.586,-4.504,1.327,3.092,-0.316,-0.927,-2.345,-0.845,0.396,-0.003,-0.378,0.1,-0.258,0.409,0.171,-4.158,-1.422,-3.079,1.545,0.254,-1.451,-1.512,-1.218,-0.934,0.229,0.511,0.563,-0.186,0.485,-1.005,0.767,-0.187,2.871,-1.027,-1.384,-0.933,0.874,1.195,1.202,0.947,2.952,1.05,-1.772,0.399,3.01
2,-1.216,4,-1.853,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,-2.586,-4.504,1.327,3.092,-0.316,-0.927,-2.345,-0.845,0.396,-0.003,-0.378,0.1,-0.258,0.409,0.171,-4.158,-1.422,-3.079,1.545,0.254,-1.451,-1.512,-1.218,-0.934,0.229,0.511,0.563,-0.186,0.485,-1.005,-4.92,1.481,4.349,-0.721,-0.228,-0.407,-0.066,-0.309,-0.027,0.025,-0.164,-0.011,0.054,-0.033,-0.076
3,-1.216,4,0.541,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,-2.586,-4.504,1.327,3.092,-0.316,-0.927,-2.345,-0.845,0.396,-0.003,-0.378,0.1,-0.258,0.409,0.171,-4.158,-1.422,-3.079,1.545,0.254,-1.451,-1.512,-1.218,-0.934,0.229,0.511,0.563,-0.186,0.485,-1.005,0.767,-0.187,2.871,-1.027,-1.384,-0.933,0.874,1.195,1.202,0.947,2.952,1.05,-1.772,0.399,3.01
4,-1.216,4,0.541,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,-2.586,-4.504,1.327,3.092,-0.316,-0.927,-2.345,-0.845,0.396,-0.003,-0.378,0.1,-0.258,0.409,0.171,-4.158,-1.422,-3.079,1.545,0.254,-1.451,-1.512,-1.218,-0.934,0.229,0.511,0.563,-0.186,0.485,-1.005,-4.92,1.481,4.349,-0.721,-0.228,-0.407,-0.066,-0.309,-0.027,0.025,-0.164,-0.011,0.054,-0.033,-0.076


In [448]:
unique_test.head()

Unnamed: 0,Yas,TedaviSuresi,UygulamaSuresi,Cinsiyet_Erkek,Cinsiyet_Kadın,KanGrubu_0 Rh+,KanGrubu_0 Rh-,KanGrubu_A Rh+,KanGrubu_A Rh-,KanGrubu_AB Rh+,KanGrubu_B Rh+,KanGrubu_B Rh-,Uyruk_Arnavutluk,Uyruk_Azerbaycan,Uyruk_Libya,Uyruk_Türkiye,"Bolum_Fiziksel Tıp Ve Rehabilitasyon,Solunum Merkezi",Bolum_Genel Cerrahi,Bolum_Göğüs Hastalıkları,Bolum_Kalp Ve Damar Cerrahisi,Bolum_Kardiyoloji,Bolum_Laboratuar,Bolum_Nöroloji,Bolum_Ortopedi Ve Travmatoloji,Bolum_Tıbbi Onkoloji,Bolum_İç Hastalıkları,Alerji_arveles,Alerji_coraspin,Alerji_gripin,Alerji_novalgin,Alerji_polen,Alerji_sucuk,Alerji_toz,Alerji_voltaren,Alerji_yer fıstıgı,Alerji_yok,KronikHastalik_aritmi,KronikHastalik_astım,KronikHastalik_becker musküler distrofisi,KronikHastalik_diyabet,KronikHastalik_duchenne musküler distrofisi,KronikHastalik_fascioscapulohumeral distrofi,KronikHastalik_guatr,KronikHastalik_hipertansiyon,KronikHastalik_hipertiroidizm,KronikHastalik_hipotirodizm,KronikHastalik_kalp yetmezliği,KronikHastalik_limb-girdle musküler distrofi,KronikHastalik_myastenia gravis,KronikHastalik_polimiyozit,KronikHastalik_yok,Tanilar_emb_0,Tanilar_emb_1,Tanilar_emb_2,Tanilar_emb_3,Tanilar_emb_4,Tanilar_emb_5,Tanilar_emb_6,Tanilar_emb_7,Tanilar_emb_8,Tanilar_emb_9,Tanilar_emb_10,Tanilar_emb_11,Tanilar_emb_12,Tanilar_emb_13,Tanilar_emb_14,TedaviAdi_emb_0,TedaviAdi_emb_1,TedaviAdi_emb_2,TedaviAdi_emb_3,TedaviAdi_emb_4,TedaviAdi_emb_5,TedaviAdi_emb_6,TedaviAdi_emb_7,TedaviAdi_emb_8,TedaviAdi_emb_9,TedaviAdi_emb_10,TedaviAdi_emb_11,TedaviAdi_emb_12,TedaviAdi_emb_13,TedaviAdi_emb_14,UygulamaYerleri_emb_0,UygulamaYerleri_emb_1,UygulamaYerleri_emb_2,UygulamaYerleri_emb_3,UygulamaYerleri_emb_4,UygulamaYerleri_emb_5,UygulamaYerleri_emb_6,UygulamaYerleri_emb_7,UygulamaYerleri_emb_8,UygulamaYerleri_emb_9,UygulamaYerleri_emb_10,UygulamaYerleri_emb_11,UygulamaYerleri_emb_12,UygulamaYerleri_emb_13,UygulamaYerleri_emb_14
0,0.875,12,0.541,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,3.914,1.497,3.606,-0.803,3.147,0.489,0.124,-0.396,0.063,-1.166,0.441,-0.554,-1.275,0.213,0.318,0.26,-2.401,2.362,2.732,4.23,-2.177,1.902,0.161,0.476,0.767,2.453,1.103,-0.907,2.235,-1.062,5.094,-0.877,-0.315,4.886,0.268,0.538,0.034,0.759,-0.319,1.114,0.671,0.24,-1.065,0.051,-0.111
1,1.202,2,-0.257,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,0,2.112,2.441,-1.635,0.192,3.19,-0.693,-0.766,-1.945,3.754,0.582,2.015,-0.458,-0.633,1.425,1.321,0.702,-2.061,2.983,3.125,1.937,-1.389,2.349,2.955,-0.287,2.655,-1.093,-0.827,-0.572,0.169,-0.363,7.413,-1.364,-0.062,0.544,-1.51,-0.56,3.272,0.112,1.375,-1.032,0.598,0.363,0.448,-0.322,0.526
2,1.202,2,0.541,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,0,2.112,2.441,-1.635,0.192,3.19,-0.693,-0.766,-1.945,3.754,0.582,2.015,-0.458,-0.633,1.425,1.321,0.702,-2.061,2.983,3.125,1.937,-1.389,2.349,2.955,-0.287,2.655,-1.093,-0.827,-0.572,0.169,-0.363,7.413,-1.364,-0.062,0.544,-1.51,-0.56,3.272,0.112,1.375,-1.032,0.598,0.363,0.448,-0.322,0.526
3,1.202,2,0.541,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,0,2.112,2.441,-1.635,0.192,3.19,-0.693,-0.766,-1.945,3.754,0.582,2.015,-0.458,-0.633,1.425,1.321,0.702,-2.061,2.983,3.125,1.937,-1.389,2.349,2.955,-0.287,2.655,-1.093,-0.827,-0.572,0.169,-0.363,7.413,-1.364,-0.062,0.544,-1.51,-0.56,3.272,0.112,1.375,-1.032,0.598,0.363,0.448,-0.322,0.526
4,1.202,2,-1.853,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,0,2.112,2.441,-1.635,0.192,3.19,-0.693,-0.766,-1.945,3.754,0.582,2.015,-0.458,-0.633,1.425,1.321,0.702,-2.061,2.983,3.125,1.937,-1.389,2.349,2.955,-0.287,2.655,-1.093,-0.827,-0.572,0.169,-0.363,7.413,-1.364,-0.062,0.544,-1.51,-0.56,3.272,0.112,1.375,-1.032,0.598,0.363,0.448,-0.322,0.526


In [450]:
def train_val_test_split(train, test):
    X_train = train.loc[:, ~train.columns.str.contains("TedaviSuresi")]
    y_train = train.loc[:, "TedaviSuresi"]
    
    X_test = test.loc[:, ~test.columns.str.contains("TedaviSuresi")]
    y_test = test.loc[:, "TedaviSuresi"]

    return X_train, y_train, X_test, y_test

X_train, y_train, X_test, y_test = train_val_test_split(unique_train, unique_test)

In [None]:
X_train