In [1]:
from preprocessing import preprocess 
from cols_trie import gen_trie
import numpy as np
import pandas as pd
import json
import os

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, f1_score
from sklearn.svm import SVC

In [2]:
df_train = pd.read_csv('datasets/train_radiomics_hipocamp.csv')
df_test = pd.read_csv('datasets/test_radiomics_hipocamp.csv')
df_train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 305 entries, 0 to 304
Columns: 2181 entries, ID to Transition
dtypes: float64(2014), int64(147), object(20)
memory usage: 5.1+ MB


In [3]:
X_train, X_test, y_train, y_test, le = preprocess(df_train,mode="normal")


In [4]:

processed_df_train = pd.concat([X_train, pd.Series(y_train, name='Transition')], axis=1)

cols_trie = gen_trie(X_train.columns)

important = {"Age", "Sex","diagnostics","Transition"}
target = "Transition"
important_cols = [list(filter(lambda x:x.startswith(g), processed_df_train.columns)) for g in important]
important_cols = [item for sublist in important_cols for item in sublist]
groups = set(cols_trie.keys()) - (important | {target})

grouped = [list(filter(lambda x:x.startswith(g), processed_df_train.columns)) for g in groups | important-{target}]
model_col_pairs=[(SVC(probability=True,random_state=42,C=5,gamma='auto',kernel='rbf'),cols) for cols in grouped]


In [5]:
from imblearn.over_sampling import SMOTE
smote = SMOTE(random_state=987654321)
smote_X_train, smote_y_train = smote.fit_resample(X_train, y_train)

In [11]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from custom_bagging import CustomBAgging
from models import decision_tree
meta_learner = DecisionTreeClassifier(random_state=987654321)
m = CustomBAgging(processed_df_train.columns,model_col_pairs,meta_learner=meta_learner)
model = m.fit(smote_X_train,pd.Series(smote_y_train, name='Transition'))

In [12]:
y_pred = model.predict(X_test)
y_pred

classification_report(y_pred,y_test,output_dict=True)

{'0': {'precision': 0.4166666666666667,
  'recall': 0.2777777777777778,
  'f1-score': 0.3333333333333333,
  'support': 18.0},
 '1': {'precision': 0.2857142857142857,
  'recall': 0.4,
  'f1-score': 0.3333333333333333,
  'support': 15.0},
 '2': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 1.0},
 '3': {'precision': 0.16666666666666666,
  'recall': 0.3,
  'f1-score': 0.21428571428571427,
  'support': 10.0},
 '4': {'precision': 0.1111111111111111,
  'recall': 0.058823529411764705,
  'f1-score': 0.07692307692307693,
  'support': 17.0},
 'accuracy': 0.2459016393442623,
 'macro avg': {'precision': 0.196031746031746,
  'recall': 0.20732026143790852,
  'f1-score': 0.19157509157509156,
  'support': 61.0},
 'weighted avg': {'precision': 0.25149622690606294,
  'recall': 0.2459016393442623,
  'f1-score': 0.23689425328769592,
  'support': 61.0}}