In [None]:
from google.colab import drive
import os
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pwd
os.chdir("drive/MyDrive/Kolektif Öğrenme/project")
!pwd

/content
/content/drive/MyDrive/Kolektif Öğrenme/project


In [None]:
import cv2
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, RepeatedKFold
import math
from tensorflow.keras.applications.inception_v3 import InceptionV3
from keras.layers import GlobalAveragePooling2D, Dense, MaxPooling2D, Flatten, Dropout, Input
from keras.models import Model
from itertools import combinations as comb
from sklearn.ensemble import VotingClassifier, RandomForestClassifier
import xgboost as xgb

import warnings
warnings.filterwarnings("ignore")

In [None]:
IMAGE_SIZE = 128

In [None]:
covid_folder = "./covid/non-COVID"
non_covid_folder = "./covid/COVID"

In [None]:
def resize_image(image, image_size):
    return cv2.resize(image.copy(), image_size, interpolation=cv2.INTER_AREA)

In [None]:
def load_images_from_folder(folder):
    images = []
    for filename in os.listdir(folder):
        img = cv2.imread(os.path.join(folder,filename))
        if img is not None:
            images.append(resize_image(img, (IMAGE_SIZE, IMAGE_SIZE)) / 255.)
    return np.asarray(images)

In [None]:
def get_inception():
    inception_model = InceptionV3(
    include_top=False,
    weights="imagenet",
    input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3),
    )
    x = inception_model.output
    x = GlobalAveragePooling2D()(x)
    x = Flatten()(x)
    x = Dropout(0.5)(x)
    x = Dense(128, activation="relu")(x)
    x = Dropout(0.5)(x)
    output = Dense(1, activation="sigmoid")(x)
    model = Model(inputs=inception_model.input, outputs=output)
    model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["acc"])

    return inception_model, model

In [None]:
def incept_rf(base_model, modely, train_imgs, test_imgs):
    X_train_features = []
    X_valid_features = []

    feature_network = Model(base_model.input, modely.get_layer(list(filter(lambda x: "flatten" in x.name,  model.layers))[0].name).output)
    X_train_features = feature_network.predict(train_imgs)
    X_valid_features = feature_network.predict(test_imgs)
    return X_train_features, X_valid_features

In [None]:
def generate_imp_space(X_train, y_train, X_test, imp_feature_size, foz):
    imp_train_data = X_train.values
    imp_test_data = X_test.values
    y_train = pd.DataFrame(y_train)
    d = len(X_train.columns)

    for i in range(0, imp_feature_size*foz):
        Xindis = np.random.permutation(d)
        for j in range(0, d-(foz-1), foz):
            sX = np.random.permutation(1)
            s1 = sX[0]

            s1data = X_train[X_train.index.isin(y_train[y_train == str(s1)].index)]
            s2data = X_train[~X_train.index.isin(y_train[y_train == str(s1)].index)]
            s1data = s1data.iloc[:, Xindis[j:j+(foz)]]
            s2data = s2data.iloc[:, Xindis[j:j+(foz)]]

            s1label = np.ones((s1data.values.shape[0], 1), dtype=int)
            s2label = -1*np.ones((s2data.values.shape[0], 1), dtype=int)
            Wdata = np.concatenate((s1data, s2data))

            Wdata = x2fx(Wdata)
            Wlabel = np.concatenate((s1label, s2label))
            W = np.matmul(np.matmul(np.linalg.pinv(
                np.matmul(Wdata.T, Wdata)), Wdata.T), Wlabel)

            WW = x2fx(X_train.iloc[:, Xindis[j:j+(foz)]].values)
            imp_train_data = np.concatenate(
                (imp_train_data, np.matmul(WW, W)), axis=1)

            TT = x2fx(X_test.iloc[:, Xindis[j:j+(foz)]].values)
            imp_test_data = np.concatenate(
                (imp_test_data, np.matmul(TT, W)), axis=1)

    return imp_train_data, imp_test_data


In [None]:
def x2fx(x, model="linear"):
    linear = np.c_[np.ones(x.shape[0]), x]
    if model == "linear":
        return linear
    if model == "purequadratic":
        return np.c_[linear, x**2]
    interaction = np.hstack([x[:, i]*x[:, j]
                            for i, j in comb(range(x.shape[1]), 2)]).T
    if model == "interaction":
        return np.c_[linear, interaction]
    if model == "quadratic":
        return np.c_[linear, interaction, x**2]

In [None]:
covid = load_images_from_folder(covid_folder)
non_covid = load_images_from_folder(non_covid_folder)

X = np.concatenate([covid, non_covid])
y = np.concatenate([np.ones(covid.shape[0]), np.zeros(non_covid.shape[0])])

del(covid)
del(non_covid)

In [None]:
cv = RepeatedKFold(n_splits=2, n_repeats=5, random_state=42)
foz = 4
imp_feature_size = 1
n_estimators = 3
imp_rfc_acc = []
rfc_acc = []

for train_index, test_index in cv.split(X, y):
    estimators_imp = []
    estimators = []
    X_train = X[train_index]
    X_test = X[test_index]
    y_train = y[train_index]
    y_test = y[test_index]

    inception_model, model = get_inception()
    history = model.fit(X_train, y_train, epochs=5, batch_size=64)

    X_train_features, X_valid_features = incept_rf(
        inception_model, model, X_train, X_test)
    X_train_features = pd.DataFrame(X_train_features)
    X_valid_features = pd.DataFrame(X_valid_features)
    d = len(X_train_features.columns)

    for i in range(n_estimators):
        imp_tr, imp_ts = generate_imp_space(
            X_train_features, y_train, X_valid_features, imp_feature_size, foz)
        imp_d = imp_tr.shape[1]

        imp_sel_d = 2 * round(math.log2(imp_d))
        sel_d = 2*round(math.log2(d))

        imp_rfc = RandomForestClassifier(
            max_features=imp_sel_d, n_estimators=50, random_state=42)
        estimators_imp.append(("imp_rfc"+str(i), imp_rfc))
        imp_rfc.fit(imp_tr, y_train)

        rfc = RandomForestClassifier(
            max_features=sel_d, n_estimators=50, random_state=42)
        estimators.append(("rfc"+str(i), rfc))
        rfc.fit(X_train_features, y_train)
    
    voting_imp = VotingClassifier(estimators=estimators_imp)
    voting_imp.fit(imp_tr, y_train)

    voting_rfc = VotingClassifier(estimators=estimators)
    voting_rfc.fit(X_train_features, y_train)

    imp_rfc_acc.append(voting_imp.score(imp_ts, y_test))
    rfc_acc.append(voting_rfc.score(X_valid_features, y_test))


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [None]:
cv = RepeatedKFold(n_splits=2, n_repeats=5, random_state=42)
foz = 4
imp_feature_size = 1
n_estimators = 3
imp_xgbc_acc = []
xgbc_acc = []
y_pred_imp_rfc = []
for train_index, test_index in cv.split(X, y):
    estimators_imp = []
    estimators = []
    X_train = X[train_index]
    X_test = X[test_index]
    y_train = y[train_index]
    y_test = y[test_index]

    inception_model, model = get_inception()
    history = model.fit(X_train, y_train, epochs=5, batch_size=64)

    X_train_features, X_valid_features = incept_rf(
        inception_model, model, X_train, X_test)
    X_train_features = pd.DataFrame(X_train_features)
    X_valid_features = pd.DataFrame(X_valid_features)
    d = len(X_train_features.columns)

    for i in range(n_estimators):
        imp_tr, imp_ts = generate_imp_space(
            X_train_features, y_train, X_valid_features, imp_feature_size, foz)
        imp_d = imp_tr.shape[1]

        imp_sel_d = 2 * round(math.log2(imp_d))
        sel_d = 2*round(math.log2(d))

        imp_xgbc = xgb.XGBClassifier(
            max_features=imp_sel_d, n_estimators=50, random_state=42)
        estimators_imp.append(("imp_xgbc"+str(i), imp_rfc))
        imp_xgbc.fit(imp_tr, y_train)

        xgbc = xgb.XGBClassifier(
            max_features=sel_d, n_estimators=50, random_state=42)
        estimators.append(("xgbc"+str(i), rfc))
        xgbc.fit(X_train_features, y_train)
    
    voting_imp = VotingClassifier(estimators=estimators_imp)
    voting_imp.fit(imp_tr, y_train)

    voting_xgbc = VotingClassifier(estimators=estimators)
    voting_xgbc.fit(X_train_features, y_train)

    imp_xgbc_acc.append(voting_imp.score(imp_ts, y_test))
    xgbc_acc.append(voting_xgbc.score(X_valid_features, y_test))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [None]:
print(imp_xgbc_acc)

[0.8493150684931506, 0.9233870967741935, 0.9081385979049154, 0.8548387096774194, 0.830781627719581, 0.8145161290322581, 0.8614020950846092, 0.9024193548387097, 0.8686543110394843, 0.9266129032258065]


In [None]:
print(xgbc_acc)

[0.8493150684931506, 0.9201612903225806, 0.9008863819500403, 0.8580645161290322, 0.8372280419016922, 0.8096774193548387, 0.863013698630137, 0.9048387096774193, 0.8694601128122482, 0.9274193548387096]


In [None]:
print(imp_rfc_acc)

[0.9121676067687349, 0.8580645161290322, 0.8654311039484287, 0.825, 0.8420628525382756, 0.8379032258064516, 0.8847703464947623, 0.8766129032258064, 0.8597904915390814, 0.8637096774193549]


In [None]:
print(rfc_acc)

[0.9097502014504432, 0.8653225806451613, 0.8718775181305399, 0.825, 0.8533440773569702, 0.8524193548387097, 0.8887993553585818, 0.8758064516129033, 0.8614020950846092, 0.8685483870967742]
