In [1]:
# Importing libraries
import numpy as np
import pandas as pd
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import math
import os
from os import listdir
from os.path import isfile, join
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix 
from sklearn.metrics import accuracy_score 
from sklearn.metrics import classification_report 
import matplotlib.patches as patches
from PIL import Image
from mlxtend.plotting import plot_decision_regions
import seaborn as sns
import cv2
import random

%matplotlib inline

# Get current/root directory (the working directory at the time this cell runs)
root = os.getcwd()

from sklearn.ensemble import RandomForestClassifier
import pandas_profiling

from matplotlib import rcParams
import warnings

# Silence every warning for the rest of the session; this also hides
# deprecation warnings raised by the libraries used below.
warnings.filterwarnings("ignore")

# figure size in inches
rcParams["figure.figsize"] = 10, 6
# Seeds NumPy's global RNG only. Python's `random` module (used further down to
# sample patients) is not seeded, so the patient split changes between kernel sessions.
np.random.seed(42)

In [2]:
# Hyper-parameter settings
LEARNING_RATE, MOMENTUM = 1e-3, 0.9
BATCH_SIZE = 3

# Presumably the two class labels (the loaders use 0 and 1) -- TODO confirm
class_arr = list(range(2))

In [3]:
# Random data set select
def split_half(items):
    """Split ``items`` into two consecutive halves.

    Parameters
    ----------
    items : sequence
        Items to split; the order is preserved.

    Returns
    -------
    tuple of (list, list)
        ``(first, second)`` where ``first`` holds the first ``len(items) // 2``
        items and ``second`` holds the rest. For an odd length the extra item
        goes to ``second``; for a single item ``first`` is empty.
    """
    half = len(items) // 2
    return list(items[:half]), list(items[half:])


select_patients = 20

# Patient numbers that must never be sampled.
# NOTE(review): only 57 lies inside the 1..100 candidate range below; the other
# ids have no effect unless that range is extended -- confirm the intended range.
excluded_patients = {57, 112, 120, 179, 184}

patients_index = [pid for pid in range(1, 101) if pid not in excluded_patients]

# Draw the patients for this run. `random` is not seeded in this notebook, so
# every execution of this cell yields a different split.
sampled_list = random.sample(patients_index, select_patients)

# First half -> training patients, second half -> test patients. Slicing is used
# instead of continuing from a leaked loop variable, which left `test_items`
# empty whenever the training half was empty.
train_items, test_items = split_half(sampled_list)

# Position in `sampled_list` of the last training patient (0 when there is none)
last_index = max(len(train_items) - 1, 0)

print(sampled_list)

[67, 61, 43, 11, 39, 82, 58, 18, 49, 42, 91, 97, 23, 29, 65, 15, 28, 31, 77, 56]


In [4]:
# Variables
feature_map = []
classes = []  # Catheter: 1, Echo: 0
data_count = 0
file_count = 0

# Raw per-row features, pooled over every loaded training file
mags = []
phases = []
sectors = []
distances = []
sigs = []

# Scaled signature vectors grouped by class
cat_data = []
echo_data = []

# Read train data
train_loc = 'D:/SCA/256_256/feature_map/exp/train'
train_files = [f for f in listdir(train_loc) if isfile(join(train_loc, f))]

# Only the first 40 rows of each file are used
rows_per_object = 40

print('Loading train data...')

for file in train_files:

    # Field 0 of the '_'-separated file name is the patient number,
    # field 3 is the object type ('c' is labelled as catheter below)
    f_arr = file.split('_')
    pn = int(f_arr[0])
    otype = f_arr[3]

    # Keep only the patients sampled for training
    if pn not in train_items:
        continue

    # NOTE(review): every type other than 'c' is labelled 0 (Echo) here, whereas
    # the test loader only accepts 'e' and 'c' -- confirm no other types exist.
    lbl = 1 if otype == 'c' else 0

    # Each line is read as a single tab-delimited string column and then split on ','
    dataset = pd.read_csv(train_loc + '/' + file, header=None, delimiter='\t')
    dataset = dataset.values

    one_object_features = []

    for ds in dataset[:rows_per_object]:
        # Order: magnitude(0), phase(1), signature(2), sector(3), distance(4)
        # `float` replaces the `np.float` alias, which no longer exists in NumPy >= 1.24
        o_arr = np.array(ds[0].split(',')).astype(float)

        mags.append(o_arr[0])
        phases.append(o_arr[1])
        sectors.append(o_arr[3])
        distances.append(o_arr[4])
        sigs.append(o_arr[2])

        # Only the signature is used as model input; append o_arr[0], o_arr[1],
        # o_arr[3] or o_arr[4] here to experiment with the other features.
        one_object_features.append(o_arr[2])

    one_object_features = np.array(one_object_features)

    # Data normalization: the scaler is fitted on this object's values alone,
    # so every sample is standardised independently of the others
    scaler = StandardScaler()
    scaled_data = scaler.fit_transform(one_object_features.reshape(-1, 1))

    feature_map.append(scaled_data)
    classes.append(lbl)

    # Group by class. Catheter samples go to `cat_data` only; they were
    # previously appended to `echo_data` as well.
    if lbl == 1:
        cat_data.append(scaled_data)
    else:
        echo_data.append(scaled_data)

    data_count = data_count + 1
    print('\r' + str(data_count), end='')

    file_count += 1

feature_map = np.array(feature_map)
classes = np.array(classes)

print('\nDone!')

Loading train data...
4042
Done!


In [5]:
# Load test data

# Variables
test_feature_map = []
test_classes = []  # Catheter: 1, Echo: 0
test_files_title = []
data_count = 0

# Read test data
# The test files are read from the same directory as the training files; the
# train/test separation is done by patient number (`test_items`).
test_loc = 'D:/SCA/256_256/feature_map/exp/train'
#test_loc = 'D:/SCA/256_256/feature_map/test_unet'
test_files = [f for f in listdir(test_loc) if isfile(join(test_loc, f))]

# Only the first 40 rows of each file are used
test_rows_per_object = 40

print('Loading test data...')

for file in test_files:
    # Field 0 of the '_'-separated file name is the patient number,
    # field 3 is the object type
    f_arr = file.split('_')
    pn = int(f_arr[0])
    otype = f_arr[3]

    # Keep only test patients and the two known object types
    if pn not in test_items or otype not in ('e', 'c'):
        continue

    lbl = 1 if otype == 'c' else 0

    # Each line is read as a single tab-delimited string column and then split on ','
    dataset = pd.read_csv(test_loc + '/' + file, header=None, delimiter='\t')
    dataset = dataset.values
    if len(dataset) == 0:
        # Nothing to scale: fitting the scaler on an empty array would raise,
        # so report the file and move on.
        print('null')
        continue

    one_object_features = []

    for ds in dataset[:test_rows_per_object]:
        # Order: magnitude(0), phase(1), signature(2), sector(3), distance(4)
        # `float` replaces the `np.float` alias, which no longer exists in NumPy >= 1.24
        o_arr = np.array(ds[0].split(',')).astype(float)

        # Only the signature is used as model input; append o_arr[0], o_arr[1],
        # o_arr[3] or o_arr[4] here to experiment with the other features.
        one_object_features.append(o_arr[2])

    data_count = data_count + 1
    print('\r' + str(data_count), end='')

    one_object_features = np.array(one_object_features)

    # Data normalization: the scaler is fitted on this object's values alone
    scaler = StandardScaler()
    scaled_data = scaler.fit_transform(one_object_features.reshape(-1, 1))

    test_feature_map.append(scaled_data)
    test_classes.append(lbl)
    test_files_title.append(file)

test_feature_map = np.array(test_feature_map)
test_classes = np.array(test_classes)
test_files_title = np.array(test_files_title)

print('\nDone!')

Loading test data...
4011
Done!


In [6]:
# Flatten every training sample so the 3-D array becomes a 2-D
# (samples, features) matrix.
nsamples, nx, ny = np.shape(feature_map)
d2_train_dataset = np.reshape(feature_map, (nsamples, nx * ny))

In [7]:
# Flatten every test sample so the 3-D array becomes a 2-D
# (samples, features) matrix.
nsamples_, nx_, ny_ = np.shape(test_feature_map)
d2_test_dataset = np.reshape(test_feature_map, (nsamples_, nx_ * ny_))

In [8]:
# Build the classifier: a random forest of 800 trees
n_trees = 800
classifier = RandomForestClassifier(n_estimators=n_trees)

# Fit on the flattened training samples. As the last expression of the cell,
# the fitted estimator is also displayed.
classifier.fit(d2_train_dataset, classes)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=800,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [9]:
# Predict a class label for every flattened test sample
y_pred = classifier.predict(X=d2_test_dataset)

In [12]:
# Calculate model accuracy on the test patients
acc = accuracy_score(test_classes, y_pred)

# `acc_arr` collects the accuracy of each run. Create it when it does not
# exist yet so this cell also works on a fresh kernel (Restart & Run All)
# instead of failing with a NameError.
if 'acc_arr' not in globals():
    acc_arr = []
acc_arr.append(acc)
print("Accuracy:", acc)

Accuracy: 0.7354774370481176


In [13]:
# Display the accuracies collected so far
acc_arr

[0.7385239929218278,
 0.762008495806557,
 0.7108669108669109,
 0.7519202823334026,
 0.7226019325948622]

In [11]:
# Hard-coded accuracy values; they match the first four entries displayed
# above, so presumably they were copied from earlier runs -- TODO confirm.
# NOTE(review): this cell carries execution count 11, i.e. it was run before
# the accuracy cell (12) even though it sits below it. On a top-to-bottom run
# it executes last and overwrites whatever `acc_arr` held.
acc_arr = [0.7385239929218278, 0.762008495806557, 0.7108669108669109, 0.7519202823334026]