In [1]:
import os
name = "Train_Random_Forest_Model"
load_model = True
save_model = False
train_again = False
working_dir = r"C:\Users\droko\Documents\קורסים\סדנה במדעי הנתונים\פרויקט"
saved_model_path = os.path.join(working_dir, f'{name}.h5')

# Loading the data

In [2]:
data_dir = os.path.join(working_dir, "ChestXRays")

In [3]:
test_path = os.path.join(data_dir,'test')
train_path = os.path.join(data_dir, 'train')
val_path = os.path.join(data_dir, 'val')

In [4]:
from skimage.io import imread
import cv2
from sklearn.model_selection import train_test_split

def load_data(data_path):
    images = []
    labels = []
    for label in ['NORMAL', 'PNEUMONIA']:
        path = os.path.join(data_path, label)
        for file in os.listdir(path):
            image = imread(os.path.join(path, file))
            image = cv2.cvtColor(image, cv2.IMREAD_GRAYSCALE)
            image = cv2.resize(image, (224, 224))
            image_np = image.flatten()
            images.append(image_np)
            labels.append(label)
        print(f"Loaded {len(images)} {label} images")
            
    return (images, labels)

In [5]:
if train_again:
    train_images, train_labels = load_data(train_path)
test_images, test_labels = load_data(test_path)
validation_images, validation_labels = load_data(val_path)

Loaded 234 NORMAL images
Loaded 624 PNEUMONIA images
Loaded 8 NORMAL images
Loaded 16 PNEUMONIA images


In [6]:
if False:
    from fastai.vision.all import *
    from tqdm import tqdm

    train_images = []
    train_labels = []

    def label_func(f):
        return "NORMAL" in str(f)
    files = get_image_files(os.path.join(train_path, "PNEUMONIA")) +  get_image_files(os.path.join(train_path, "NORMAL"))
    dls = ImageDataLoaders.from_path_func(train_path, files, label_func, item_tfms=Resize(224))
    dls.show_batch()

In [7]:
if False:
    learn = vision_learner(dls, resnet34, metrics=error_rate)
    learn.fine_tune(1)

# Building the model

## Create the model

In [8]:
from sklearn.ensemble import RandomForestClassifier
import pickle

if os.path.exists(saved_model_path) and load_model:
    with open(saved_model_path, "rb") as f:
        model = pickle.loads(f.read())
else:
    rfc = RandomForestClassifier(n_estimators=100)
    model = rfc.fit(train_images, train_labels)

In [9]:
if save_model:
    with open(saved_model_path, "wb") as f:
        pickle.dump(results, f)

# Evaluate the model on test set

In [10]:
test_images_score = model.score(test_images, test_labels)
print(f"The testing score: {test_images_score*100}%")

The testing score: 75.16025641025641%


In [11]:
validation_images_score = model.score(validation_images, validation_labels)
print(f"The validation score: {validation_images_score*100}%")

The validation score: 62.5%


# Prediction of an image

In [12]:
import random

def predict(model, image_path):
    image = imread(image_path)
    image = cv2.cvtColor(image, cv2.IMREAD_GRAYSCALE)
    image = cv2.resize(image, (224, 224))
    image_np = image.flatten()

    result = model.predict([image_np])
    print(f"{image_path}: {result[0]}")
    
normal_test_path = os.path.join(test_path, "NORMAL")
normal_image_path = os.path.join(normal_test_path, random.choice(os.listdir(normal_test_path)))
predict(model, normal_image_path)

pneumonia_test_path = os.path.join(test_path, "PNEUMONIA")
pneumonia_image_path = os.path.join(pneumonia_test_path, random.choice(os.listdir(pneumonia_test_path)))
predict(model, pneumonia_image_path)

C:\Users\droko\Documents\קורסים\סדנה במדעי הנתונים\פרויקט\ChestXRays\test\NORMAL\NORMAL2-IM-0287-0001.jpeg: NORMAL
C:\Users\droko\Documents\קורסים\סדנה במדעי הנתונים\פרויקט\ChestXRays\test\PNEUMONIA\person1653_virus_2859.jpeg: PNEUMONIA
