In [1]:
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier

import cv2
import numpy as np
from skimage.feature import local_binary_pattern

  "class": algorithms.Blowfish,


In [2]:
def compute_lbp_histogram(image, radius, n_points):
    lbp = local_binary_pattern(image, n_points, radius, method='nri_uniform')
    (hist, _) = np.histogram(lbp.ravel(), bins=np.arange(60), range=(0, 59))
    hist = hist.astype("float")
    hist /= hist.sum()  
    return hist

In [3]:
def process_images(image_folder, num_images, label):
    all_histograms = []
    if label == 1:
        for i in range(1, num_images):
            image_path = f"{image_folder}/{i}.jpg"
            image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
            if image is None:
                print(f"Error loading image: {image_path}")
                continue
            hist1 = compute_lbp_histogram(image, 1, 8)
            hist2 = compute_lbp_histogram(image, 2, 8)
            hist3 = compute_lbp_histogram(image, 3, 8)
            combined_hist = np.hstack([hist1, hist2, hist3])
            
            # Append the class label to each histogram
            labeled_hist = np.append(combined_hist, label)
            all_histograms.append(labeled_hist)
    else:
        for i in range(1, num_images):
            image_path = f"{image_folder}/-{i}.jpg"
            image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
            if image is None:
                print(f"Error loading image: {image_path}")
                continue
            hist1 = compute_lbp_histogram(image, 1, 8)
            hist2 = compute_lbp_histogram(image, 2, 8)
            hist3 = compute_lbp_histogram(image, 3, 8)
            combined_hist = np.hstack([hist1, hist2, hist3])
            
            # Append the class label to each histogram
            labeled_hist = np.append(combined_hist, label)
            all_histograms.append(labeled_hist)
    
    return np.vstack(all_histograms)

In [4]:
# Process COVID images with label 1
covid_folder = "C://covid"
num_covid_images = 69
covid_histograms = process_images(covid_folder, num_covid_images, label=1)


In [5]:
# Process non-COVID images with label 0
non_covid_folder = "C://noncovid"
num_non_covid_images = 69
non_covid_histograms = process_images(non_covid_folder, num_non_covid_images, label=0)

In [6]:
# Concatenate the datasets
dataset = np.vstack((covid_histograms, non_covid_histograms))

In [7]:
dataset

array([[0.129725, 0.052275, 0.00215 , ..., 0.085675, 0.474225, 1.      ],
       [0.12495 , 0.02625 , 0.0046  , ..., 0.10535 , 0.38175 , 1.      ],
       [0.115875, 0.0213  , 0.00515 , ..., 0.09705 , 0.353675, 1.      ],
       ...,
       [0.116625, 0.022675, 0.0053  , ..., 0.10725 , 0.36615 , 0.      ],
       [0.115675, 0.044875, 0.002425, ..., 0.087925, 0.436125, 0.      ],
       [0.1133  , 0.0325  , 0.0034  , ..., 0.096825, 0.407975, 0.      ]])

In [8]:
# Separate features (X) and labels (y)
X = dataset[:, :-1]  
y = dataset[:, -1] 

In [9]:
# Separate features (X) and labels (y)
X = dataset[:, :-1]  
y = dataset[:, -1] 

In [10]:
# Display the shapes to verify
print(f"Features shape (X): {X.shape}")
print(f"Labels shape (y): {y.shape}")


print("Sample of X:", X[:5])
print("Sample of y:", y[:5])

Features shape (X): (136, 177)
Labels shape (y): (136,)
Sample of X: [[0.129725 0.052275 0.00215  0.0117   0.002025 0.0506   0.002325 0.01255
  0.002275 0.006125 0.006575 0.00425  0.003775 0.00705  0.005325 0.00505
  0.0048   0.006325 0.0036   0.005225 0.0069   0.00485  0.0038   0.00555
  0.0069   0.007325 0.003875 0.003125 0.00545  0.00445  0.0032   0.004175
  0.00575  0.0086   0.005125 0.0055   0.0043   0.008725 0.004525 0.00575
  0.00475  0.004225 0.004625 0.0059   0.005425 0.0052   0.0051   0.005475
  0.005375 0.0029   0.008725 0.00305  0.0519   0.0033   0.009375 0.002925
  0.050775 0.13885  0.250525 0.1093   0.02555  0.0039   0.025825 0.003775
  0.02475  0.00455  0.0304   0.003775 0.0029   0.0032   0.004175 0.003325
  0.002675 0.00275  0.0039   0.003325 0.0046   0.0027   0.004325 0.00265
  0.004225 0.002575 0.004325 0.002775 0.00375  0.00615  0.005275 0.00165
  0.002375 0.006925 0.005725 0.002425 0.00405  0.0035   0.004325 0.003125
  0.003625 0.002725 0.0044   0.003175 0.003275 0.

In [11]:
# Split the data into training and testing sets with stratification
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42, stratify=y)

In [12]:
# Initialize the Random Forest classifier with class weighting
rf_model = RandomForestClassifier(class_weight='balanced', random_state=42)

In [13]:
# Define the hyperparameters to tune
param_grid = {
    'n_estimators': [100, 150, 200, 500, 1000],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

In [14]:
# Use GridSearchCV to find the best hyperparameters
grid_search = GridSearchCV(estimator=rf_model, param_grid=param_grid, 
                           cv=5, n_jobs=-1, verbose=2)

In [15]:
# Fit GridSearchCV to the training data
grid_search.fit(X_train, y_train)
# Get the best model from grid search
best_rf_model = grid_search.best_estimator_

Fitting 5 folds for each of 180 candidates, totalling 900 fits


In [16]:
# Make predictions on the test set
y_pred = best_rf_model.predict(X_test)

In [17]:
# Evaluate the model
print("Best Parameters:", grid_search.best_params_)
print("Classification Report:")
print(classification_report(y_test, y_pred))
print("Accuracy Score:", accuracy_score(y_test, y_pred))

Best Parameters: {'max_depth': None, 'min_samples_leaf': 4, 'min_samples_split': 2, 'n_estimators': 150}
Classification Report:
              precision    recall  f1-score   support

         0.0       0.83      0.71      0.77         7
         1.0       0.75      0.86      0.80         7

    accuracy                           0.79        14
   macro avg       0.79      0.79      0.78        14
weighted avg       0.79      0.79      0.78        14

Accuracy Score: 0.7857142857142857


In [157]:
grid_search.score(X_test, y_test)

0.7857142857142857

In [18]:
from sklearn.ensemble import GradientBoostingClassifier
clf = GradientBoostingClassifier(random_state=42)

In [23]:
from sklearn.ensemble import ExtraTreesClassifier
clf = ExtraTreesClassifier(class_weight='balanced', random_state=42)

In [24]:
grid_search = GridSearchCV(estimator=clf, param_grid=param_grid, 
                           cv=5, n_jobs=-1, verbose=2)

In [25]:
# Fit GridSearchCV to the training data
grid_search.fit(X_train, y_train)
# Get the best model from grid search
best_rf_model = grid_search.best_estimator_

Fitting 5 folds for each of 180 candidates, totalling 900 fits


In [26]:
y_pred = best_rf_model.predict(X_test)

In [27]:
# Evaluate the model
print("Best Parameters:", grid_search.best_params_)
print("Classification Report:")
print(classification_report(y_test, y_pred))
print("Accuracy Score:", accuracy_score(y_test, y_pred))

Best Parameters: {'max_depth': 10, 'min_samples_leaf': 2, 'min_samples_split': 5, 'n_estimators': 100}
Classification Report:
              precision    recall  f1-score   support

         0.0       1.00      0.57      0.73         7
         1.0       0.70      1.00      0.82         7

    accuracy                           0.79        14
   macro avg       0.85      0.79      0.78        14
weighted avg       0.85      0.79      0.78        14

Accuracy Score: 0.7857142857142857


In [114]:
y_pred = best_rf_model.predict_proba(X_test)

In [115]:
print(y_pred,y_test)

[[0.44700964 0.55299036]
 [0.55283857 0.44716143]
 [0.26919455 0.73080545]
 [0.44291895 0.55708105]
 [0.32510329 0.67489671]
 [0.41945911 0.58054089]
 [0.63157071 0.36842929]
 [0.44812099 0.55187901]
 [0.27929291 0.72070709]
 [0.68117532 0.31882468]
 [0.53326708 0.46673292]
 [0.38411655 0.61588345]
 [0.53992569 0.46007431]
 [0.52587448 0.47412552]] [1. 0. 1. 0. 1. 0. 0. 1. 1. 0. 0. 1. 0. 1.]


In [116]:
y_pred = best_rf_model.predict(X_test)

In [117]:
y_pred

array([1., 0., 1., 1., 1., 1., 0., 1., 1., 0., 0., 1., 0., 0.])

In [30]:
from sklearn.neural_network import MLPClassifier
clf = MLPClassifier(random_state=1, max_iter=300000, hidden_layer_sizes=(512,512,512,512)).fit(X_train, y_train)
y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred))
print("Accuracy Score:", accuracy_score(y_test, y_pred))

              precision    recall  f1-score   support

         0.0       1.00      0.57      0.73         7
         1.0       0.70      1.00      0.82         7

    accuracy                           0.79        14
   macro avg       0.85      0.79      0.78        14
weighted avg       0.85      0.79      0.78        14

Accuracy Score: 0.7857142857142857
