In [1]:
# from google.colab import drive
# drive.mount('/content/drive')

In [1]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import skimage
from skimage.io import imread
from skimage.transform import resize
from sklearn import svm, metrics, datasets
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import precision_score, recall_score, confusion_matrix, roc_auc_score, classification_report
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.utils import Bunch

%matplotlib notebook

In [22]:
def load_image_files(path,image_names,labels, dimension=(100, 100)):
    """Read, resize and flatten image files into a scikit-learn ``Bunch``.

    Parameters
    ----------
    path : str or pathlib.Path
        Directory that contains the image files.
    image_names : iterable of str
        File names relative to ``path``, in the order they are loaded.
    labels : array-like
        One label per entry of ``image_names``, in the same order.
    dimension : tuple of int, default (100, 100)
        Output shape handed to ``skimage.transform.resize``.

    Returns
    -------
    Bunch
        ``image_data``  - array with one flattened, resized image per row.
        ``labels``      - ``labels`` converted to a NumPy array.
        ``label_names`` - the fixed list ``['Abnormal', 'Normal']``.
        ``images``      - array of the resized images before flattening.

    Raises
    ------
    ValueError
        If the number of labels differs from the number of image names.
    """
    image_dir = Path(path)
    # NOTE(review): the caller in this notebook passes the 0/1 'Abnormal'
    # column as labels, so label 0 would index 'Abnormal' in this list.
    # Confirm the intended order before using label_names for display.
    categories = ['Abnormal','Normal']

    # Materialise the names so they can be counted and iterated exactly once.
    names = list(image_names)
    labels = np.array(labels)

    # Fail early: a length mismatch would otherwise only surface later, when
    # features and labels are split or fitted together.
    if labels.ndim == 0 or len(labels) != len(names):
        raise ValueError(
            'labels must contain exactly one entry per image name '
            '(got {} image names and labels of shape {})'.format(
                len(names), labels.shape))

    images = []
    flat_images = []
    for name in names:
        # Join via pathlib so `path` may be a str or a Path object.
        img = skimage.io.imread(str(image_dir / name))
        img_resized = resize(img, dimension, anti_aliasing=True, mode='reflect')
        flat_images.append(img_resized.flatten())
        images.append(img_resized)

    return Bunch(image_data=np.array(flat_images),
                 labels=labels,
                 label_names=categories,
                 images=np.array(images))
                 

In [18]:
# Directory that holds the image files; it is handed to load_image_files below.
path = 'PARTA_B'
# Alternative location when the images live on a mounted Google Drive (Colab):
#path = '/content/drive/MyDrive/gp/PARTA&B'


In [20]:
# Load the label table. The preview below shows a Name column (image file name)
# followed by the 0/1 columns Abnormal, Infection and Ischemia.
df = pd.read_csv('PA&BLABELD_balanced.csv')
df.head()

Unnamed: 0,Name,Abnormal,Infection,Ischemia
0,000765_10.jpg,0,0,0
1,001760_21_M.jpg,0,0,0
2,000813_30.jpg,1,1,0
3,001224_30.jpg,1,1,0
4,000676_30.jpg,1,1,0


In [21]:
# Select the columns by name rather than by position so the selection keeps
# working if the CSV gains, loses or reorders columns.
image_names = df['Name']         # image file names
image_labels = df['Abnormal']    # binary target: 1 = abnormal, 0 = not abnormal

In [23]:
# Read every listed image from `path`, resize it and collect the flattened
# pixels together with the labels.
image_dataset = load_image_files(
    path,
    image_names=image_names,
    labels=image_labels,
)
print("loading is done!")

loading is done!


In [24]:
# Hold out 30% of the samples for testing; the fixed seed keeps the split
# identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    image_dataset.image_data,
    image_dataset.labels,
    test_size=0.3,
    random_state=42,
)

In [25]:
# Hyper-parameter grid: a linear kernel over four C values, plus an RBF kernel
# over the same C values crossed with two gamma values.
param_grid = [
    dict(C=[1, 10, 100, 1000], kernel=['linear']),
    dict(C=[1, 10, 100, 1000], gamma=[0.001, 0.0001], kernel=['rbf']),
]
svc = svm.SVC()
svc_model = GridSearchCV(estimator=svc, param_grid=param_grid)


In [26]:
# Run the grid search over param_grid on the training split.
svc_model.fit(X_train, y_train)

GridSearchCV(estimator=SVC(),
             param_grid=[{'C': [1, 10, 100, 1000], 'kernel': ['linear']},
                         {'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001],
                          'kernel': ['rbf']}])

In [27]:
# Predict the held-out test split with the fitted grid-search model.
y_pred = svc_model.predict(X_test)

In [29]:
# SVM confusion matrix: rows are true classes, columns are predicted classes.
confusion_matrix(y_test, y_pred)

array([[790, 108],
       [191, 711]], dtype=int64)

In [28]:
# Per-class precision / recall / F1 for the SVM grid search on the test split.
print(
    "Classification report for - \n{}:\n{}\n".format(
        svc_model,
        classification_report(y_test, y_pred),
    )
)

Classification report for - 
GridSearchCV(estimator=SVC(),
             param_grid=[{'C': [1, 10, 100, 1000], 'kernel': ['linear']},
                         {'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001],
                          'kernel': ['rbf']}]):
              precision    recall  f1-score   support

           0       0.81      0.88      0.84       898
           1       0.87      0.79      0.83       902

    accuracy                           0.83      1800
   macro avg       0.84      0.83      0.83      1800
weighted avg       0.84      0.83      0.83      1800




In [30]:
# Random-forest model: 277 entropy-criterion trees with a fixed seed so the
# fitted forest is reproducible. RandomForestClassifier is imported in the
# notebook's import cell rather than mid-notebook.
rf_model = RandomForestClassifier(criterion='entropy', n_estimators=277, random_state=0)

In [31]:
# Fit the random forest on the same training split used for the SVM.
rf_model.fit(X_train, y_train)

RandomForestClassifier(criterion='entropy', n_estimators=277, random_state=0)

In [32]:
# Predict the held-out test split with the fitted random forest.
y1_pred = rf_model.predict(X_test)

In [33]:
# Random-forest confusion matrix: rows are true classes, columns are predicted classes.
confusion_matrix(y_test, y1_pred)

array([[788, 110],
       [176, 726]], dtype=int64)

In [34]:
# Per-class precision / recall / F1 for the random forest on the test split.
print(
    "Classification report for - \n{}:\n{}\n".format(
        rf_model,
        classification_report(y_test, y1_pred),
    )
)

Classification report for - 
RandomForestClassifier(criterion='entropy', n_estimators=277, random_state=0):
              precision    recall  f1-score   support

           0       0.82      0.88      0.85       898
           1       0.87      0.80      0.84       902

    accuracy                           0.84      1800
   macro avg       0.84      0.84      0.84      1800
weighted avg       0.84      0.84      0.84      1800




In [35]:
# Single decision tree as a baseline. random_state is pinned (same seed as the
# random forest) because scikit-learn permutes the features at every split, so
# an unseeded tree can differ from run to run. DecisionTreeClassifier is
# imported in the notebook's import cell rather than mid-notebook.
dt_model = DecisionTreeClassifier(random_state=0)
dt_model.fit(X_train, y_train)

DecisionTreeClassifier()

In [36]:
# Predict the held-out test split with the fitted decision tree.
y3_pred = dt_model.predict(X_test)

In [40]:
# Decision-tree confusion matrix: rows are true classes, columns are predicted classes.
confusion_matrix(y_test, y3_pred)

array([[662, 236],
       [233, 669]], dtype=int64)

In [38]:
# Per-class precision / recall / F1 for the decision tree on the test split.
print(
    "Classification report for - \n{}:\n{}\n".format(
        dt_model,
        classification_report(y_test, y3_pred),
    )
)

Classification report for - 
DecisionTreeClassifier():
              precision    recall  f1-score   support

           0       0.74      0.74      0.74       898
           1       0.74      0.74      0.74       902

    accuracy                           0.74      1800
   macro avg       0.74      0.74      0.74      1800
weighted avg       0.74      0.74      0.74      1800


