### Installing & Importing all the necessary packages

Update system packages, install `libgl1`, and install the `openpyxl` library for handling Excel files

In [None]:
from IPython.display import clear_output

!sudo apt-get update && apt-get install libgl1 -y
!pip install openpyxl

clear_output()

In [5]:
# Data manipulation and analysis
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split, KFold, GridSearchCV
from sklearn.metrics import (accuracy_score, precision_score, recall_score, 
                             confusion_matrix, classification_report, roc_curve, 
                             roc_auc_score, f1_score, matthews_corrcoef, hamming_loss, 
                             precision_recall_curve)

# Import classifiers and utilities from scikit-learn for building and managing machine learning models:
from sklearn.svm import SVC
from sklearn.pipeline import make_pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn import metrics
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import ExtraTreeClassifier
from sklearn.linear_model import RidgeClassifier

# Image processing and augmentation
import os
from PIL import Image
import cv2
from tensorflow.keras.preprocessing.image import (load_img, img_to_array, ImageDataGenerator)
import albumentations as A
from skimage.segmentation import slic, mark_boundaries
import skimage.io

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns
import sweetviz

# Machine learning and deep learning
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (Input, Dense, BatchNormalization, Flatten, Conv2D, 
                                     MaxPooling2D, Dropout, GlobalAveragePooling2D, 
                                     LeakyReLU, Activation)
from tensorflow.keras.regularizers import l1, l2
from tensorflow.keras.optimizers import Adam, Adamax, SGD, Adagrad, Adadelta, RMSprop, Nadam
from tensorflow.keras.callbacks import (TensorBoard, ModelCheckpoint, EarlyStopping, 
                                        History, ReduceLROnPlateau, CSVLogger, LearningRateScheduler)
from tensorflow.keras.applications import (VGG19, ResNet152V2, ResNet50V2, ResNet101V2, 
                                           InceptionV3, InceptionResNetV2, MobileNetV2, 
                                           DenseNet169, NASNetMobile, EfficientNetB7, ConvNeXtBase)

# Explanation and visualization tools
from lime import lime_image
import shap
from IPython.display import Image, display

# Miscellaneous
import warnings
warnings.filterwarnings('ignore')

# IPython magic commands for TensorBoard
%matplotlib inline
%load_ext tensorboard
%reload_ext tensorboard

# Set plot parameters
params = {'figure.figsize': (16, 8),
          'legend.fontsize': 16,
          'legend.handlelength': 2,
          'axes.titlesize': 'large'}
sns.set_theme(style="white")
plt.rcParams.update(params)

### Google drive operations (optional)

In [9]:
#%cd "/content/drive/MyDrive/PCOS_TL_ML"
# !zip -r save_filename.extension . i folder_location_to_zip
#!zip -r folder.zip . i folder_to_zip
# !ls

In [10]:
#import os
#os.environ['KAGGLE_CONFIG_DIR'] = "/content/drive/MyDrive"
# my kaggle.json file in /content/drive/MyDrive/ColabNotebooks

In [11]:
#!pip install kaggle
#!kaggle datasets init -p /content/drive/MyDrive/PCOS_TL_ML
# my dataset in Resized folder.

In [12]:
#!kaggle datasets create -p /content/drive/MyDrive/PCOS_TL_ML --dir-mode zip

### Data Loading and Cleaning

Load class labels from an Excel file, binary test labels from a CSV, clean the class labels DataFrame by removing empty rows and columns, and display the cleaned data.

In [13]:
la1=pd.read_excel('C:/Users/anushka saini/OneDrive/Desktop/AutoPCOS_classification_challenge/dataset/train/class_label.xlsx')
la1.head()

Unnamed: 0,imagePath,Healthy
0,100image13.jpg,1
1,100image2.jpg,1
2,100image65.jpg,0
3,100image71.jpg,0
4,100image83.jpg,0


In [14]:
la2 = pd.read_csv('C:/Users/anushka saini/OneDrive/Desktop/AutoPCOS_classification_challenge/dataset/test_label_binary.csv')
la2.head()

Unnamed: 0,imagePath,Healthy
0,image10000.jpg,1
1,image10001.jpg,0
2,image10002.jpg,1
3,image10003.jpg,0
4,image10004.jpg,0


In [15]:
df = la1.dropna(how='all').dropna(how='all', axis=1)
print(df)

           imagePath  Healthy
0     100image13.jpg        1
1      100image2.jpg        1
2     100image65.jpg        0
3     100image71.jpg        0
4     100image83.jpg        0
...              ...      ...
3195       pco_4.jpg        1
3196       pco_5.jpg        0
3197       pco_6.jpg        0
3198       pco_7.jpg        0
3199       pco_8.jpg        0

[3200 rows x 2 columns]


### Train-Validate Split

Split the DataFrame into training and validation sets based on specified percentages, using optional random seed for reproducibility.

In [16]:
def train_validate_split(df, train_percent=.8, validate_percent=.2, seed=None):
    np.random.seed(seed)
    perm = np.random.permutation(df.index)
    m = len(df.index)
    train_end = int(train_percent * m)
    validate_end = int(validate_percent * m) + train_end
    train = df.iloc[perm[:train_end]]
    validate = df.iloc[perm[train_end:validate_end]]
    # test = df.iloc[perm[validate_end:]]
    return train, validate

In [17]:
train, validate = train_validate_split(df)

In [18]:
train

Unnamed: 0,imagePath,Healthy
881,image0436.jpg,0
1439,image1044.jpg,0
1028,image0592.jpg,0
208,191image0.jpg,0
10,102image93.jpg,0
...,...,...
2920,image3581.jpg,1
23,108image0081.jpg,0
1288,image0874.jpg,0
2161,image2678.jpg,0


In [20]:
validate

Unnamed: 0,imagePath,Healthy
327,3309image24.jpg,0
247,217image26.jpg,0
2281,image2835.jpg,1
2752,image3386.jpg,0
366,518image25.jpg,0
...,...,...
1299,image0885.jpg,0
895,image0450.jpg,0
1347,image0939.jpg,0
1651,image1607.jpg,0


### Image and Label Preparation

Map labels, load and preprocess images for training, validation, and testing, and display shapes of image arrays and labels.

In [21]:
label_mapping = {1: "Healthy", 0: "Unhealthy"}
df['Label'] = df['Healthy'].map(label_mapping)
print(df)

           imagePath  Healthy      Label
0     100image13.jpg        1    Healthy
1      100image2.jpg        1    Healthy
2     100image65.jpg        0  Unhealthy
3     100image71.jpg        0  Unhealthy
4     100image83.jpg        0  Unhealthy
...              ...      ...        ...
3195       pco_4.jpg        1    Healthy
3196       pco_5.jpg        0  Unhealthy
3197       pco_6.jpg        0  Unhealthy
3198       pco_7.jpg        0  Unhealthy
3199       pco_8.jpg        0  Unhealthy

[3200 rows x 3 columns]


In [22]:
train_labels = train[["Healthy"]].values
train_image_paths = [os.path.join('C:/Users/anushka saini/OneDrive/Desktop/AutoPCOS_classification_challenge/dataset/train/images', filename) for filename in train['imagePath']] #to change when using GPU. similarly for validate and test
train_images = []
for train_image_path in train_image_paths:
    image = load_img(train_image_path, target_size=(300, 300)) # change this when you run on GPU. 320X320 rahegi
    image = img_to_array(image) / 255.0  # Normalize pixel values between 0 and 1
    train_images.append(image)
train_images = np.array(train_images, dtype=np.float32)

In [23]:
train_images.shape

(2560, 300, 300, 3)

In [24]:
train_labels.shape

(2560, 1)

In [25]:
validate_labels = validate[["Healthy"]].values
validate_image_paths = [os.path.join('C:/Users/anushka saini/OneDrive/Desktop/AutoPCOS_classification_challenge/dataset/train/images', filename) for filename in validate['imagePath']]
validate_images = []
for validate_image_path in validate_image_paths:
    image = load_img(validate_image_path, target_size=(300, 300)) # change this when you run on GPU
    image = img_to_array(image) / 255.0  # Normalize pixel values between 0 and 1
    validate_images.append(image)
validate_images = np.array(validate_images, dtype=np.float32)

In [26]:
validate_images.shape

(640, 300, 300, 3)

In [27]:
validate_labels.shape

(640, 1)

In [28]:
test_labels = la2[["Healthy"]].values
test_image_paths = [os.path.join('C:/Users/anushka saini/OneDrive/Desktop/AutoPCOS_classification_challenge/dataset/test/images', filename) for filename in la2['imagePath']]
test_images = []
for test_image_path in test_image_paths:
    image = load_img(test_image_path, target_size=(300, 300)) # change this when you run on GPU
    image = img_to_array(image) / 255.0  # Normalize pixel values between 0 and 1
    test_images.append(image)
test_images = np.array(test_images, dtype=np.float32)

In [29]:
test_images.shape

(1468, 300, 300, 3)

In [30]:
test_labels.shape

(1468, 1)

### Data Reshaping for Classifier

Reshape training, validation, and test images into flat arrays for classifier input and print their shapes.

In [31]:
#train dataset 80% for classifier fitting
x_train = train_images.reshape(train_images.shape[0], -1)
y_train = train_labels
# print(x_train)
# print(y_train)
print(x_train.shape)
print(y_train.shape)

(2560, 270000)
(2560, 1)


In [32]:
#validation data 20% for classifier prediction
x_test_internal = validate_images.reshape(validate_images.shape[0], -1)
y_test_internal = validate_labels
# print(y_test_internal)
print(x_test_internal.shape)
print(y_test_internal.shape)

(640, 270000)
(640, 1)


In [33]:
x_test_external = test_images.reshape(test_images.shape[0], -1)
y_test_external = test_labels
# print(y_test_external)
print(x_test_external.shape)
print(y_test_external.shape)

(1468, 270000)
(1468, 1)


### Model Training and Evaluation

Train Multiple classifiers on the training data, then evaluate and print accuracy, balanced accuracy, weighted F1 score, weighted recall, weighted precision, and weighted Jaccard score for both internal and external test datasets.

In [35]:
clf0 = RandomForestClassifier(max_depth=2, random_state=0).fit(x_train, y_train)
ypred0 = clf0.predict(x_test_internal)
print("Accuracy is", metrics.accuracy_score(y_test_internal, ypred0) * 100)
print("Balanced accuracy is", metrics.balanced_accuracy_score(y_test_internal, ypred0) * 100)
print("Weighted f1 score is", metrics.f1_score(y_test_internal, ypred0, average='weighted') * 100)
print("Weighted recall is", metrics.recall_score(y_test_internal, ypred0, average='weighted') * 100)
print("Weighted precision is", metrics.precision_score(y_test_internal, ypred0, average='weighted') * 100)
print("Weighted Jaccard score is", metrics.jaccard_score(y_test_internal, ypred0, average='weighted') * 100)

Accuracy is 73.59375
Balanced accuracy is 50.0
Weighted f1 score is 62.39902115211521
Weighted recall is 73.59375
Weighted precision is 54.16040039062501
Weighted Jaccard score is 54.16040039062501


In [36]:
ypred1 = clf0.predict(x_test_external)
print("Accuracy is", metrics.accuracy_score(y_test_external, ypred1) * 100)
print("Balanced accuracy is", metrics.balanced_accuracy_score(y_test_external, ypred1) * 100)
print("Weighted f1 score is", metrics.f1_score(y_test_external, ypred1, average='weighted') * 100)
print("Weighted recall is", metrics.recall_score(y_test_external, ypred1, average='weighted') * 100)
print("Weighted precision is", metrics.precision_score(y_test_external, ypred1, average='weighted') * 100)
print("Weighted Jaccard score is", metrics.jaccard_score(y_test_external, ypred1, average='weighted') * 100)

Accuracy is 71.59400544959128
Balanced accuracy is 50.0
Weighted f1 score is 59.742199069091264
Weighted recall is 71.59400544959128
Weighted precision is 51.25701616316106
Weighted Jaccard score is 51.25701616316106


In [37]:
clf1 = RidgeClassifier().fit(x_train, y_train)
ypred2 = clf1.predict(x_test_internal)
print("Accuracy is", metrics.accuracy_score(y_test_internal, ypred2) * 100)
print("Balanced accuracy is", metrics.balanced_accuracy_score(y_test_internal, ypred2) * 100)
print("Weighted f1 score is", metrics.f1_score(y_test_internal, ypred2, average='weighted') * 100)
print("Weighted recall is", metrics.recall_score(y_test_internal, ypred2, average='weighted') * 100)
print("Weighted precision is", metrics.precision_score(y_test_internal, ypred2, average='weighted') * 100)
print("Weighted Jaccard score is", metrics.jaccard_score(y_test_internal, ypred2, average='weighted') * 100)


Accuracy is 71.25
Balanced accuracy is 62.44550810939836
Weighted f1 score is 71.08119185603172
Weighted recall is 71.25
Weighted precision is 70.9248273334105
Weighted Jaccard score is 57.24316569425042


In [38]:
ypred3 = clf1.predict(x_test_external)
print("Accuracy is", metrics.accuracy_score(y_test_external, ypred3) * 100)
print("Balanced accuracy is", metrics.balanced_accuracy_score(y_test_external, ypred3) * 100)
print("Weighted f1 score is", metrics.f1_score(y_test_external, ypred3, average='weighted') * 100)
print("Weighted recall is", metrics.recall_score(y_test_external, ypred3, average='weighted') * 100)
print("Weighted precision is", metrics.precision_score(y_test_external, ypred3, average='weighted') * 100)
print("Weighted Jaccard score is", metrics.jaccard_score(y_test_external, ypred3, average='weighted') * 100)


Accuracy is 80.38147138964578
Balanced accuracy is 73.13464166820683
Weighted f1 score is 79.74024514069991
Weighted recall is 80.38147138964578
Weighted precision is 79.60821924348645
Weighted Jaccard score is 67.63500584520403


In [39]:
extra_tree = ExtraTreeClassifier(random_state=0)
clf2 = BaggingClassifier(extra_tree, random_state=0).fit(x_train, y_train)
ypred4 = clf2.predict(x_test_internal)
print("Accuracy is", metrics.accuracy_score(y_test_internal, ypred4) * 100)
print("Balanced accuracy is", metrics.balanced_accuracy_score(y_test_internal, ypred4) * 100)
print("Weighted f1 score is", metrics.f1_score(y_test_internal, ypred4, average='weighted') * 100)
print("Weighted recall is", metrics.recall_score(y_test_internal, ypred4, average='weighted') * 100)
print("Weighted precision is", metrics.precision_score(y_test_internal, ypred4, average='weighted') * 100)
print("Weighted Jaccard score is", metrics.jaccard_score(y_test_internal, ypred4, average='weighted') * 100)

Accuracy is 76.09375
Balanced accuracy is 63.0805663387731
Weighted f1 score is 74.01932011632856
Weighted recall is 76.09375
Weighted precision is 73.8622318097015
Weighted Jaccard score is 61.61860657479359


In [40]:
ypred5 = clf2.predict(x_test_external)
print("Accuracy is", metrics.accuracy_score(y_test_external, ypred5) * 100)
print("Balanced accuracy is", metrics.balanced_accuracy_score(y_test_external, ypred5) * 100)
print("Weighted f1 score is", metrics.f1_score(y_test_external, ypred5, average='weighted') * 100)
print("Weighted recall is", metrics.recall_score(y_test_external, ypred5, average='weighted') * 100)
print("Weighted precision is", metrics.precision_score(y_test_external, ypred5, average='weighted') * 100)
print("Weighted Jaccard score is", metrics.jaccard_score(y_test_external, ypred5, average='weighted') * 100)

Accuracy is 84.26430517711172
Balanced accuracy is 74.8337200838758
Weighted f1 score is 82.94185502523663
Weighted recall is 84.26430517711174
Weighted precision is 84.5384594605152
Weighted Jaccard score is 72.22037514954508


In [41]:
clf3 = MLPClassifier(random_state=1, max_iter=300).fit(x_train, y_train)
ypred6 = clf3.predict(x_test_internal)
print("Accuracy is", metrics.accuracy_score(y_test_internal, ypred6) * 100)
print("Balanced accuracy is", metrics.balanced_accuracy_score(y_test_internal, ypred6) * 100)
print("Weighted f1 score is", metrics.f1_score(y_test_internal, ypred6, average='weighted') * 100)
print("Weighted recall is", metrics.recall_score(y_test_internal, ypred6, average='weighted') * 100)
print("Weighted precision is", metrics.precision_score(y_test_internal, ypred6, average='weighted') * 100)
print("Weighted Jaccard score is", metrics.jaccard_score(y_test_internal, ypred6, average='weighted') * 100)

Accuracy is 73.125
Balanced accuracy is 61.06358120076886
Weighted f1 score is 71.63376615273695
Weighted recall is 73.125
Weighted precision is 70.95895055904137
Weighted Jaccard score is 58.59859913793103


In [42]:
ypred7 = clf3.predict(x_test_external)
print("Accuracy is", metrics.accuracy_score(y_test_external, ypred7) * 100)
print("Balanced accuracy is", metrics.balanced_accuracy_score(y_test_external, ypred7) * 100)
print("Weighted f1 score is", metrics.f1_score(y_test_external, ypred7, average='weighted') * 100)
print("Weighted recall is", metrics.recall_score(y_test_external, ypred7, average='weighted') * 100)
print("Weighted precision is", metrics.precision_score(y_test_external, ypred7, average='weighted') * 100)
print("Weighted Jaccard score is", metrics.jaccard_score(y_test_external, ypred7, average='weighted') * 100)

Accuracy is 82.28882833787466
Balanced accuracy is 72.29679624521125
Weighted f1 score is 80.76210858858558
Weighted recall is 82.28882833787466
Weighted precision is 82.11859763992871
Weighted Jaccard score is 69.3788032431422


In [43]:
neigh = KNeighborsClassifier(n_neighbors=3)
clf4 = neigh.fit(x_train, y_train)
ypred8 = clf4.predict(x_test_internal)
print("Accuracy is", metrics.accuracy_score(y_test_internal, ypred8) * 100)
print("Balanced accuracy is", metrics.balanced_accuracy_score(y_test_internal, ypred8) * 100)
print("Weighted f1 score is", metrics.f1_score(y_test_internal, ypred8, average='weighted') * 100)
print("Weighted recall is", metrics.recall_score(y_test_internal, ypred8, average='weighted') * 100)
print("Weighted precision is", metrics.precision_score(y_test_internal, ypred8, average='weighted') * 100)
print("Weighted Jaccard score is", metrics.jaccard_score(y_test_internal, ypred8, average='weighted') * 100)


Accuracy is 73.28125
Balanced accuracy is 65.53285845299564
Weighted f1 score is 73.25577482527825
Weighted recall is 73.28125
Weighted precision is 73.23066422013721
Weighted Jaccard score is 59.62915060115353


In [44]:
ypred9 = clf4.predict(x_test_external)
print("Accuracy is", metrics.accuracy_score(y_test_external, ypred9) * 100)
print("Balanced accuracy is", metrics.balanced_accuracy_score(y_test_external, ypred9) * 100)
print("Weighted f1 score is", metrics.f1_score(y_test_external, ypred9, average='weighted') * 100)
print("Weighted recall is", metrics.recall_score(y_test_external, ypred9, average='weighted') * 100)
print("Weighted precision is", metrics.precision_score(y_test_external, ypred9, average='weighted') * 100)
print("Weighted Jaccard score is", metrics.jaccard_score(y_test_external, ypred9, average='weighted') * 100)

Accuracy is 86.1716621253406
Balanced accuracy is 79.70997131885358
Weighted f1 score is 85.61174734002364
Weighted recall is 86.1716621253406
Weighted precision is 85.90475193492459
Weighted Jaccard score is 75.6773052904305


In [45]:
clf5 = DecisionTreeClassifier(random_state=0)
clf5 = clf5.fit(x_train, y_train)
ypred10 = clf5.predict(x_test_internal)
print("Accuracy is", metrics.accuracy_score(y_test_internal, ypred10) * 100)
print("Balanced accuracy is", metrics.balanced_accuracy_score(y_test_internal, ypred10) * 100)
print("Weighted f1 score is", metrics.f1_score(y_test_internal, ypred10, average='weighted') * 100)
print("Weighted recall is", metrics.recall_score(y_test_internal, ypred10, average='weighted') * 100)
print("Weighted precision is", metrics.precision_score(y_test_internal, ypred10, average='weighted') * 100)
print("Weighted Jaccard score is", metrics.jaccard_score(y_test_internal, ypred10, average='weighted') * 100)

Accuracy is 67.03125
Balanced accuracy is 60.33806957373836
Weighted f1 score is 67.81022995872516
Weighted recall is 67.03125
Weighted precision is 68.8445577094474
Weighted Jaccard score is 53.09030268350799


In [46]:
ypred11 = clf5.predict(x_test_external)
print("Accuracy is", metrics.accuracy_score(y_test_external, ypred11) * 100)
print("Balanced accuracy is", metrics.balanced_accuracy_score(y_test_external, ypred11) * 100)
print("Weighted f1 score is", metrics.f1_score(y_test_external, ypred11, average='weighted') * 100)
print("Weighted recall is", metrics.recall_score(y_test_external, ypred11, average='weighted') * 100)
print("Weighted precision is", metrics.precision_score(y_test_external, ypred11, average='weighted') * 100)
print("Weighted Jaccard score is", metrics.jaccard_score(y_test_external, ypred11, average='weighted') * 100)

Accuracy is 79.49591280653951
Balanced accuracy is 73.09482575690161
Weighted f1 score is 79.10787480251466
Weighted recall is 79.49591280653951
Weighted precision is 78.90433617161182
Weighted Jaccard score is 66.69104266365947


In [47]:
clf6 = AdaBoostClassifier().fit(x_train,y_train)
ypred12 = clf6.predict(x_test_internal)
print("Accuracy is", metrics.accuracy_score(y_test_internal, ypred12) * 100)
print("Balanced accuracy is", metrics.balanced_accuracy_score(y_test_internal, ypred12) * 100)
print("Weighted f1 score is", metrics.f1_score(y_test_internal, ypred12, average='weighted') * 100)
print("Weighted recall is", metrics.recall_score(y_test_internal, ypred12, average='weighted') * 100)
print("Weighted precision is", metrics.precision_score(y_test_internal, ypred12, average='weighted') * 100)
print("Weighted Jaccard score is", metrics.jaccard_score(y_test_internal, ypred12, average='weighted') * 100)

Accuracy is 70.625
Balanced accuracy is 60.50327265417907
Weighted f1 score is 70.07421875
Weighted recall is 70.625
Weighted precision is 69.63421261122171
Weighted Jaccard score is 56.35134022881287


In [48]:
ypred13 = clf6.predict(x_test_external)
print("Accuracy is", metrics.accuracy_score(y_test_external, ypred13) * 100)
print("Balanced accuracy is", metrics.balanced_accuracy_score(y_test_external, ypred13) * 100)
print("Weighted f1 score is", metrics.f1_score(y_test_external, ypred13, average='weighted') * 100)
print("Weighted recall is", metrics.recall_score(y_test_external, ypred13, average='weighted') * 100)
print("Weighted precision is", metrics.precision_score(y_test_external, ypred13, average='weighted') * 100)
print("Weighted Jaccard score is", metrics.jaccard_score(y_test_external, ypred13, average='weighted') * 100)


Accuracy is 82.22070844686648
Balanced accuracy is 75.35942701595147
Weighted f1 score is 81.63061380947606
Weighted recall is 82.22070844686648
Weighted precision is 81.58726521798933
Weighted Jaccard score is 70.1178317905345


In [49]:
clf7 = GaussianNB().fit(x_train,y_train)
ypred14 = clf7.predict(x_test_internal)
print("Accuracy is", metrics.accuracy_score(y_test_internal, ypred14) * 100)
print("Balanced accuracy is", metrics.balanced_accuracy_score(y_test_internal, ypred14) * 100)
print("Weighted f1 score is", metrics.f1_score(y_test_internal, ypred14, average='weighted') * 100)
print("Weighted recall is", metrics.recall_score(y_test_internal, ypred14, average='weighted') * 100)
print("Weighted precision is", metrics.precision_score(y_test_internal, ypred14, average='weighted') * 100)
print("Weighted Jaccard score is", metrics.jaccard_score(y_test_internal, ypred14, average='weighted') * 100)

Accuracy is 62.5
Balanced accuracy is 68.26216409753891
Weighted f1 score is 64.6240234375
Weighted recall is 62.5
Weighted precision is 75.88678328474246
Weighted Jaccard score is 48.100303951367785


In [50]:
ypred15 = clf7.predict(x_test_external)
print("Accuracy is", metrics.accuracy_score(y_test_external, ypred15) * 100)
print("Balanced accuracy is", metrics.balanced_accuracy_score(y_test_external, ypred15) * 100)
print("Weighted f1 score is", metrics.f1_score(y_test_external, ypred15, average='weighted') * 100)
print("Weighted recall is", metrics.recall_score(y_test_external, ypred15, average='weighted') * 100)
print("Weighted precision is", metrics.precision_score(y_test_external, ypred15, average='weighted') * 100)
print("Weighted Jaccard score is", metrics.jaccard_score(y_test_external, ypred15, average='weighted') * 100)

Accuracy is 84.12806539509536
Balanced accuracy is 82.11626702443944
Weighted f1 score is 84.36073224355644
Weighted recall is 84.12806539509536
Weighted precision is 84.7639093778741
Weighted Jaccard score is 73.52713789369727
