<a href="https://colab.research.google.com/github/andrewpark19/4thyear-FYP/blob/main/randomforest_multiclass.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from tensorflow import keras
import matplotlib.pyplot as plt
import glob
import shutil
import os
import pandas as pd

In [None]:
!unzip /content/drive/MyDrive/melanoma_data/isic_2020/train_2020/train_2020.zip > /dev/null
!unzip /content/drive/MyDrive/melanoma_data/isic_2020/test_2020/test_2020.zip > /dev/null
!unzip /content/drive/MyDrive/melanoma_data/isic_2019/train_2019/train_2019.zip > /dev/null


In [None]:
# Folder on Google Drive that holds the ISIC CSV tables read by get_df.
data_dir = '/content/drive/MyDrive/melanoma_data'

# Image folders, one per dataset; get_df joins each with "<image_name>.jpg".
train2019_dir = '/content/train_2019'
train2020_dir = '/content/train_2020'
test2020_dir = '/content/test_2020'

def get_df(data_dir, train2020_dir, test2020_dir, train2019_dir, use_meta=False):
    """Load the ISIC 2020 and 2019 tables and merge them into one labelled training set.

    Parameters
    ----------
    data_dir : str
        Folder containing ``isic_2020/train_2020.csv``, ``isic_2020/test_2020.csv``
        and ``isic_2019/train_2019.csv``.
    train2020_dir, test2020_dir, train2019_dir : str
        Image folders. Every row gets a ``filepath`` column of the form
        ``<folder>/<image_name>.jpg``.
    use_meta : bool, default False
        When true, both frames are passed through ``get_meta_data``. That function
        is not defined in this block and must already exist in the namespace.

    Returns
    -------
    tuple
        ``(df_train, df_test, meta_features, n_meta_features, mel_idx)`` where
        ``df_train`` is the 2020 rows followed by the 2019 rows with an integer
        ``target`` column, and ``mel_idx`` is the integer code of ``'melanoma'``.
        Without ``use_meta`` the two meta values are ``None`` and ``0``.

    Raises
    ------
    KeyError
        If no training row ends up with the diagnosis ``'melanoma'``.
    """

    def _load(csv_parts, image_dir):
        # Read one CSV below data_dir and attach the full path of each listed image.
        frame = pd.read_csv(os.path.join(data_dir, *csv_parts))
        frame['filepath'] = [
            os.path.join(image_dir, f'{name}.jpg') for name in frame['image_name']
        ]
        return frame

    # 2020 diagnoses that are folded into the label set shared with the 2019 data.
    labels_2020 = {
        'seborrheic keratosis': 'BKL',
        'lichenoid keratosis': 'BKL',
        'solar lentigo': 'BKL',
        'lentigo NOS': 'BKL',
        'cafe-au-lait macule': 'unknown',
        'atypical melanocytic proliferation': 'unknown',
    }
    # 2019 abbreviations spelled out the way the 2020 table writes them.
    labels_2019 = {'NV': 'nevus', 'MEL': 'melanoma'}

    ###Training Data
    df_train = _load(('isic_2020', 'train_2020.csv'), train2020_dir)
    df_train2 = _load(('isic_2019', 'train_2019.csv'), train2019_dir)

    # Series.replace with a dict matches whole values only. The previous per-row
    # str.replace matched substrings, so any label that merely contained "NV" or
    # "MEL" would have been rewritten in the middle of the string.
    df_train['diagnosis'] = df_train['diagnosis'].replace(labels_2020)
    df_train2['diagnosis'] = df_train2['diagnosis'].replace(labels_2019)

    df_train = pd.concat([df_train, df_train2]).reset_index(drop=True)

    ###Test Data
    df_test = _load(('isic_2020', 'test_2020.csv'), test2020_dir)

    if use_meta:
        df_train, df_test, meta_features, n_meta_features = get_meta_data(df_train, df_test)
    else:
        meta_features = None
        n_meta_features = 0

    # Integer codes follow the sorted order of the distinct diagnosis strings.
    diagnosis2idx = {d: idx for idx, d in enumerate(sorted(df_train.diagnosis.unique()))}
    df_train['target'] = df_train['diagnosis'].map(diagnosis2idx)
    mel_idx = diagnosis2idx['melanoma']

    return df_train, df_test, meta_features, n_meta_features, mel_idx
  

# Build the merged training table and the test table; the two metadata
# outputs are discarded because use_meta is off.
df_train, df_test, _, _, mel_idx = get_df(
    data_dir=data_dir,
    train2020_dir=train2020_dir,
    test2020_dir=test2020_dir,
    train2019_dir=train2019_dir,
    use_meta=False,
)

In [None]:
# Import from tensorflow.keras so this cell uses the same Keras implementation as
# the `from tensorflow import keras` import at the top of the notebook, instead
# of whichever standalone `keras` package happens to be installed.
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Every pixel value is multiplied by 1/255 and each image may be mirrored at
# random along either axis.
# NOTE(review): the generator is drained once for the whole dataset further
# down, before the train/validation split, so the flips act as a one-off random
# perturbation (validation images included), not as augmentation.
# Confirm this is intended.
train_datagen = ImageDataGenerator(
    rescale=1 / 255.0,
    horizontal_flip=True,
    vertical_flip=True,
)

In [None]:
# Seed shared by the stochastic steps of the notebook.
SEED = 42

# Image geometry: inputs are resized to image_size x image_size with 3 channels.
image_size = 72
input_shape = (image_size, image_size, 3)
num_classes = 9

# Training hyperparameters (not referenced by the cells visible in this notebook).
learning_rate = 1e-3
weight_decay = 1e-4
batch_size = 32
num_epochs = 40
patience = 5

In [None]:
# Read every image into memory as one batch so the pixels can be passed to
# scikit-learn as plain arrays.
generator = train_datagen.flow_from_dataframe(
    dataframe=df_train,
    directory=None,            # x_col already holds the full path of each image
    x_col='filepath',          # column with the image file paths
    y_col="diagnosis",         # column with the class labels
    # Size the single batch from the table instead of a hard-coded row count.
    # The generator drops rows whose file is missing, and a batch size at or
    # above the number of valid images still yields all of them in one batch.
    batch_size=len(df_train),
    shuffle=True,
    seed=SEED,                 # makes the shuffled order repeatable across runs
    target_size=(image_size, image_size),
    color_mode='rgb',
    class_mode='sparse'        # labels are returned as integer class indices
)

imgs, labels = next(generator)
# The rest of the notebook assumes this one batch is the entire dataset.
assert imgs.shape[0] == generator.n, "first batch does not contain every validated image"
print(f"imgs.shape:{imgs.shape},labels.shape:{labels.shape}")


  .format(n_invalid, x_col)


Found 42997 validated image filenames belonging to 9 classes.
imgs.shape:(42997, 72, 72, 3),labels.shape:(42997,)


In [None]:
from sklearn.model_selection import train_test_split

# Hold out a quarter of the images for validation; stratifying on the labels
# keeps the class proportions the same in both parts.
X_train, X_val, y_train, y_val = train_test_split(
    imgs,
    labels,
    test_size=0.25,
    random_state=SEED,
    stratify=labels,
)
print("X_train.shape:{},X_val.shape:{}".format(X_train.shape, X_val.shape))
print("y_train.shape:{},y_val.shape:{}".format(y_train.shape, y_val.shape))

X_train.shape:(32247, 72, 72, 3),X_val.shape:(10750, 72, 72, 3)
y_train.shape:(32247,),y_val.shape:(10750,)


In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import average_precision_score,confusion_matrix,classification_report, accuracy_score, f1_score, roc_auc_score, auc


# A random forest needs a 2-D (samples, features) matrix, so flatten each
# image's height x width x channel grid into a single row.
X_train2 = X_train.reshape(len(X_train), -1)
X_val2 = X_val.reshape(len(X_val), -1)

# random_state pins the forest's bootstrap and feature sampling so the reported
# metrics can be reproduced. n_jobs=-1 builds the trees on all available cores
# and does not change the fitted model.
model = RandomForestClassifier(random_state=SEED, n_jobs=-1)
model.fit(X_train2, y_train)

y_pred = model.predict(X_val2)

array([7., 8., 7., ..., 7., 1., 6.], dtype=float32)

In [None]:
from sklearn.metrics import average_precision_score,confusion_matrix,classification_report, accuracy_score, f1_score, average_precision_score, roc_auc_score, auc

# classification_report takes the ground truth first and the predictions second.
# With the two swapped, precision and recall trade places and the "support"
# column counts predicted labels instead of true samples per class.
print(classification_report(y_true=y_val, y_pred=y_pred))

              precision    recall  f1-score   support

         0.0       0.03      0.33      0.05        12
         1.0       0.44      0.47      0.46       563
         2.0       0.11      0.41      0.18       163
         3.0       0.00      0.00      0.00         0
         4.0       0.01      1.00      0.02         1
         5.0       0.00      0.00      0.00         0
         6.0       0.31      0.56      0.40       549
         7.0       0.85      0.69      0.76      4618
         8.0       0.92      0.85      0.88      4844

    accuracy                           0.74     10750
   macro avg       0.30      0.48      0.30     10750
weighted avg       0.82      0.74      0.77     10750



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
