In [5]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.ensemble import VotingClassifier
from skimage.io import imread
from skimage.transform import resize
import os


load data

In [6]:
def load_data_from_folder(folder_path):
    images = []
    labels = []
    for label in ["HC", "PD"]:
        label_path = f"{folder_path}/{label}"
        for image_path in os.listdir(label_path):
            full_path = f"{label_path}/{image_path}"
            try:
                img = imread(full_path)
                # Convert all images to grayscale
                if len(img.shape) == 3:
                    img = np.mean(img, axis=-1)
                img = resize(img, (256, 256)).flatten()  # resize and flatten the image
                images.append(img)
                labels.append(label)
            except Exception as e:
                print(f"Error processing image: {full_path}")
                print(f"Error details: {e}")
                continue

    # Debugging step
    image_shapes = [img.shape for img in images]
    unique_shapes = set(image_shapes)
    print(f"Unique image shapes encountered: {unique_shapes}")

    return np.array(images), np.array(labels)

# Load data
drawings_X, drawings_y = load_data_from_folder("drawings")
speech_X, speech_y = load_data_from_folder("speech")


Unique image shapes encountered: {(65536,)}
Unique image shapes encountered: {(65536,)}


data processing

In [7]:
# Split data
X_train_drawings, X_test_drawings, y_train_drawings, y_test_drawings = train_test_split(drawings_X, drawings_y, test_size=0.2)
X_train_speech, X_test_speech, y_train_speech, y_test_speech = train_test_split(speech_X, speech_y, test_size=0.2)


train model

In [8]:
# Train Random Forest models
clf_drawings = RandomForestClassifier()
clf_drawings.fit(X_train_drawings, y_train_drawings)
drawings_predictions = clf_drawings.predict(X_test_drawings)
print("Performance for Drawings:")
print(classification_report(y_test_drawings, drawings_predictions))

clf_speech = RandomForestClassifier()
clf_speech.fit(X_train_speech, y_train_speech)
speech_predictions = clf_speech.predict(X_test_speech)
print("\nPerformance for Speech:")
print(classification_report(y_test_speech, speech_predictions))


Performance for Drawings:
              precision    recall  f1-score   support

          HC       0.73      0.94      0.83       263
          PD       0.84      0.47      0.60       169

    accuracy                           0.76       432
   macro avg       0.79      0.71      0.71       432
weighted avg       0.78      0.76      0.74       432


Performance for Speech:
              precision    recall  f1-score   support

          HC       0.92      0.71      0.80        17
          PD       0.82      0.96      0.88        24

    accuracy                           0.85        41
   macro avg       0.87      0.83      0.84        41
weighted avg       0.86      0.85      0.85        41



evaluate, ensemble

In [11]:
clf_drawings.save('drawings_model.h5')
clf_speech.save('speech_model.h5')



Performance for Ensemble:


ValueError: Found input variables with inconsistent numbers of samples: [432, 41]