In [1]:
import numpy as np
from sklearn.ensemble import AdaBoostClassifier
import xgboost as xgb
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report,f1_score
from sklearn.preprocessing import LabelEncoder
import glob
import cv2


In [2]:
# Set the path to your dataset directory.
# Root folder of the Kaggle input dataset; the image-loading cell further down
# reads from the Hydra/ and Paramecium/ subfolders under this same location.
dataset_path = '/kaggle/input/hydra-paramecium-dataset/micro-dataset'

In [3]:
# Function to load and preprocess images
def load_images(file_paths, size=(128, 128)):
    """Read image files with OpenCV and resize each one to a common shape.

    Args:
        file_paths: Iterable of image file paths to read.
        size: Target size handed to ``cv2.resize`` as its ``dsize`` argument.
            Defaults to ``(128, 128)``, the value that used to be hard-coded,
            so existing callers are unaffected.

    Returns:
        A list with one resized image array per readable file, in input
        order. Files that could not be read are reported and skipped, so the
        result may be shorter than ``file_paths``.
    """
    images = []
    for path in file_paths:
        image = cv2.imread(path)
        if image is None:
            # A failed read yields None here instead of an exception; report
            # it and keep going so one bad file does not abort the whole load.
            print(f"Unable to load image: {path}")
            continue
        images.append(cv2.resize(image, size))
    return images

In [4]:
def extract_features(images):
    """Turn each image into a 1-D feature vector.

    The only "feature extraction" performed is calling ``.flatten()`` on
    every image; swap in a real extractor here if one is needed.

    Args:
        images: Iterable of array-like images exposing ``.flatten()``.

    Returns:
        A list holding one flattened array per input image, in input order.
    """
    return [img.flatten() for img in images]


In [5]:
# Load the images for both classes.
# The class folders are resolved relative to `dataset_path` (defined in the
# configuration cell) so the dataset location is set in exactly one place.
hydra_files = glob.glob(f'{dataset_path}/Hydra/*jpg')
paramecium_files = glob.glob(f'{dataset_path}/Paramecium/*jpg')
print('hydra:',len(hydra_files))
print(len(paramecium_files))
hydra_images = load_images(hydra_files)
paramecium_images = load_images(paramecium_files)

# Fail the cell with an exception rather than calling exit(): in a notebook
# exit() asks the kernel to quit instead of reporting a clear error, whereas a
# raised exception stops "Run All" right here with the message below.
if len(hydra_images) == 0 or len(paramecium_images) == 0:
    raise FileNotFoundError("No images found. Please check your dataset path and image file formats.")

hydra: 122
144


In [6]:
# Extract features from the images (one flattened vector per image)
hydra_features = extract_features(hydra_images)
paramecium_features = extract_features(paramecium_images)

# Create the labels (0 for hydra, 1 for paramecium)
hydra_labels = np.zeros(len(hydra_features))
paramecium_labels = np.ones(len(paramecium_features))

# Combine the features and labels, hydra samples first, then paramecium,
# so row i of `features` lines up with entry i of `labels`
features = np.concatenate((hydra_features, paramecium_features))
labels = np.concatenate((hydra_labels, paramecium_labels))

# Split the data into training and testing sets (80% / 20%, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)

# Raise instead of calling exit(): in a notebook exit() asks the kernel to
# quit rather than surfacing an error, while an exception halts execution at
# this cell with an explicit message.
if len(X_train) == 0 or len(X_test) == 0:
    raise ValueError("Insufficient data for training or testing. Please check your dataset size or split ratio.")


In [7]:
# AdaBoost

# Weak learner handed to the ensemble: a decision tree limited to depth 1
base_estimator = DecisionTreeClassifier(max_depth=1)

# Boosted ensemble built on that weak learner; the seed is fixed for repeatability
adaboost = AdaBoostClassifier(
    estimator=base_estimator,
    learning_rate=0.3,
    n_estimators=30,
    random_state=42,
)

# Fit on the training split
adaboost.fit(X_train, y_train)

# Predictions on the training split (used to report train accuracy) ...
y_pred_train = adaboost.predict(X_train)
# ... and on the held-out test split
y_pred = adaboost.predict(X_test)



In [8]:
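# # XGBoost alternative (disabled). Uncommenting this cell would overwrite
# # y_pred_train / y_pred with XGBoost predictions in place of the AdaBoost ones.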
# # Convert the data to DMatrix format
# dtrain = xgb.DMatrix(X_train, label=y_train)
# dtest = xgb.DMatrix(X_test, label=y_test)

# # Set the XGBoost parameters
# params = {
#     'max_depth': 4,
#     'eta': 0.1,
#     'objective': 'binary:logistic',
#     'eval_metric': 'error'
# }

# # Train the XGBoost classifier
# num_rounds = 40
# xgboost_model = xgb.train(params, dtrain, num_rounds)

# # Predict the labels for the test set
# # y_pred_train = xgboost_model.predict(dtrain)
# # y_pred = np.round(y_pred)  # Convert probabilities to binary predictions
# # # Predict the labels for the test set
# # y_pred = xgboost_model.predict(dtest)
# # y_pred = np.round(y_pred)  # Convert probabilities to binary predictions
# y_pred_train = xgboost_model.predict(dtrain)
# y_pred_train = np.where(y_pred_train > 0.4, 1, 0)  # Convert probabilities to binary predictions

# y_pred = xgboost_model.predict(dtest)
# y_pred = np.where(y_pred > 0.4, 1, 0)  # Convert probabilities to binary predictions


In [9]:
# Display names for the report, in label order (0 = hydra, 1 = paramecium)
target_names = ['hydra', 'paramecium']

# Accuracy on the training split
accuracy = accuracy_score(y_train, y_pred_train)
print("Train Accuracy: {:.2f}%".format(accuracy * 100))

# Accuracy on the held-out test split; `accuracy` is rebound here, so from
# this point on it holds the test value
accuracy = accuracy_score(y_test, y_pred)
print("Test Accuracy: {:.2f}%".format(accuracy * 100))

# F1 score on the test split
f1score = f1_score(y_test, y_pred)
print("Test f1score: {:.2f}%".format(f1score * 100))

# Per-class precision / recall / F1 breakdown for the test split
print(classification_report(y_test, y_pred, target_names=target_names))


Train Accuracy: 97.64%
Test Accuracy: 81.48%
Test f1score: 80.77%
              precision    recall  f1-score   support

       hydra       0.79      0.85      0.82        27
  paramecium       0.84      0.78      0.81        27

    accuracy                           0.81        54
   macro avg       0.82      0.81      0.81        54
weighted avg       0.82      0.81      0.81        54

