**Importing Libraries**

In [None]:
import pandas as pd
import numpy as np
from glob import glob
import os
import cv2
import matplotlib.pylab as plt
from skimage.io import imread
from skimage.transform import resize
from skimage.color import rgb2gray,rgba2rgb

In [None]:
# Load the training images: every readable file becomes a flattened 15x15
# grayscale vector, labelled with the index of its category folder.
input_dir = '/kaggle/input/hair-type-splitted/train_set'
categories = ['Curly Hair', 'Straight Hair', 'Wavy Hair']
data = []
labels = []
target_size = (15, 15)
for category_idx, category in enumerate(categories):
    category_path = os.path.join(input_dir, category)
    # os.listdir returns entries in arbitrary order; sort them so the sample
    # order (and any order-dependent split downstream) is reproducible.
    for file in sorted(os.listdir(category_path)):
        img_path = os.path.join(category_path, file)
        try:
            img = imread(img_path)
            if len(img.shape) == 3 and img.shape[2] == 4:
                img = rgba2rgb(img)
            if len(img.shape) == 3:
                img = rgb2gray(img)
            # Anything that is still not a single 2-D plane (e.g. a multi-frame
            # image) would flatten to a different length than the other samples,
            # so the feature matrix could not be built as a regular 2-D array.
            if img.ndim != 2:
                raise ValueError(f"unsupported image shape {img.shape}")

            img_resized = resize(img, target_size)
            data.append(img_resized.flatten())
            labels.append(category_idx)
        except Exception as e:
            # Best effort: report the unreadable/unsupported file and move on.
            print(f"Error processing {img_path}: {e}")
            continue

data = np.array(data)
labels = np.array(labels)

In [None]:
# Display the training feature matrix (one flattened image per row).
data

In [None]:
# Sanity check: feature-matrix shape on the first line, label-vector shape on the second.
print(data.shape, labels.shape, sep="\n")

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Standardise every pixel feature using statistics computed from the training
# set, then fit a random forest with fixed hyperparameters on the scaled data.
scaler = StandardScaler()
train_data = scaler.fit_transform(data)

random_forest = RandomForestClassifier(
    n_estimators=100,
    min_samples_split=2,
    min_samples_leaf=1,
    max_features='log2',
    max_depth=20,
    # Fixed seed so the bootstrap samples and feature subsets (and therefore
    # the reported test accuracy) are identical on every Restart & Run All.
    # Matches the seed used by the other forests in this notebook.
    random_state=42,
)
random_forest.fit(train_data, labels)

In [None]:
# Display the standardised training features produced by scaler.fit_transform(data).
train_data

**Hyperparameter Tuning**

In [None]:
from sklearn.model_selection import RandomizedSearchCV
# Randomised search over random-forest hyperparameters, scored by accuracy
# with 20-fold cross-validation.
# NOTE(review): the scaler is fitted on the full training set before the folds
# are made, so every validation fold has already contributed to the scaling
# statistics.
scaler = StandardScaler()
train_data1 = scaler.fit_transform(data)

param_dist = dict(
    n_estimators=[50, 100],
    max_depth=[None, 10, 20],
    min_samples_split=[2, 5],
    min_samples_leaf=[1, 2],
    max_features=['sqrt', 'log2', 0.5, 0.75],
)
random_forest1 = RandomForestClassifier(random_state=42)

random_search = RandomizedSearchCV(
    estimator=random_forest1,
    param_distributions=param_dist,
    scoring='accuracy',
    n_iter=20,  # 20 of the 2*3*2*2*4 = 96 possible combinations are sampled
    cv=20,
    random_state=42,
    n_jobs=-1,
    verbose=1,
)
random_search.fit(train_data1, labels)

print(f"Best parameters: {random_search.best_params_}")
print(f"Best cross-validation accuracy: {random_search.best_score_}")

**Testing**

In [None]:
# Load the test images with the same preprocessing as the training set:
# every readable file becomes a flattened 15x15 grayscale vector, labelled
# with the index of its category folder.
input_dir = '/kaggle/input/hair-type-splitted/test_set'
categories = ['Curly Hair', 'Straight Hair', 'Wavy Hair']
data1 = []
labels1 = []
target_size = (15, 15)
for category_idx, category in enumerate(categories):
    category_path = os.path.join(input_dir, category)
    # os.listdir returns entries in arbitrary order; sort them so the sample
    # order is reproducible across runs and machines.
    for file in sorted(os.listdir(category_path)):
        img_path = os.path.join(category_path, file)
        try:
            img = imread(img_path)
            if len(img.shape) == 3 and img.shape[2] == 4:
                img = rgba2rgb(img)
            if len(img.shape) == 3:
                img = rgb2gray(img)
            # Anything that is still not a single 2-D plane (e.g. a multi-frame
            # image) would flatten to a different length than the other samples,
            # so the feature matrix could not be built as a regular 2-D array.
            if img.ndim != 2:
                raise ValueError(f"unsupported image shape {img.shape}")

            img_resized = resize(img, target_size)
            data1.append(img_resized.flatten())
            labels1.append(category_idx)
        except Exception as e:
            # Best effort: report the unreadable/unsupported file and move on.
            print(f"Error processing {img_path}: {e}")
            continue

data1 = np.array(data1)
labels1 = np.array(labels1)

In [None]:
# Scale the test features with the already-fitted scaler (transform only, no
# refit on test data), then display the result.
test_data = scaler.transform(data1)
test_data

In [None]:
# Score the fixed-hyperparameter random forest on the scaled test set.
test_predictions = random_forest.predict(test_data)
test_accuracy = accuracy_score(y_true=labels1, y_pred=test_predictions)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

**Cross-Validation**

In [None]:
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score

In [None]:
def evaluate_model(model, train_data, train_labels, cv_folds=5):
    """Cross-validate ``model``, print its per-fold and mean scores, and return them.

    Parameters
    ----------
    model : estimator
        Estimator handed to ``cross_val_score``; its class name is used as the
        label in the printed report.
    train_data : array-like
        Feature matrix passed to ``cross_val_score``.
    train_labels : array-like
        Targets passed to ``cross_val_score``.
    cv_folds : int, default 5
        Forwarded as the ``cv`` argument.

    Returns
    -------
    numpy.ndarray
        The per-fold scores, so callers can compare or tabulate models instead
        of reading the numbers back from the printed output.

    Notes
    -----
    No ``scoring`` is passed, so the estimator's default scorer is used. The
    printed "accuracy" label is therefore only accurate for estimators whose
    default score is accuracy (scikit-learn classifiers).
    """
    cv_scores = cross_val_score(model, train_data, train_labels, cv=cv_folds)
    print(f"{model.__class__.__name__} Cross-validation scores: {cv_scores}")
    print(f"Mean cross-validation accuracy: {np.mean(cv_scores) * 100:.2f}%")
    return cv_scores

In [None]:
# Candidate classifiers, each compared with evaluate_model's default 5-fold
# cross-validation on the scaled training features.
# NOTE(review): train_data was scaled on the full training set before the folds
# are made, so the scale-sensitive models see validation-fold statistics.
models = [
    SVC(C=1, kernel='linear'),
    RandomForestClassifier(random_state=42, n_estimators=100),
    KNeighborsClassifier(n_neighbors=5),
    LogisticRegression(max_iter=500),
]

for model in models:
    evaluate_model(model, train_data, train_labels=labels)