## Data Loading
**Load the dataset from a CSV file.**

In [None]:

import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Load the dataset
file_path = "cell_samples.csv"
data = pd.read_csv(file_path)
%matplotlib inline 

## Data Preprocessing
**Separate features and labels, then standardize the features.**

In [2]:

X = data.drop(columns=['output'])
y = data['output']

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


## K-Means Clustering
**Apply K-Means clustering with 2 clusters.**

In [3]:

kmeans = KMeans(n_clusters=2, random_state=42, n_init=10)
clusters = kmeans.fit_predict(X_scaled)


## PCA Dimensionality Reduction
**Reduce dimensions to 7 principal components.**

In [4]:

pca = PCA(n_components=7)
X_pca = pca.fit_transform(X_scaled)


## Feature Combination
**Combine standardized features, PCA components, and clusters.**

In [5]:

X_final = np.hstack((X_scaled, X_pca, clusters.reshape(-1, 1)))


## RandomForest Model Training
**Train a RandomForest classifier with specific hyperparameters.**

In [6]:

model = RandomForestClassifier(
    n_estimators=200,
    max_depth=5,
    min_samples_leaf=2,
    min_samples_split=5,
    random_state=42
)
model.fit(X_final, y)


## Model Evaluation
**Evaluate the model on training data.**

In [7]:

y_pred_train = model.predict(X_final)
train_accuracy = accuracy_score(y, y_pred_train)
print(f"Training accuracy: {train_accuracy:.2f}")


Training accuracy: 0.94


## Train-Test Split & Retraining
**Split data into training and testing sets, then retrain the model.**

In [8]:

X_train, X_test, y_train, y_test = train_test_split(X_final, y, test_size=0.2, random_state=42)

model.fit(X_train, y_train)

train_accuracy = accuracy_score(y_train, model.predict(X_train))
print(f"Training accuracy: {train_accuracy:.2f}")

test_accuracy = accuracy_score(y_test, model.predict(X_test))
print(f"Test accuracy: {test_accuracy:.2f}")


Training accuracy: 0.94
Test accuracy: 0.87


## New Patient Prediction
**Predict the health status of a new patient.**

In [9]:

new_patient = np.array([[55, 1, 2, 130, 250, 0, 1, 170, 0, 2.5, 1, 0, 2]])

new_patient_df = pd.DataFrame(new_patient, columns=X.columns)
new_patient_scaled = scaler.transform(new_patient_df)
new_patient_pca = pca.transform(new_patient_scaled)
new_patient_cluster = kmeans.predict(new_patient_scaled).reshape(-1, 1)

new_patient_final = np.hstack((new_patient_scaled, new_patient_pca, new_patient_cluster))
prediction = model.predict(new_patient_final)[0]

status = "Unhealthy" if prediction == 1 else "Healthy"
print(f"The new patient is {status}.")


The new patient is Unhealthy.
