In [1]:
# Import necessary libraries
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
import joblib

In [3]:
pip install --upgrade mlxtend

Collecting mlxtend
  Downloading mlxtend-0.23.1-py3-none-any.whl (1.4 MB)
     ---------------------------------------- 1.4/1.4 MB 3.0 MB/s eta 0:00:00
Installing collected packages: mlxtend
  Attempting uninstall: mlxtend
    Found existing installation: mlxtend 0.14.0
    Uninstalling mlxtend-0.14.0:
      Successfully uninstalled mlxtend-0.14.0
Successfully installed mlxtend-0.23.1
Note: you may need to restart the kernel to use updated packages.


In [2]:
# Read prep.csv into a DataFrame; index_col=None means no CSV column is used
# as the row index.
dataset = pd.read_csv(
    "prep.csv",
    index_col=None,
)

In [3]:
# One-hot encode the dataset into a new DataFrame (the original `dataset` is
# left untouched). drop_first=True omits the first level of every encoded
# column.
dataset_transformed = pd.get_dummies(data=dataset, drop_first=True)



In [4]:
# Split the encoded frame into the target column (y) and all remaining
# columns (X).
y = dataset_transformed['classification_yes']
X = dataset_transformed.drop(columns=['classification_yes'])


In [5]:
# Preview the first five rows of the encoded dataset
dataset_transformed.head(n=5)

Unnamed: 0,age,bp,al,su,bgr,bu,sc,sod,pot,hrmo,...,pc_normal,pcc_present,ba_present,htn_yes,dm_yes,cad_yes,appet_yes,pe_yes,ane_yes,classification_yes
0,2.0,76.459948,3.0,0.0,148.112676,57.482105,3.077356,137.528754,4.627244,12.518156,...,0,0,0,0,0,0,1,1,0,1
1,3.0,76.459948,2.0,0.0,148.112676,22.0,0.7,137.528754,4.627244,10.7,...,1,0,0,0,0,0,1,0,0,1
2,4.0,76.459948,1.0,0.0,99.0,23.0,0.6,138.0,4.4,12.0,...,1,0,0,0,0,0,1,0,0,1
3,5.0,76.459948,1.0,0.0,148.112676,16.0,0.7,138.0,3.2,8.1,...,1,0,0,0,0,0,1,0,1,1
4,5.0,50.0,0.0,0.0,148.112676,25.0,0.6,137.528754,4.627244,11.8,...,1,0,0,0,0,0,1,0,0,1


In [6]:
# Print the shape of the feature matrix followed by the shape of the target
print(*(part.shape for part in (X, y)))

(399, 27) (399,)


In [7]:
# Estimator handed to the feature selector below; the fixed seed keeps its
# results repeatable between runs.
random_forest_model = RandomForestClassifier(
    random_state=0,
)

In [8]:
# Apply sequential feature selection with backward elimination
# (forward=False, no floating step) down to 10 features, scoring each
# candidate subset by 5-fold cross-validated accuracy.
# NOTE(review): the selector is fitted on the full X and y, i.e. before the
# train/test split performed later in the notebook, so the rows that end up in
# the test set also influence which features are chosen. Consider fitting the
# selector on the training split only.
sfs = SFS(
    estimator=random_forest_model,
    k_features=10,
    forward=False,
    floating=False,
    scoring='accuracy',
    cv=5,
).fit(X, y)



In [9]:
# Reduce the feature dataset to the columns picked by the fitted selector
X_selected = sfs.transform(X)

# Positions (within X's columns) of the selected features
selected_features_indices = sfs.k_feature_idx_

print(X_selected.shape)

(399, 10)


In [14]:
# Map the selected column positions back to their column names in X, then
# print the positions and the names on separate lines.
selected_features_indices = sfs.k_feature_idx_
selected_features = X.columns[list(selected_features_indices)]
print(selected_features_indices, selected_features, sep="\n")



(0, 1, 2, 3, 4, 6, 10, 13, 14, 15)
Index(['age', 'bp', 'al', 'su', 'bgr', 'sc', 'pcv', 'sg_b', 'sg_c', 'sg_d'], dtype='object')


In [15]:
# Final modelling inputs: X restricted to the selected columns, and the
# target exactly as it was.
X_final, y_final = X[selected_features], y

In [16]:
# Compare the reduced feature matrix with the full one
print(*(frame.shape for frame in (X_final, X)))

(399, 10) (399, 27)


In [17]:
# Hold out 25% of the rows for testing; random_state=0 makes the split
# repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_final,
    y_final,
    test_size=0.25,
    random_state=0,
)

In [18]:
# Standardize the features: the scaler is fitted on the training rows only and
# the same fitted scaling is then applied to both splits.
# NOTE(review): X_train / X_test are overwritten with their scaled versions,
# so re-running this cell on its own refits the scaler on already-scaled data.
# Re-run from the split cell instead.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [19]:
# Build a seeded Random Forest and fit it on the scaled training split
classifier = RandomForestClassifier(
    random_state=0,
)
classifier.fit(X=X_train, y=y_train)


In [20]:
# Predicted class labels for the scaled test split
y_pred = classifier.predict(X=X_test)

In [21]:
# Evaluate the model
conf_matrix = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)
classification_report = classification_report(y_test, y_pred)

In [22]:
# Display the results
print("Confusion Matrix:\n", conf_matrix)
print("Accuracy:", accuracy)
print("Classification Report:\n", classification_report)

Confusion Matrix:
 [[35  1]
 [ 0 64]]
Accuracy: 0.99
Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.97      0.99        36
           1       0.98      1.00      0.99        64

    accuracy                           0.99       100
   macro avg       0.99      0.99      0.99       100
weighted avg       0.99      0.99      0.99       100



In [26]:
import pickle

# Save the trained model to a file. The file is opened in a `with` block so
# the handle is flushed and closed even if pickling raises.
# NOTE(review): the file name ends with a trailing "." - this looks
# unintentional, but it is kept unchanged because other code may already load
# the model under this exact name.
# NOTE(review): only the classifier is saved here. The fitted `scaler` is also
# required to prepare inputs for it, so it needs to be persisted as well before
# the model can be used outside this notebook.
model_filename = "finalized_model_random_forest.sav."
with open(model_filename, 'wb') as model_file:
    pickle.dump(classifier, model_file)

# Example input for prediction: one raw (unscaled) record whose values follow
# the order of `selected_features`. It is wrapped in a DataFrame carrying those
# column names because the scaler was fitted on a DataFrame; passing a bare
# list can trigger a feature-name mismatch warning in scikit-learn versions
# that track feature names.
example_record = pd.DataFrame(
    [[5, 50, 0, 0, 148.1126761, 0.6, 36, 1, 0, 1]],
    columns=selected_features,
)
example_input = scaler.transform(example_record)

# Load the saved model and make a prediction.
# pickle.load can execute arbitrary code from the file; it is acceptable here
# only because the file was written a few lines above. Never unpickle a model
# file from an untrusted source.
with open(model_filename, 'rb') as model_file:
    loaded_model = pickle.load(model_file)
prediction_result = loaded_model.predict(example_input)
prediction_result = prediction_result.astype(int)

print("Prediction result:", prediction_result)

Prediction result: [1]


