In [1]:
# Import necessary libraries
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
import pickle

In [2]:
# Read the preprocessed records from disk.
# index_col=None means no CSV column is used as the row index, so the file's
# unnamed first column is kept as a regular data column ("Unnamed: 0" in the
# head() output further down).
# NOTE(review): that column looks like a saved row index that ends up among the
# model features — confirm whether it should be dropped before training.
csv_path = "prep.csv"
dataset = pd.read_csv(csv_path, index_col=None)

In [3]:
# One-hot encode the categorical columns (this is an encoding step, not a plain copy).
# drop_first=True drops the first level of every category, which is why the
# target ends up as the single indicator column 'classification_yes'.
# dtype=int pins the indicator columns to 0/1 integers: pandas >= 2.0 defaults to
# bool, which would turn the labels and the classification report into
# True/False instead of the 0/1 shown in this notebook's outputs.
dataset_transformed = pd.get_dummies(dataset, drop_first=True, dtype=int)

In [4]:
# Split the encoded frame into the label column (y) and every other column (X)
target_column = 'classification_yes'
y = dataset_transformed[target_column]
X = dataset_transformed.drop(columns=[target_column])

In [5]:
# Preview the top five rows of the encoded frame (target column included)
dataset_transformed.head(n=5)

Unnamed: 0,age,bp,al,su,bgr,bu,sc,sod,pot,hrmo,...,pc_normal,pcc_present,ba_present,htn_yes,dm_yes,cad_yes,appet_yes,pe_yes,ane_yes,classification_yes
0,2.0,76.459948,3.0,0.0,148.112676,57.482105,3.077356,137.528754,4.627244,12.518156,...,0,0,0,0,0,0,1,1,0,1
1,3.0,76.459948,2.0,0.0,148.112676,22.0,0.7,137.528754,4.627244,10.7,...,1,0,0,0,0,0,1,0,0,1
2,4.0,76.459948,1.0,0.0,99.0,23.0,0.6,138.0,4.4,12.0,...,1,0,0,0,0,0,1,0,0,1
3,5.0,76.459948,1.0,0.0,148.112676,16.0,0.7,138.0,3.2,8.1,...,1,0,0,0,0,0,1,0,1,1
4,5.0,50.0,0.0,0.0,148.112676,25.0,0.6,137.528754,4.627244,11.8,...,1,0,0,0,0,0,1,0,0,1


In [6]:
# Report dimensions: X is (rows, features) and y is (rows,)
feature_shape, target_shape = X.shape, y.shape
print(feature_shape, target_shape)

(399, 27) (399,)


In [7]:
# Standardize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [8]:
# Apply PCA
n_components = 4  # Number of principal components
pca = PCA(n_components=n_components)
X_pca = pca.fit_transform(X_scaled)

In [9]:
print(f"Shape of PCA-transformed features: {X_pca.shape}")

Shape of PCA-transformed features: (399, 4)


In [10]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_pca, y, test_size=0.25, random_state=0)

In [11]:
# Build a logistic-regression model (seeded for repeatability) and fit it
# on the training features and labels
classifier = LogisticRegression(random_state=0)
classifier.fit(X=X_train, y=y_train)

In [12]:
# Label the held-out rows with the fitted classifier
y_pred = classifier.predict(X=X_test)

In [13]:
# Compare predictions with the true test labels:
# confusion matrix, overall accuracy, and a per-class text report
conf_matrix = confusion_matrix(y_true=y_test, y_pred=y_pred)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
classification_report_str = classification_report(y_true=y_test, y_pred=y_pred)


In [14]:
# Print each metric under its heading, in a fixed order
for heading, value in (
    ("Confusion Matrix:\n", conf_matrix),
    ("Accuracy:", accuracy),
    ("Classification Report:\n", classification_report_str),
):
    print(heading, value)

Confusion Matrix:
 [[36  0]
 [ 1 63]]
Accuracy: 0.99
Classification Report:
               precision    recall  f1-score   support

           0       0.97      1.00      0.99        36
           1       1.00      0.98      0.99        64

    accuracy                           0.99       100
   macro avg       0.99      0.99      0.99       100
weighted avg       0.99      0.99      0.99       100



In [15]:

# Persist the trained classifier, the fitted scaler and the fitted PCA so that
# inference can reuse exactly the same preprocessing + model.
model_filename = "finalized_model_logistic_regression.sav"
scaler_filename = "scaler.pkl"
pca_filename = "pca.pkl"

# Write each artifact inside a context manager so the file is flushed and closed
# even if pickling fails; a bare open(...) passed to pickle.dump is never
# explicitly closed.
for artifact, path in (
    (classifier, model_filename),
    (scaler, scaler_filename),
    (pca, pca_filename),
):
    with open(path, 'wb') as fh:
        pickle.dump(artifact, fh)

In [16]:
# Single hand-written sample (one row, 27 positional values) used to exercise
# the saved pipeline. The count matches the 27 feature columns reported by
# X.shape earlier in the notebook.
# NOTE(review): the values carry no column names, so they must be listed in
# exactly the column order of X — verify against X.columns.
example_input = [[
    76.45994832, 3, 0, 148.1126761, 3.077356021, 137.528754,
    4.62724359, 12.51815562, 38.86890244, 8408.191126, 4.705597015,
    1, 0, 0, 0, 0, 0, 1, 0,
    1, 0, 1, 0, 1, 0, 1, 0,
]]

In [17]:
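# Earlier 11-value variant of the example input, kept commented out for reference.
# It cannot be used as-is: the fitted scaler expects 27 features (see X.shape above).
#example_input = [[76.45994832, 3, 0, 148.1126761, 3.077356021, 137.528754, 4.62724359, 12.51815562, 38.86890244, 8408.191126, 4.705597015]]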

In [18]:
# Reload the persisted scaler, PCA and classifier from disk.
# SECURITY: pickle.load can execute arbitrary code embedded in the file — only
# load artifacts this notebook (or another trusted source) produced.
# Context managers make sure every file handle is closed after reading.
with open(scaler_filename, 'rb') as fh:
    loaded_scaler = pickle.load(fh)
with open(pca_filename, 'rb') as fh:
    loaded_pca = pickle.load(fh)
with open(model_filename, 'rb') as fh:
    loaded_model = pickle.load(fh)


In [19]:
# Push the example through the reloaded chain: scale -> PCA projection -> classify
example_input_scaled = loaded_scaler.transform(example_input)
example_input_pca = loaded_pca.transform(example_input_scaled)
# Cast the predicted label(s) to plain integers
prediction_result = loaded_model.predict(example_input_pca).astype(int)



In [20]:
# Show the predicted class label(s) for the example input
print(f"Prediction result: {prediction_result}")

Prediction result: [0]


In [21]:
# Inspect the example's coordinates in the 4-component PCA space.
# NOTE(review): the captured scores are in the hundreds, which is far from what
# standardized inputs normally produce — presumably the example values are not
# in the training column order; verify before trusting the prediction.
example_input_pca

array([[937.85146898, 519.73703388,  58.53796397, 247.32587872]])