
###Heart Failure Dataset

In [None]:
# y=mx+c  linear regression   : data => trained model: m,c
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Load the dataset
url = 'https://raw.githubusercontent.com/sauravmishra1710/Heart-Failure-Condition-And-Survival-Analysis/master/Data/heart_failure_clinical_records_dataset.csv'
heart_data = pd.read_csv(url)

# Define features (X) and target variable (y)
X = heart_data.drop(columns=['DEATH_EVENT'])  # Features
y = heart_data['DEATH_EVENT']  # Target variable

#with smote
from imblearn.over_sampling import SMOTE
# Apply SMOTE
smote = SMOTE(random_state=42)
X, y = smote.fit_resample(X, y)
X_resampled = X
X_resampled = y
# Display class distribution after SMOTE
print("\nClass Distribution After SMOTE:")
print(y.value_counts())
# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


Class Distribution After SMOTE:
DEATH_EVENT
1    203
0    203
Name: count, dtype: int64


###KNN - Manual Search

In [None]:
# prompt: apply knn manual search

from sklearn.neighbors import KNeighborsClassifier

# Initialize lists to store scores
train_score = []
test_score = []

# Define the range of k values to be tested
k_values = range(1, 20)  #1-19
# Loop through different values of k
for k in k_values: #1,2,3,4,5,6....19
  print(f"k={k}")
  # Initialize and fit the KNN classifier
  knn = KNeighborsClassifier(n_neighbors=k) #k=1,2,3,4
  knn.fit(X_train, y_train)
  # print(f"Train Score:", knn.score(X_train, y_train))
  print(f"Test Score:", knn.score(X_test, y_test))
  # Calculate and store the scores
  test_score.append(knn.score(X_test, y_test))

print(test_score)
print(max(test_score))
# Find the k value with the highest test score
best_k = test_score.index(max(test_score)) + 1
# Print the best k and its corresponding scores
print(f"Best k: {best_k}")
print(f"Test score with k = {best_k}: {test_score[best_k - 1]}")


k=1
Test Score: 0.6341463414634146
k=2
Test Score: 0.6219512195121951
k=3
Test Score: 0.6219512195121951
k=4
Test Score: 0.573170731707317
k=5
Test Score: 0.5975609756097561
k=6
Test Score: 0.6097560975609756
k=7
Test Score: 0.5975609756097561
k=8
Test Score: 0.5487804878048781
k=9
Test Score: 0.5853658536585366
k=10
Test Score: 0.5487804878048781
k=11
Test Score: 0.5487804878048781
k=12
Test Score: 0.5609756097560976
k=13
Test Score: 0.573170731707317
k=14
Test Score: 0.5609756097560976
k=15
Test Score: 0.5853658536585366
k=16
Test Score: 0.5609756097560976
k=17
Test Score: 0.5853658536585366
k=18
Test Score: 0.573170731707317
k=19
Test Score: 0.5853658536585366
[0.6341463414634146, 0.6219512195121951, 0.6219512195121951, 0.573170731707317, 0.5975609756097561, 0.6097560975609756, 0.5975609756097561, 0.5487804878048781, 0.5853658536585366, 0.5487804878048781, 0.5487804878048781, 0.5609756097560976, 0.573170731707317, 0.5609756097560976, 0.5853658536585366, 0.5609756097560976, 0.5853658

###KNN-Grid Search

In [None]:
from sklearn.model_selection import GridSearchCV
# Define the grid of hyperparameters to search
param_grid = {
    # 'n_neighbors': [1,2,3,4,5,6,7,8]
    'n_neighbors': range(1, 20)
}

# Create a KNN classifier
knn = KNeighborsClassifier()
# Initialize the GridSearchCV object
grid_search = GridSearchCV(knn, param_grid, cv=5)
# Fit the grid search object to the training data
grid_search.fit(X_train, y_train)


In [None]:
best_score = grid_search.best_score_
# Get the best hyperparameters
best_params = grid_search.best_params_
print(best_score,best_params)



# Get the best KNN model
best_knn = grid_search.best_estimator_

0.6297596153846154 {'n_neighbors': 9}


In [None]:

# Evaluate the best KNN model on the test data
test_score = best_knn.score(X_test, y_test)

# Print the test score
print(f"Test score with best hyperparameters: {test_score}")

Test score with best hyperparameters: 0.5853658536585366


###SVM Grid Search

In [None]:
# prompt: apply grid search hyperparameter tuning svm c, kernel, gamma

from sklearn.model_selection import GridSearchCV
svm_classifier = SVC()
# Define the hyperparameter grid
grid = {
    'C': [1,10,100,1000],  # Regularization parameter
    'kernel': ['linear',  'rbf'],  # Kernel type
    'gamma': ['scale', 'auto', 0.1, 0.2,0.3,0.5,0.6]  # Kernel coefficient
}
# 1-linear-scale
#10-linear-scale
#100-linear-scale
##4*2*7 combinations trial (56 combinations)

# Create the grid search object
grid_search = GridSearchCV(svm_classifier, param_grid = grid, cv=5, scoring='accuracy',n_jobs = -1)

# Fit the grid search object to the training data
grid_search.fit(X_train, y_train)

# Print the best score
print("Best Hyperparameters:")
print(grid_search.best_score_)


# Print the best hyperparameters
print("Best Hyperparameters:")
print(best_params)
best_params = grid_search.best_params_

KeyboardInterrupt: 

In [None]:
# Get the best KNN model
best_svm = grid_search.best_estimator_

# Evaluate the best KNN model on the test data
test_score = best_svm.score(X_test, y_test)

# Print the test score
print(f"Test score with best hyperparameters: {test_score}")

###KNN Randomized CV

In [None]:
# prompt: apply knn with random search

from sklearn.model_selection import RandomizedSearchCV

# Define the grid of hyperparameters to search
param_grid = {
    'n_neighbors': range(1, 20)
    # 'weights': ['uniform', 'distance'],
    # 'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute']
}

# Create a KNN classifier
knn = KNeighborsClassifier()

# Initialize the RandomizedSearchCV object
random_search = RandomizedSearchCV(knn, param_grid, cv=5, n_iter=10)

# Fit the random search object to the training data
random_search.fit(X_train, y_train)

# Get the best hyperparameters
best_params = random_search.best_params_

# Print the best hyperparameters
print("Best hyperparameters:")
print(best_params)

# Get the best KNN model
best_knn = random_search.best_estimator_

# Evaluate the best KNN model on the test data
test_score = best_knn.score(X_test, y_test)

# Print the test score
print(f"Test score with best hyperparameters: {test_score}")


Best hyperparameters:
{'n_neighbors': 5}
Test score with best hyperparameters: 0.5975609756097561


###KNN -Bayesian Hyperopt

In [None]:
!pip install hyperopt



In [None]:
import numpy as np
from sklearn.model_selection import cross_val_score
from hyperopt import hp, tpe, fmin, STATUS_OK, Trials


# Define the hyperparameter search space
space = {
    'n_neighbors': hp.choice('n_neighbors', range(1, 20))
    # 'weights': hp.choice('weights', ['uniform', 'distance']),
    # 'algorithm': hp.choice('algorithm', ['auto', 'ball_tree', 'kd_tree', 'brute'])
}

#z = 2x-3  , 0<x<10 function: optimum: max/min objective
#max Z = 2*10 - 3 =  17
#min Z = -(2x-3) = -2x+3 = -2*10+3  = -17  =17
#objective = max accuracy => min -accuracy => -accuracy

# Define the objective function
def objective(params):
  # Initialize the KNN classifier with the given hyperparameters
  knn = KNeighborsClassifier(**params)

  # Fit the KNN classifier on the training data
  knn.fit(X_train, y_train)

  # Perform cross-validation and calculate the negative accuracy score
  score = -np.mean(cross_val_score(knn, X_train, y_train, cv=5, scoring='accuracy'))

  # Return the loss dictionary
  return {'loss': score, 'status': STATUS_OK, 'model': knn}

# Run the optimization
trials = Trials()
best_params = fmin(fn=objective,
                  space=space,
                  algo=tpe.suggest,
                  max_evals=10,
                  trials=trials)

100%|██████████| 10/10 [00:01<00:00,  8.63trial/s, best loss: -0.6297596153846154]


In [None]:
trials.best_trial

{'state': 2,
 'tid': 6,
 'spec': None,
 'result': {'loss': -0.6297596153846154,
  'status': 'ok',
  'model': KNeighborsClassifier(n_neighbors=9)},
 'misc': {'tid': 6,
  'cmd': ('domain_attachment', 'FMinIter_Domain'),
  'workdir': None,
  'idxs': {'n_neighbors': [6]},
  'vals': {'n_neighbors': [8]}},
 'exp_key': None,
 'owner': None,
 'version': 0,
 'book_time': datetime.datetime(2024, 5, 18, 7, 22, 11, 776000),
 'refresh_time': datetime.datetime(2024, 5, 18, 7, 22, 11, 872000)}

In [None]:
# prompt: give me best model, best parameters, best score for hyperopt
# Get the best hyperparameters
best_params = trials.best_trial['misc']['vals']

# Get the best model
best_model = trials.best_trial['result']['model']

# Get the best score
best_score = -trials.best_trial['result']['loss']

# Print the results
print("Best parameters:", best_params)
print("Best model:", best_model)
print("Best score:", best_score)


Best parameters: {'n_neighbors': [8]}
Best model: KNeighborsClassifier(n_neighbors=9)
Best score: 0.6297596153846154


###Apply SVM - Hyperopt

In [None]:
#1:35
# prompt: apply bayesian optimization using hyperopt

from hyperopt import hp, tpe, fmin, STATUS_OK, Trials
from sklearn.model_selection import cross_val_score


def objective(space):
    # Define the hyperparameters
    C = space['C']
    kernel = space['kernel']
    gamma = space['gamma']

    # Create the SVM classifier
    svm_classifier = SVC(C=C, kernel=kernel, gamma=gamma)
    # # Train the classifier on the training data
    svm_classifier.fit(X_train, y_train)

    # # Evaluate the classifier on the validation data
    accuracy = accuracy_score(y_test, svm_classifier.predict(X_test))
    # We aim to maximize accuracy, therefore we return it as a negative value
    # Return the loss (negative accuracy)
    return {'loss': -accuracy, 'status': STATUS_OK, 'model': svm_classifier}


# Define the hyperparameter space
#hp.quniform('max_depth',10,1200,10) integer values
#hp.uniform('min_samples_split', 0,1) float values
space = {
    'C': hp.choice('C', [1, 10, 100, 1000]),
    'kernel': hp.choice('kernel', ['linear', 'rbf']),
    'gamma': hp.choice('gamma', ['scale', 'auto', 0.1, 0.2, 0.3, 0.5, 0.6])
}


# Perform Bayesian optimization
trials = Trials()
best = fmin(fn=objective, space=space, algo=tpe.suggest, max_evals=10, trials=trials)

# Get the best hyperparameters and model
best_params = {key: space[key][best[key]] for key in space}
best_model = trials.best_trial['result']['model']

# Print the results
print("Best Hyperparameters:")
print(best_params)

print("Best Model Accuracy:")
print(accuracy_score(y_test, best_model.predict(X_test)))

100%|██████████| 10/10 [02:27<00:00, 14.74s/trial, best loss: -0.7195121951219512]
Best Hyperparameters:
{'C': <hyperopt.pyll.base.Apply object at 0x7959e3254430>, 'kernel': <hyperopt.pyll.base.Apply object at 0x7959e3256050>, 'gamma': <hyperopt.pyll.base.Apply object at 0x7959e3254910>}
Best Model Accuracy:
0.7195121951219512
