# Fine-Tuning Your Model

## How good is your model?

### Assessing a diabetes prediction classifier
metrics: accuracy, precision, recall, F1-score

In [None]:
# Import the evaluation helpers and the classifier used in this cell.
# KNeighborsClassifier is imported here so the cell does not rely on a name
# left behind in the kernel.
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.neighbors import KNeighborsClassifier

# X_train, X_test, y_train and y_test are not created in this cell; they are
# expected to already exist in the kernel (presumably the diabetes train/test
# split -- confirm against the data-loading step).

# Instantiate a k-nearest-neighbours classifier that uses 6 neighbours
knn = KNeighborsClassifier(n_neighbors=6)

# Fit the model to the training data
knn.fit(X_train, y_train)

# Predict the labels of the test data: y_pred
y_pred = knn.predict(X_test)

# Generate the confusion matrix and classification report
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

## Logistic regression and the ROC curve

### Building a logistic regression model

In [None]:
# Bring in the logistic regression estimator
from sklearn.linear_model import LogisticRegression

# Create the classifier with default settings and train it in one step
# (fit() returns the fitted estimator itself, so the result can be bound
# directly to logreg)
logreg = LogisticRegression().fit(X_train, y_train)

# predict_proba returns one column per class; keep only the second column,
# i.e. the probability of the class listed second in logreg.classes_
y_pred_probs = logreg.predict_proba(X_test)[:, 1]

# Show the first ten predicted probabilities
print(y_pred_probs[:10])

### The ROC curve

In [None]:
# Import roc_curve, plus pyplot, which this cell uses as plt. Importing it
# here means the cell does not rely on plt having been left in the kernel.
from sklearn.metrics import roc_curve
import matplotlib.pyplot as plt

# Generate ROC curve values: fpr, tpr, thresholds
# (y_test and y_pred_probs are expected to exist from earlier cells)
fpr, tpr, thresholds = roc_curve(y_test, y_pred_probs)

# Dashed black diagonal from (0, 0) to (1, 1): the reference line a classifier
# that guesses at random would follow
plt.plot([0, 1], [0, 1], 'k--')

# Plot tpr against fpr
plt.plot(fpr, tpr)
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve for Diabetes Prediction')
plt.show()

### ROC AUC

In [None]:
# Import roc_auc_score together with the other metric helpers this cell calls,
# so the cell does not depend on imports made in an earlier cell
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score

# Calculate roc_auc_score from the predicted positive-class probabilities
print(roc_auc_score(y_test, y_pred_probs))

# Predict hard class labels with the logistic regression model.
# The y_pred variable left in the kernel was produced by the KNN classifier,
# so reusing it here would report on the wrong model. A separate name is used
# so the KNN predictions are not overwritten.
y_pred_logreg = logreg.predict(X_test)

# Calculate the confusion matrix
print(confusion_matrix(y_test, y_pred_logreg))

# Calculate the classification report
print(classification_report(y_test, y_pred_logreg))

## Hyperparameter tuning

### Hyperparameter tuning with GridSearchCV

In [None]:
# X_train, X_test, y_train, and y_test have been preloaded.
# A KFold() object has been created and stored as kf, along with a lasso
# regression model as lasso. None of these are created in this cell.

# Import GridSearchCV, plus numpy, which this cell uses as np
from sklearn.model_selection import GridSearchCV
import numpy as np

# Set up the parameter grid for "alpha", using np.linspace() to create
# 20 evenly spaced values ranging from 0.00001 to 1
param_grid = {"alpha": np.linspace(0.00001, 1, 20)}

# Alternative grid for ridge regression (kept for reference, not executed).
# NOTE: np.arange(0.0001, 1, 10) would use a step of 10 and therefore yield a
# single value; np.linspace gives 10 evenly spaced candidates instead.
# param_grid = {"alpha": np.linspace(0.0001, 1, 10),
#               "solver": ["sag", "lsqr"]}

# Instantiate lasso_cv: a grid search over param_grid using kf for the
# cross-validation splits
lasso_cv = GridSearchCV(lasso, param_grid, cv=kf)

# Fit to the training data, then report the best parameter combination and
# its mean cross-validated score
lasso_cv.fit(X_train, y_train)
print("Tuned lasso parameters: {}".format(lasso_cv.best_params_))
print("Tuned lasso score: {}".format(lasso_cv.best_score_))

### Hyperparameter tuning with RandomizedSearchCV
Training and test sets from diabetes_df have been pre-loaded for you as X_train, X_test, y_train, and y_test, where the target is "diabetes". A logistic regression model has been created and stored as logreg, as well as a KFold variable stored as kf.