<a href="https://colab.research.google.com/github/aidanbolinger/MachineLearning/blob/main/ProgrammingQuiz.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
#Question 3: Perform 5-fold cross-validation on a `LogisticRegression`
#model using the iris dataset and report the average accuracy.
print("Question 3:\n")
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score

#Load Iris dataset
iris = load_iris()
X = iris.data
y = iris.target

#Create a logistic regression model
model = LogisticRegression(max_iter=200)

#Perform 5-fold cross-validation
scores = cross_val_score(model, X, y, cv=5, scoring='accuracy')

#Report the average accuracy
print("Cross-validation scores:", scores)
print("Average accuracy:", scores.mean())

Cross-validation scores: [0.96666667 1.         0.93333333 0.96666667 1.        ]
Average accuracy: 0.9733333333333334


In [3]:
#Question 4: Train a `GradientBoostingClassifier` on the breast cancer dataset and
#compare its accuracy with `RandomForestClassifier`.
print("\nQuestion 4:\n")
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

#Load breast cancer dataset
data = load_breast_cancer()
X = data.data
y = data.target

#Split dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

#Create classifiers
gb_clf = GradientBoostingClassifier(n_estimators=100, random_state=42)
rf_clf = RandomForestClassifier(n_estimators=100, random_state=42)

#Train classifiers
gb_clf.fit(X_train, y_train)
rf_clf.fit(X_train, y_train)

#Make predictions
gb_pred = gb_clf.predict(X_test)
rf_pred = rf_clf.predict(X_test)

#Calculate accuracy
gb_acc = accuracy_score(y_test, gb_pred)
rf_acc = accuracy_score(y_test, rf_pred)

#Print results
print("Gradient Boosting Classifier accuracy:", gb_acc)
print("Random Forest Classifier accuracy:", rf_acc)


Question 4:

Gradient Boosting Classifier accuracy: 0.956140350877193
Random Forest Classifier accuracy: 0.9649122807017544


In [4]:
#Question 5: Use `VotingClassifier` to combine `LogisticRegression`,
#`SVC`, and `KNeighborsClassifier`.
print("\nQuestion 5:\n")
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

#Load breast cancer dataset
data = load_breast_cancer()
X = data.data
y = data.target

#Split dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

#Create pipeines with scaling
pipe_lr = make_pipeline(StandardScaler(), LogisticRegression(max_iter=10000, random_state=42))
pipe_svc = make_pipeline(StandardScaler(), SVC(probability=True, random_state=42))
pipe_knn = make_pipeline(StandardScaler(), KNeighborsClassifier())

#Create voting classifier
voting_clf = VotingClassifier(
    estimators=[
      ('lr', pipe_lr),
      ('svc', pipe_svc),
      ('knn', pipe_knn)
    ],
    voting='soft')

#Train classifier
voting_clf.fit(X_train, y_train)

#Evaluate
y_pred = voting_clf.predict(X_test)
acc = accuracy_score(y_test, y_pred)

#Display results
print("Voting Classifier accuracy:", acc)


Question 5:

Voting Classifier accuracy: 0.9649122807017544


In [12]:
#Question 7: Perform hyperparameter
#tuning on a `GradientBoostingClassifier` using `GridSearchCV`
print("\nQuestion 7:\n")
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score

#Load breast cancer dataset
data = load_breast_cancer()
X = data.data
y = data.target

#Split dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

#Create the search space
param_grid = {
    'n_estimators': [50, 100],
    'learning_rate': [0.1, 0.2],
    'max_depth': [3, 4]
}

#Create classifier
clf = GradientBoostingClassifier(random_state=42)

#Create GridSearchCV
grid_search = GridSearchCV(
    estimator = clf,
    param_grid = param_grid,
    cv=5,
    scoring='accuracy',
    n_jobs=-1,
    )

#Fit classifier
grid_search.fit(X_train, y_train)

#Evaluate
best_clf = grid_search.best_estimator_
y_pred = best_clf.predict(X_test)
acc = accuracy_score(y_test, y_pred)

#Display results
print("Best parameters:", grid_search.best_params_)
print("Best accuracy:", acc)



Question 7:

Best parameters: {'learning_rate': 0.2, 'max_depth': 3, 'n_estimators': 100}
Best accuracy: 0.956140350877193
