In [None]:
# import packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

In [None]:
# Load 500_processed_data.csv and discard every row that contains a NaN.
df = pd.read_csv('500_processed_data.csv').dropna()

# Target vector: the 'condition' column cast to int
# (the column is reportedly boolean — confirm against the CSV).
y = df['condition'].astype(int)

# Feature matrix: every column except 'condition', cast to float.
X = df.drop(columns='condition').astype(float)

In [None]:
# Hold out 25% of the rows as a test set.
# The seed is fixed so the split, and every accuracy figure derived from it,
# is identical on each Restart & Run All.
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

In [None]:
# Baseline: logistic regression fitted with scikit-learn's default settings.
logistic_regression = LogisticRegression()
logistic_regression.fit(x_train, y_train)

# Track accuracy and iterations
accuracy_list = []
iteration_list = []

# Measure test accuracy as a function of the solver's iteration budget.
# Refitting an identical model on identical data yields the same accuracy every
# time, so each pass instead caps the solver at `i` iterations via max_iter.
# A separate estimator is used so the baseline model above is left untouched.
# Small budgets are expected to raise ConvergenceWarning.
for i in range(1, 10):
    capped_model = LogisticRegression(max_iter=i)
    capped_model.fit(x_train, y_train)
    y_pred = capped_model.predict(x_test)
    accuracy = accuracy_score(y_test, y_pred)

    # Append accuracy and iteration number to lists
    accuracy_list.append(accuracy)
    iteration_list.append(i)

    # Print accuracy for each iteration (optional)
    print(f"Iteration {i}: Accuracy = {accuracy}")

# Plot accuracy vs. iterations; markers make each measured point visible.
plt.plot(iteration_list, accuracy_list, marker='o')
plt.xlabel('Iteration')
plt.ylabel('Accuracy')
plt.title('Accuracy vs. Iterations')
plt.show()


In [None]:
# Fit a fresh default-parameter logistic regression on the training split
# (fit() hands back the fitted estimator, so construction and fit can be chained).
logistic_regression = LogisticRegression().fit(x_train, y_train)

# Score the model on the held-out split and report the fraction of correct labels.
y_pred = logistic_regression.predict(x_test)
accuracy = metrics.accuracy_score(y_test, y_pred)
print(accuracy)

In [None]:
# optimize logistic regression model
from sklearn.model_selection import GridSearchCV

# Candidate values for C (inverse regularisation strength), searched with
# 5-fold cross-validation on the training split only.
parameters = {'C': [0.001, 0.01, 0.1, 1, 10, 100]}
clf = GridSearchCV(LogisticRegression(), parameters, cv=5, verbose=0)
clf.fit(x_train, y_train)

# print best parameter after tuning
print(clf.best_params_)

# print how our model looks after hyper-parameter tuning
print(clf.best_estimator_)

# Use the estimator selected by the search. With the default refit=True it has
# already been refitted on the whole training split with the best C, so it can
# predict immediately. Building a new, unfitted LogisticRegression with a
# hard-coded C would raise NotFittedError and ignore the search result.
logistic_regression = clf.best_estimator_

# predict on test data
y_pred = logistic_regression.predict(x_test)

# compute accuracy
accuracy = accuracy_score(y_test, y_pred)
print(accuracy)

In [None]:
# Plot accuracy against C.
# `accuracy` is a single number, so plotting it alone draws nothing visible.
# The per-C curve comes from the grid search's cross-validation results instead;
# the held-out test accuracy is shown as a horizontal reference line.
c_values = [candidate['C'] for candidate in clf.cv_results_['params']]
cv_accuracy = clf.cv_results_['mean_test_score']

fig, ax = plt.subplots()
ax.plot(c_values, cv_accuracy, marker='o', label='Mean CV accuracy')
ax.axhline(accuracy, color='grey', linestyle='--', label='Test accuracy')
ax.set_xscale('log')  # the C grid spans several orders of magnitude
ax.set_xlabel('C')
ax.set_ylabel('Accuracy')
ax.set_title('Accuracy vs. C')
ax.legend()
plt.show()