In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def sigmoid(z):
    """sigmoid activation function"""
    return 1 / (1 + np.exp(-z))

def logistic_regression(X, y, learning_rate=0.1, num_iterations=1000, add_intercept=True):
    """logistic regression with gradient descent"""
    if add_intercept:
        X = np.hstack((np.ones((X.shape[0], 1)), X))  # add intercept column
    n_samples, n_features = X.shape
    weights = np.random.randn(n_features)  # initialize weights randomly
    for i in range(num_iterations):
        z = np.dot(X, weights)
        y_pred = sigmoid(z)
        error = y_pred - y
        gradient = np.dot(X.T, error) / n_samples
        weights -= learning_rate * gradient
    return weights

# read in data from .data file
data = pd.read_csv('clean2.data', header=None, delimiter=',', index_col=False)
data = data.drop(columns=data.columns[:2])

scaler = StandardScaler()

# fit scaler on data
scaler.fit(data.iloc[:, :-1])  # assumes last column is the target variable

# transform data
data.iloc[:, :-1] = scaler.transform(data.iloc[:, :-1])
# preprocess data to extract input features and binary labels
X = data.iloc[:, :-1].values  # input features
y = (data.iloc[:, -1].values == 1).astype(int)  # binary labels

# run logistic regression with gradient descent
learned_weights = logistic_regression(X, y)

print("Learned weights:", learned_weights)


Learned weights: [-4.53888294e+00 -1.86664859e-01 -8.64233376e-01 -6.16347159e-01
 -6.86105174e-01 -6.91941208e-01 -2.09427659e-01 -2.65104020e-01
 -4.72027489e-01  5.50168618e-01 -2.45234164e+00 -1.05335096e+00
 -4.87382655e-01  7.61241603e-01  1.17601179e+00  8.22200931e-01
  1.40717887e-02  1.94237883e+00  4.18307476e-01 -7.89320140e-01
 -7.33531924e-01 -7.02046706e-01  4.43774595e-01 -2.42361456e-01
  3.99754073e-01 -2.92435931e-01 -4.91685135e-01 -1.46036430e+00
  4.77527450e-01 -5.70829312e-01  8.84351179e-01 -1.37965323e+00
 -8.16030665e-01 -1.10872091e+00 -6.91653028e-01 -4.65076299e-02
 -3.34077778e-01 -5.74434978e-01 -3.05232013e-01 -3.01345698e-01
 -1.19457833e+00  6.61819987e-01  1.96864836e-01  9.82939585e-02
  1.27022077e+00 -5.36664893e-01 -2.30696234e-01 -7.93664057e-01
  8.18475080e-02  3.83230262e-01  1.43499051e+00  5.12762718e-01
  2.77561358e-01  4.67289356e-01  6.00048446e-01  7.99780628e-01
  8.25216863e-01  1.40861604e+00 -8.88024908e-01  9.67703941e-01
  3.4586

In [3]:
test = pd.read_csv('clean1.data', header=None, delimiter=',', index_col=False)

In [6]:
test = test.drop(columns=test.columns[:2])

In [9]:
scaler = StandardScaler()

# fit scaler on data
scaler.fit(test.iloc[:, :-1])  # assumes last column is the target variable

# transform data
test.iloc[:, :-1] = scaler.transform(test.iloc[:, :-1])
# preprocess data to extract input features and binary labels
X_test = test.iloc[:, :-1].values  # input features
y_test = (test.iloc[:, -1].values == 1).astype(int)  # binary

In [10]:
X_test = np.hstack((np.ones((X_test.shape[0], 1)), X_test))

In [11]:

# assume you have the sigmoid of the learned weights multiplied by the test data
sigmoid_values = sigmoid(np.dot(X_test,learned_weights))
# assume you have the true labels of the test data
true_labels = y_test

# convert sigmoid values to binary predictions using a threshold of 0.5
binary_predictions = (sigmoid_values >= 0.5).astype(int)

# calculate evaluation metrics
accuracy = accuracy_score(true_labels, binary_predictions)
precision = precision_score(true_labels, binary_predictions)
recall = recall_score(true_labels, binary_predictions)
f1 = f1_score(true_labels, binary_predictions)

# print evaluation metrics
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 score:", f1)


Accuracy: 0.707983193277311
Precision: 0.84
Recall: 0.4057971014492754
F1 score: 0.5472312703583062


In [13]:
# assume you have the sigmoid of the learned weights multiplied by the test data
X = np.hstack((np.ones((X.shape[0], 1)), X))
sigmoid_values = sigmoid(np.dot(X,learned_weights))
# assume you have the true labels of the test data
true_labels = y

# convert sigmoid values to binary predictions using a threshold of 0.5
binary_predictions = (sigmoid_values >= 0.5).astype(int)

# calculate evaluation metrics
accuracy = accuracy_score(true_labels, binary_predictions)
precision = precision_score(true_labels, binary_predictions)
recall = recall_score(true_labels, binary_predictions)
f1 = f1_score(true_labels, binary_predictions)

# print evaluation metrics
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 score:", f1)


Accuracy: 0.9201273113064565
Precision: 0.7930622009569378
Recall: 0.6519174041297935
F1 score: 0.7155963302752293
