In [1]:
import numpy as np

In [2]:
class LogisticRegression:
    def __init__(self, weight_range=(-1, 1), bias_range=(-1, 1), resolution=0.1):
        self.bias = 0.0
        self.weights = None
        self.weight_range = weight_range
        self.bias_range = bias_range
        self.resolution = resolution

    def _sigmoid(self, z):
        return 1 / (1 + np.exp(-z))

    def _cross_entropy_loss(self, y_true, y_pred):
        y_pred = np.clip(y_pred, 1e-9, 1 - 1e-9)  # Avoid log(0)
        return -np.mean(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))

    def _feature_extraction(self, X):
        # Convert string inputs to feature vectors (count 'a' occurrences)
        return np.array([[x.count('a')] for x in X], dtype=np.float32)

    def fit(self, X_train, y_train):
        X_train = self._feature_extraction(X_train)
        n_samples, n_features = X_train.shape

        best_loss = float('inf')
        best_weights = None
        best_bias = None

        weight_values = np.arange(self.weight_range[0], self.weight_range[1], self.resolution)
        bias_values = np.arange(self.bias_range[0], self.bias_range[1], self.resolution)

        for w in weight_values:
            for b in bias_values:
                linear_model = np.dot(X_train, w) + b
                y_pred = self._sigmoid(linear_model)
                loss = self._cross_entropy_loss(y_train, y_pred)

                if loss < best_loss:
                    best_loss = loss
                    best_weights = w
                    best_bias = b

        self.weights = np.array([best_weights])
        self.bias = best_bias

    def predict_proba(self, X):
        X = self._feature_extraction(X)
        linear_model = np.dot(X, self.weights) + self.bias
        return self._sigmoid(linear_model)

    def predict(self, X):
        return [1 if p >= 0.5 else 0 for p in self.predict_proba(X)]

    def evaluate(self, X_test, y_test):
        y_pred = self.predict(X_test)
        accuracy = np.mean(np.array(y_pred) == y_test)
        return accuracy


In [3]:
# Training data
X_train = [
    "aabaaabaaaaa", "bbaaaaab", "abbbbbbb", "babaabaaaaa",
    "abbbbbbbbbbb", "bbbaabbb", "bbbbbbbbbbbbabb", "abaaaaaaaa",
    "babbabbb", "abaababa"
]
y_train = np.array([0, 0, 1, 0, 1, 1, 1, 0, 1, 0])

X_test = ["bbbbbbbbbbbabb", "baaaaaaaa"]
y_test = np.array([1, 0])


In [4]:
# Initialize model
model = LogisticRegression(weight_range=(-2, 2), bias_range=(-2, 2), resolution=0.1)

# Train model
print("Training model...")
model.fit(X_train, y_train)
print("Training complete.")

# Test model
print("Testing model...")
y_pred = model.predict(X_test)
y_proba = model.predict_proba(X_test)
accuracy = model.evaluate(X_test, y_test)

Training model...
Training complete.
Testing model...


In [5]:
# Display results
print("Predicted probabilities:", y_proba)
print("Predicted labels:", y_pred)
print("Model accuracy:", accuracy)

Predicted probabilities: [0.5 0.5]
Predicted labels: [1, 1]
Model accuracy: 0.5
