In [46]:
# pandas is imported in this cell because the notebook's main import cell
# comes later; without it a fresh-kernel "Run All" fails here with NameError.
import pandas as pd

# Load the mushroom CSV into a DataFrame.
df = pd.read_csv('/content/mushroom_csv.csv')

In [50]:
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
from ucimlrepo import fetch_ucirepo

# Create one LabelEncoder per column, keyed by column name, then replace each
# column of df with its integer-encoded values. The fitted encoders stay
# available in label_encoders.
label_encoders = {name: LabelEncoder() for name in df.columns}
for name, encoder in label_encoders.items():
    df[name] = encoder.fit_transform(df[name])

# Split the encoded frame into the target column and the remaining features
y = df['class']  # target variable
X = df.drop('class', axis=1)  # features


def sigmoid(z):
    """Logistic sigmoid ``1 / (1 + exp(-z))``, evaluated without overflow.

    Parameters
    ----------
    z : scalar or array-like
        Input value(s); converted to a float ndarray internally.

    Returns
    -------
    numpy.float64 or numpy.ndarray
        Element-wise sigmoid of ``z``: a NumPy scalar for scalar input,
        otherwise an ndarray with the same shape as ``z``.
    """
    z = np.asarray(z, dtype=float)
    # exp is only ever taken of a non-positive number, so it cannot overflow
    # (the naive form overflows inside exp(-z) for large negative z).
    decay = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + e^-z);  z < 0: e^z / (1 + e^z) -- algebraically equal.
    result = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # np.where returns a 0-d array for scalar input; unwrap it to a scalar.
    return result[()] if result.ndim == 0 else result

def mse(y, t):
    """Return half the mean of the squared differences between ``y`` and ``t``.

    Note the leading factor of one half: the value is ``0.5 * mean((y - t)**2)``,
    not the plain mean squared error.
    """
    squared_error = (y - t) ** 2
    return np.mean(squared_error) / 2

def sigmoid_derivative(z):
    """Return ``sigmoid(z) * (1 - sigmoid(z))`` for the given ``z``."""
    activation = sigmoid(z)
    return activation * (1 - activation)

def update_weights(X, y, t, w, b, learning_rate):
    """Take one gradient-descent step on the squared error of a sigmoid unit.

    Parameters
    ----------
    X : ndarray
        Input rows. Used as ``np.dot(X, w)`` and ``X.T``; the training loop
        in this notebook passes a one-row slice ``X[i:i+1]``.
    y : ndarray
        Model outputs for the rows in ``X`` (the caller passes
        ``sigmoid(np.dot(X, w) + b)``).
    t : ndarray
        Target values for the rows in ``X``.
    w : ndarray
        Current weight vector.
    b : scalar
        Current bias.
    learning_rate : float
        Step size applied to both gradients.

    Returns
    -------
    tuple
        ``(w, b)`` after the step. New objects are returned; the ``w`` array
        passed in is not modified.
    """
    # Pre-activation. The sigmoid derivative has to be evaluated here, at z,
    # not at the already-squashed prediction sigmoid(z).
    z = np.dot(X, w) + b

    # Output error scaled by the local slope of the sigmoid
    error = y - t
    delta = error * sigmoid_derivative(z)

    # Gradients are summed (not averaged) over the rows of X
    gradient_w = np.dot(X.T, delta)
    gradient_b = np.sum(delta)

    # Out-of-place updates so the caller's weight array is left untouched
    w = w - learning_rate * gradient_w
    b = b - learning_rate * gradient_b

    return w, b


In [51]:
def train_logistic_regression(X, t, learning_rate=0.01, epochs=1000, random_state=None):
    """Train a sigmoid unit with per-sample (stochastic) gradient descent.

    Parameters
    ----------
    X : array-like
        Two-dimensional feature matrix, one row per sample. DataFrames are
        accepted and converted to a float ndarray.
    t : array-like
        Target value for each row of ``X``. Series are accepted and converted
        to a float ndarray.
    learning_rate : float, default 0.01
        Step size passed to ``update_weights``.
    epochs : int, default 1000
        Number of full passes over the data.
    random_state : int or None, default None
        Seed for the per-epoch shuffle. ``None`` keeps the previous behaviour
        of drawing from NumPy's global random state.

    Returns
    -------
    tuple
        ``(w, b)``: the learned weight vector and bias.
    """
    # The shuffle below uses positional fancy indexing, which needs plain
    # arrays: a DataFrame/Series would be indexed by label instead and fail.
    X = np.asarray(X, dtype=float)
    t = np.asarray(t, dtype=float)

    # Global NumPy RNG by default; a private seeded RNG when requested.
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    # Initialize weights and bias
    w = np.zeros(X.shape[1])
    b = 0

    n_samples = X.shape[0]

    for epoch in range(epochs):
        # Visit the samples in a fresh random order each epoch
        indices = np.arange(n_samples)
        rng.shuffle(indices)
        X = X[indices]
        t = t[indices]

        # One weight update per sample; one-row slices keep X two-dimensional
        for i in range(n_samples):
            x_i = X[i:i+1]
            t_i = t[i:i+1]
            y_i = sigmoid(np.dot(x_i, w) + b)
            w, b = update_weights(x_i, y_i, t_i, w, b, learning_rate)

        # Print cost every 100 epochs
        if epoch % 100 == 0:
            cost = mse(sigmoid(np.dot(X, w) + b), t)
            print(f'Epoch {epoch}, Cost: {cost}')

    return w, b

In [55]:

# Hold out 30% of the rows as a test set; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Preview the first rows of the training features and the training target
(X_train.head(), y_train.head())

(      cap-shape  cap-surface  cap-color  bruises%3F  odor  gill-attachment  \
 5921          5            2          0           1     2                1   
 1073          5            0          3           1     5                1   
 3710          5            0          3           0     2                1   
 144           5            3          9           1     0                1   
 5469          5            3          4           0     8                1   
 
       gill-spacing  gill-size  gill-color  stalk-shape  ...  \
 5921             0          0           3            1  ...   
 1073             0          0           7            1  ...   
 3710             0          0           7            0  ...   
 144              0          0           4            0  ...   
 5469             0          1           0            1  ...   
 
       stalk-surface-below-ring  stalk-color-above-ring  \
 5921                         0                       7   
 1073               

In [57]:
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import accuracy_score

# Logistic regression trained with stochastic gradient descent.
# 'log_loss' is the current name of the logistic loss; the former alias 'log'
# was deprecated in scikit-learn 1.1 and removed in 1.3.
log_reg_sgd = SGDClassifier(loss='log_loss', max_iter=1000, tol=1e-3, random_state=42)

# Fit the model to the training data
log_reg_sgd.fit(X_train, y_train)

# Predict the class labels for the test set
y_pred = log_reg_sgd.predict(X_test)

# Fraction of test rows whose predicted label matches the true label
accuracy = accuracy_score(y_test, y_pred)

# Output the accuracy
accuracy




0.9589827727645611