In [2]:

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Toy customer dataset: one list per column, ten rows each.
data_buying = {
    "Customer ID": list(range(1, 11)),
    "Age": [25, 45, 35, 50, 30, 40, 28, 55, 32, 42],
    "Income ($1000s)": [40, 80, 60, 100, 45, 70, 50, 90, 55, 75],
    "Education Level": [
        "Bachelor's", "Master's", "PhD", "Bachelor's", "High School",
        "Master's", "Bachelor's", "PhD", "High School", "Master's",
    ],
    "Previous Purchases": [2, 10, 5, 8, 1, 6, 3, 12, 4, 7],
    "Marital Status": [
        "Single", "Married", "Single", "Married", "Divorced",
        "Married", "Single", "Widowed", "Married", "Single",
    ],
    "Buy Product (Target)": ["No", "Yes"] * 5,
}

# Step 1: Data Preparation
# Integer code assigned to every category of each text column.
CATEGORY_CODES = {
    "Education Level": {"High School": 0, "Bachelor's": 1, "Master's": 2, "PhD": 3},
    "Marital Status": {"Single": 0, "Married": 1, "Divorced": 2, "Widowed": 3},
    "Buy Product (Target)": {"No": 0, "Yes": 1},
}

# Build the frame, then swap each text column for its integer codes.
df_buying = pd.DataFrame(data_buying)
for column_name, codes in CATEGORY_CODES.items():
    df_buying[column_name] = df_buying[column_name].map(codes)

# Design matrix and label vector. Customer ID is left out of the features.
# NOTE(review): features are used on their raw scales (no standardisation).
feature_columns = [
    "Age",
    "Income ($1000s)",
    "Education Level",
    "Previous Purchases",
    "Marital Status",
]
X = df_buying[feature_columns].values
y = df_buying["Buy Product (Target)"].values

# Prepend a constant column of ones so theta[0] acts as the intercept.
X = np.column_stack((np.ones(X.shape[0]), X))

# Step 2: Define Logistic Regression Functions

# Sigmoid function
def sigmoid(z):
    """Return the logistic function 1 / (1 + e^-z), element-wise.

    The value is computed in a piecewise form that never exponentiates a
    positive number, so arbitrarily large-magnitude inputs do not trigger
    floating-point overflow.

    Parameters
    ----------
    z : scalar or array-like
        Input value(s) (logits).

    Returns
    -------
    numpy scalar or numpy.ndarray
        Values in [0, 1]; a scalar for scalar input, otherwise an array with
        the same shape as ``z``.
    """
    z = np.asarray(z)
    # exp(-|z|) always lies in (0, 1], so it cannot overflow.
    decay = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + e^-z);  z < 0: e^z / (1 + e^z) -- the same function.
    result = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () unwraps a 0-d result to a scalar and leaves arrays as-is.
    return result[()]

# Cost function
def compute_cost(X, y, theta):
    """Return the mean binary cross-entropy of a logistic model.

    The loss is evaluated directly from the logits ``z = X @ theta`` using
    ``-log(sigmoid(z)) == log(1 + e^-z)`` and
    ``-log(1 - sigmoid(z)) == log(1 + e^z)``. This needs no epsilon inside
    the logarithm, so the result is never negative and is not capped for
    confidently wrong predictions.

    Parameters
    ----------
    X : numpy.ndarray
        Design matrix, one row per sample (as passed to ``np.dot(X, theta)``).
    y : array-like
        Target values, one per row of ``X``.
    theta : numpy.ndarray
        Model weights, one per column of ``X``.

    Returns
    -------
    float
        Sum of the per-sample losses divided by ``len(y)``.

    Raises
    ------
    ZeroDivisionError
        If ``y`` is empty.
    """
    m = len(y)
    y = np.asarray(y, dtype=float)
    z = np.dot(X, theta)
    # logaddexp(0, t) == log(1 + e^t), evaluated without overflow for large |t|.
    per_sample = y * np.logaddexp(0, -z) + (1 - y) * np.logaddexp(0, z)
    cost = (1/m) * np.sum(per_sample)
    return cost

# Gradient descent function
def gradient_descent(X, y, theta, alpha, num_iterations):
    """Fit logistic-regression weights with batch gradient descent.

    Parameters
    ----------
    X : numpy.ndarray
        Design matrix, one row per sample.
    y : numpy.ndarray
        Target values, one per row of ``X``.
    theta : array-like
        Starting weights, one per column of ``X``. The caller's object is
        not modified; the update runs on a float copy.
    alpha : float
        Learning rate (step size applied to the gradient).
    num_iterations : int
        Number of update steps to perform.

    Returns
    -------
    theta : numpy.ndarray
        Weights after the final update.
    cost_history : list
        Value of ``compute_cost`` recorded after each update, in order.
    """
    m = len(y)
    # Private float copy: keeps the caller's array intact and lets the
    # in-place update below work even when an integer-typed theta is passed.
    theta = np.array(theta, dtype=float)
    cost_history = []

    for _ in range(num_iterations):
        h = sigmoid(np.dot(X, theta))
        # Gradient of the mean cross-entropy with respect to theta.
        gradient = (1/m) * np.dot(X.T, (h - y))
        theta -= alpha * gradient
        # Record the cost at the updated weights.
        cost_history.append(compute_cost(X, y, theta))

    return theta, cost_history

# Step 3: Train Logistic Regression Model

# Hyperparameters for gradient descent.
alpha = 0.1  # Learning rate
num_iterations = 1000

# One weight per column of X (the first column is the intercept), all zero.
theta = np.zeros(X.shape[1])

# Fit the weights; cost_history holds the cost after every update step.
theta, cost_history = gradient_descent(X, y, theta, alpha, num_iterations)

# Step 4: Predictions

# Probability of buying for each customer, thresholded at 0.5 into 0/1 labels.
probabilities = sigmoid(X.dot(theta))
predictions = (probabilities >= 0.5).astype(int).tolist()

# Step 5: Display Results

# Label/value pairs printed in order, one per line.
report = (
    ("Trained Weights (Theta):", theta),
    ("Predicted Probabilities:", probabilities),
    ("Predictions (Buy Product):", predictions),
    ("Actual (Buy Product):", y),
)
for label, value in report:
    print(label, value)

# Accuracy: fraction of rows whose predicted label matches the actual one.
accuracy = np.mean(np.asarray(predictions) == y)
print(f"Accuracy: {accuracy * 100:.5f}%")



Trained Weights (Theta): [ -5.78799452 -20.12102869   8.83541477   2.11927808  32.28637126
   4.24054931]
Predicted Probabilities: [2.99133068e-39 1.00000000e+00 5.50734827e-06 1.00000000e+00
 5.13541859e-75 9.99956696e-01 4.50838427e-13 1.00000000e+00
 6.74718592e-14 1.00000000e+00]
Predictions (Buy Product): [0, 1, 0, 1, 0, 1, 0, 1, 0, 1]
Actual (Buy Product): [0 1 0 1 0 1 0 1 0 1]
Accuracy: 100.00000%
