In [42]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler

# Toy dataset (8 rows) used only to demonstrate the logistic-regression
# workflow; it is far too small for the reported metrics to mean much.
data = {
    'age': [22, 25, 47, 52, 46, 56, 23, 33],
    'income': [25000, 27000, 60000, 52000, 49000, 80000, 22000, 30000],
    'pageCount': [250, 300, 500, 450, 600, 700, 200, 320],
    'popularityScore': [8.0, 8.0, 9.5, 9.5, 9.5, 9.5, 8.0, 8.5],
    'willBuy': [0, 0, 1, 1, 1, 1, 0, 0]  # 1 = will buy, 0 = won't buy
}

df = pd.DataFrame(data)

# Features and target
X = df[['age', 'income', 'pageCount', 'popularityScore']]
y = df['willBuy']

# Split BEFORE scaling so the scaler never sees the held-out rows.
# stratify=y keeps both classes represented in the 2-row test set.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42, stratify=y
)

# Fit the scaler on the training rows only, then apply the same
# transformation to both splits. The model is trained on the scaled
# features (previously the scaled matrix was computed but never used).
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full feature matrix scaled with the training statistics; the name is kept
# in case later cells reference it.
X_scaled = scaler.transform(X)

# Train logistic regression model
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Predict on the held-out rows
y_pred = model.predict(X_test)

# Evaluation
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

Confusion Matrix:
 [[1 0]
 [0 1]]

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00         1
           1       1.00      1.00      1.00         1

    accuracy                           1.00         2
   macro avg       1.00      1.00      1.00         2
weighted avg       1.00      1.00      1.00         2



In [6]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, classification_report

# Step 1: Create a larger, more varied synthetic dataset
# The legacy global seed and the order of the random calls below are kept
# as-is so the generated data stays reproducible.
np.random.seed(42)

# Generate 100 samples
n_samples = 100
ages = np.random.randint(18, 60, size=n_samples)
income = np.random.randint(20000, 100000, size=n_samples)
page_counts = np.random.randint(100, 1000, size=n_samples)
popularity_scores = np.round(np.random.uniform(7.0, 10.0, size=n_samples), 1)

# Rule to simulate whether they will buy or not
# (just for demo: a weighted sum of normalised income, page count and
# popularity; age is deliberately not part of the rule)
buy_probability = (
    0.3 * (income / income.max()) +
    0.2 * (page_counts / page_counts.max()) +
    0.5 * (popularity_scores / 10)
)

# Add noise and threshold to convert the score to a binary class.
# NOTE(review): with this 0.6 threshold class 1 appears to dominate the
# labels, so accuracy alone is a weak summary -- check per-class recall.
noise = np.random.normal(0, 0.1, n_samples)
buy_labels = (buy_probability + noise > 0.6).astype(int)

# Combine into DataFrame
df = pd.DataFrame({
    'age': ages,
    'income': income,
    'pageCount': page_counts,
    'popularityScore': popularity_scores,
    'willBuy': buy_labels
})

# Step 2: Features and Target
X = df[['age', 'income', 'pageCount', 'popularityScore']]
y = df['willBuy']

# Step 3: Train/test split on the RAW features.
# Splitting before scaling keeps test-set statistics out of preprocessing.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42, stratify=y
)

# Step 4: Scale the data -- fit on the training rows only, then apply the
# same transformation to the held-out rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full feature matrix scaled with the training statistics; the name is kept
# in case later cells reference it.
X_scaled = scaler.transform(X)

# Step 5: Train logistic regression with higher iterations
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Step 6: Predictions and Evaluation
y_pred = model.predict(X_test)

# Step 7: Print results
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))


Confusion Matrix:
 [[ 1  2]
 [ 0 22]]

Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.33      0.50         3
           1       0.92      1.00      0.96        22

    accuracy                           0.92        25
   macro avg       0.96      0.67      0.73        25
weighted avg       0.93      0.92      0.90        25

