In [2]:
import pandas as pd
from io import StringIO
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, roc_auc_score
import numpy as np

# Step 1: Load and preprocess
df = pd.read_csv("./datafiles/Advertising.csv")
df = df[['TV', 'radio', 'newspaper', 'sales']].dropna()
df['high_sales'] = (df['sales'] > df['sales'].mean()).astype(int)
X = df[['TV', 'radio', 'newspaper']]
y = df['high_sales']
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Step 2: Split data
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Step 3: Train model
model = LogisticRegression(random_state=42)
model.fit(X_train, y_train)
print("Coefficients:", dict(zip(['TV', 'radio', 'newspaper'], model.coef_[0])))
print("Intercept:", model.intercept_[0])

# Step 4: Predict
y_pred_proba = model.predict_proba(X_test)[:, 1]
y_pred = model.predict(X_test)

# Step 5: Evaluate
print(f"Accuracy: {accuracy_score(y_test, y_pred):.3f}")
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))


Coefficients: {'TV': np.float64(2.889640911267738), 'radio': np.float64(2.8241720901588283), 'newspaper': np.float64(-0.11846532025054544)}
Intercept: -0.42783183937371655
Accuracy: 0.975
Confusion Matrix:
 [[24  1]
 [ 0 15]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.96      0.98        25
           1       0.94      1.00      0.97        15

    accuracy                           0.97        40
   macro avg       0.97      0.98      0.97        40
weighted avg       0.98      0.97      0.98        40

