In [19]:
import pandas as pd

# Load the new 100-row dataset
df = pd.read_csv("campaign_data_large.csv")

# Show the first 5 rows to confirm
df.head()




Unnamed: 0,User_ID,Age_Group,Region,Purchase_Hour,Product_Category,Campaign_Type,Campaign_Success
0,101,18-25,West,9,Electronics,Launch,No
1,102,18-25,West,17,Electronics,Launch,Yes
2,103,36-45,East,9,Beauty,Discount,Yes
3,104,26-35,North,10,Home Decor,Discount,No
4,105,26-35,North,12,Electronics,Launch,Yes


In [21]:
from sklearn.preprocessing import LabelEncoder

# Make a copy of the data
ml_data = df.copy()

# Convert 'Yes'/'No' to 1/0
ml_data['Campaign_Success'] = ml_data['Campaign_Success'].map({'Yes': 1, 'No': 0})

# Convert categorical columns to numeric
categorical_cols = ['Age_Group', 'Region', 'Product_Category', 'Campaign_Type']
le = LabelEncoder()

for col in categorical_cols:
    ml_data[col] = le.fit_transform(ml_data[col])

# View the processed data
ml_data.head()


Unnamed: 0,User_ID,Age_Group,Region,Purchase_Hour,Product_Category,Campaign_Type,Campaign_Success
0,101,0,3,9,2,2,0
1,102,0,3,17,2,2,1
2,103,2,0,9,0,0,1
3,104,1,1,10,3,0,0
4,105,1,1,12,2,2,1


In [23]:
from sklearn.model_selection import train_test_split

# Features (everything except target column)
X = ml_data.drop('Campaign_Success', axis=1)

# Target column (what we want to predict)
y = ml_data['Campaign_Success']

# Split into 80% training, 20% testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Confirm the split
print("Training set size:", X_train.shape)
print("Test set size:", X_test.shape)


Training set size: (80, 6)
Test set size: (20, 6)


In [25]:
from sklearn.linear_model import LogisticRegression

# Create the model
model = LogisticRegression()

# Train the model using training data
model.fit(X_train, y_train)

# Predict on the test data
y_pred = model.predict(X_test)

# Show predictions
print("Predicted labels:", y_pred)
print("Actual labels:   ", y_test.values)


Predicted labels: [1 1 1 0 1 1 0 1 1 1 1 1 0 1 1 1 1 1 1 1]
Actual labels:    [0 1 1 1 1 0 1 1 1 0 0 1 1 1 0 1 1 0 1 1]


In [27]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)

# Print results
print("✅ Accuracy:", round(accuracy * 100, 2), "%\n")
print("🔍 Confusion Matrix:\n", conf_matrix)
print("\n📊 Classification Report:\n", report)


✅ Accuracy: 55.0 %

🔍 Confusion Matrix:
 [[ 0  6]
 [ 3 11]]

📊 Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00         6
           1       0.65      0.79      0.71        14

    accuracy                           0.55        20
   macro avg       0.32      0.39      0.35        20
weighted avg       0.45      0.55      0.50        20

