In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder

df = pd.read_excel(r'C:\Users\Shivani\Downloads\ML marketing_campaign.xlsx')

#  Clean column names 
df.columns = (
    df.columns
    .str.strip()
    .str.lower()
    .str.replace(r"[^\w\s]", "", regex=True)  # remove punctuation
    .str.replace(r"\s+", "_", regex=True)     # replace spaces with underscores
)

# Convert all object-type (text) columns into numerical using label encoding 
label_encoder = LabelEncoder()
for column in df.select_dtypes(include='object').columns:
    df[column + "_encoded"] = label_encoder.fit_transform(df[column].astype(str))

# Preview cleaned & encoded data 
print(" Cleaned DataFrame Preview ")
print(df.head())

# df.to_csv("cleaned_survey_data.csv", index=False)

 Cleaned DataFrame Preview 
     id  year_birth   education marital_status   income  kidhome  teenhome  \
0  5524        1957  Graduation         Single  58138.0        0         0   
1  2174        1954  Graduation         Single  46344.0        1         1   
2  4141        1965  Graduation       Together  71613.0        0         0   
3  6182        1984  Graduation       Together  26646.0        1         0   
4  5324        1981         PhD        Married  58293.0        1         0   

  dt_customer  recency  mntwines  ...  acceptedcmp5  acceptedcmp1  \
0  2012-09-04       58       635  ...             0             0   
1  2014-03-08       38        11  ...             0             0   
2  2013-08-21       26       426  ...             0             0   
3  2014-02-10       26        11  ...             0             0   
4  2014-01-19       94       173  ...             0             0   

   acceptedcmp2  complain  z_costcontact  z_revenue  response  \
0             0        

In [3]:
df.shape

(2240, 32)

In [5]:
df.columns

Index(['id', 'year_birth', 'education', 'marital_status', 'income', 'kidhome',
       'teenhome', 'dt_customer', 'recency', 'mntwines', 'mntfruits',
       'mntmeatproducts', 'mntfishproducts', 'mntsweetproducts',
       'mntgoldprods', 'numdealspurchases', 'numwebpurchases',
       'numcatalogpurchases', 'numstorepurchases', 'numwebvisitsmonth',
       'acceptedcmp3', 'acceptedcmp4', 'acceptedcmp5', 'acceptedcmp1',
       'acceptedcmp2', 'complain', 'z_costcontact', 'z_revenue', 'response',
       'education_encoded', 'marital_status_encoded', 'dt_customer_encoded'],
      dtype='object')

In [7]:
df.isnull().sum()

id                         0
year_birth                 0
education                  0
marital_status             0
income                    24
kidhome                    0
teenhome                   0
dt_customer                0
recency                    0
mntwines                   0
mntfruits                  0
mntmeatproducts            0
mntfishproducts            0
mntsweetproducts           0
mntgoldprods               0
numdealspurchases          0
numwebpurchases            0
numcatalogpurchases        0
numstorepurchases          0
numwebvisitsmonth          0
acceptedcmp3               0
acceptedcmp4               0
acceptedcmp5               0
acceptedcmp1               0
acceptedcmp2               0
complain                   0
z_costcontact              0
z_revenue                  0
response                   0
education_encoded          0
marital_status_encoded     0
dt_customer_encoded        0
dtype: int64

In [9]:
drop_cols = ['id', 'dt_customer', 'z_costcontact', 'z_revenue']
df = df.drop(columns=drop_cols)

In [11]:
categorical_cols = ['education', 'marital_status']
df_encoded = pd.get_dummies(df, columns=categorical_cols, drop_first=True)

# Check the new shape and columns
print(df_encoded.shape)
print(df_encoded.columns)

(2240, 37)
Index(['year_birth', 'income', 'kidhome', 'teenhome', 'recency', 'mntwines',
       'mntfruits', 'mntmeatproducts', 'mntfishproducts', 'mntsweetproducts',
       'mntgoldprods', 'numdealspurchases', 'numwebpurchases',
       'numcatalogpurchases', 'numstorepurchases', 'numwebvisitsmonth',
       'acceptedcmp3', 'acceptedcmp4', 'acceptedcmp5', 'acceptedcmp1',
       'acceptedcmp2', 'complain', 'response', 'education_encoded',
       'marital_status_encoded', 'dt_customer_encoded', 'education_Basic',
       'education_Graduation', 'education_Master', 'education_PhD',
       'marital_status_Alone', 'marital_status_Divorced',
       'marital_status_Married', 'marital_status_Single',
       'marital_status_Together', 'marital_status_Widow',
       'marital_status_YOLO'],
      dtype='object')


In [13]:
df_encoded.columns


Index(['year_birth', 'income', 'kidhome', 'teenhome', 'recency', 'mntwines',
       'mntfruits', 'mntmeatproducts', 'mntfishproducts', 'mntsweetproducts',
       'mntgoldprods', 'numdealspurchases', 'numwebpurchases',
       'numcatalogpurchases', 'numstorepurchases', 'numwebvisitsmonth',
       'acceptedcmp3', 'acceptedcmp4', 'acceptedcmp5', 'acceptedcmp1',
       'acceptedcmp2', 'complain', 'response', 'education_encoded',
       'marital_status_encoded', 'dt_customer_encoded', 'education_Basic',
       'education_Graduation', 'education_Master', 'education_PhD',
       'marital_status_Alone', 'marital_status_Divorced',
       'marital_status_Married', 'marital_status_Single',
       'marital_status_Together', 'marital_status_Widow',
       'marital_status_YOLO'],
      dtype='object')

In [15]:
df_encoded['income'].fillna(df_encoded['income'].median(), inplace=True)


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_encoded['income'].fillna(df_encoded['income'].median(), inplace=True)


In [17]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

X = df_encoded.drop('response', axis=1)
y = df_encoded['response']

# 3. Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 4. Feature Scaling (Logistic Regression benefits from this)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 5. Train the Logistic Regression model
model = LogisticRegression()
model.fit(X_train_scaled, y_train)

# 6. Make predictions
y_pred = model.predict(X_test_scaled)

# 7. Evaluation
print("Accuracy Score:", accuracy_score(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

Accuracy Score: 0.8683035714285714

Confusion Matrix:
 [[366  13]
 [ 46  23]]

Classification Report:
               precision    recall  f1-score   support

           0       0.89      0.97      0.93       379
           1       0.64      0.33      0.44        69

    accuracy                           0.87       448
   macro avg       0.76      0.65      0.68       448
weighted avg       0.85      0.87      0.85       448



In [37]:
import joblib
joblib.dump(model, 'customer_response_model.joblib')
print("Model loaded ✅")

Model loaded ✅


In [53]:
from sklearn.preprocessing import StandardScaler
import joblib

# Define features (excluding the target 'response')
feature_names = [
    'year_birth', 'income', 'kidhome', 'teenhome', 'recency',
    'mntwines', 'mntfruits', 'mntmeatproducts', 'mntfishproducts',
    'mntsweetproducts', 'mntgoldprods', 'numdealspurchases',
    'numwebpurchases', 'numcatalogpurchases', 'numstorepurchases',
    'numwebvisitsmonth', 'acceptedcmp3', 'acceptedcmp4', 'acceptedcmp5',
    'acceptedcmp1', 'acceptedcmp2', 'complain', 'education_encoded',
    'marital_status_encoded', 'dt_customer_encoded', 'education_Basic',
    'education_Graduation', 'education_Master', 'education_PhD',
    'marital_status_Alone', 'marital_status_Divorced',
    'marital_status_Married', 'marital_status_Single',
    'marital_status_Together', 'marital_status_Widow',
    'marital_status_YOLO'
]

# Extract features for scaling
X = df_encoded[feature_names]

# Fit the scaler
scaler = StandardScaler()
scaler.fit(X)

# Save the scaler
joblib.dump(scaler, 'scaler.joblib')
print("✅ Scaler saved as 'scaler.joblib'")




✅ Scaler saved as 'scaler.joblib'


In [59]:
streamlit_code = r"""
import streamlit as st
import pandas as pd
import joblib
from sklearn.preprocessing import StandardScaler

# Title of the app
st.title("📊 Customer Response Prediction")

# Load the trained model and scaler
model = joblib.load("customer_response_model.joblib")  # Update with your model filename
scaler = joblib.load("scaler.joblib")  # Load the scaler used during training

# Static dropdown options for education and marital status
education_levels = ['Basic', 'Graduation', 'Master', 'PhD']
marital_status_options = ['Alone', 'Divorced', 'Married', 'Single', 'Together', 'Widow', 'YOLO']

# Feature list (to match scaler joblib)
feature_names = [
    'year_birth', 'income', 'kidhome', 'teenhome', 'recency',
    'mntwines', 'mntfruits', 'mntmeatproducts', 'mntfishproducts',
    'mntsweetproducts', 'mntgoldprods', 'numdealspurchases',
    'numwebpurchases', 'numcatalogpurchases', 'numstorepurchases',
    'numwebvisitsmonth', 'acceptedcmp3', 'acceptedcmp4', 'acceptedcmp5',
    'acceptedcmp1', 'acceptedcmp2', 'complain', 'education_encoded',
    'marital_status_encoded', 'dt_customer_encoded', 'education_Basic',
    'education_Graduation', 'education_Master', 'education_PhD',
    'marital_status_Alone', 'marital_status_Divorced',
    'marital_status_Married', 'marital_status_Single',
    'marital_status_Together', 'marital_status_Widow',
    'marital_status_YOLO'
]

# Initialize user input dictionary
user_input = {}

# Numeric features
numeric_features = [
    'year_birth', 'income', 'kidhome', 'teenhome', 'recency',
    'mntwines', 'mntfruits', 'mntmeatproducts', 'mntfishproducts',
    'mntsweetproducts', 'mntgoldprods', 'numdealspurchases',
    'numwebpurchases', 'numcatalogpurchases', 'numstorepurchases',
    'numwebvisitsmonth', 'acceptedcmp3', 'acceptedcmp4', 'acceptedcmp5',
    'acceptedcmp1', 'acceptedcmp2', 'complain', 'education_encoded',
    'marital_status_encoded', 'dt_customer_encoded'
]

# Input for numeric features
for feature in numeric_features:
    val = st.number_input(f"{feature}", value=0.0)  # Default to 0.0
    user_input[feature] = val

# Dropdown for Education
selected_edu = st.selectbox("Education", education_levels)
for level in education_levels:
    user_input[f"education_{level}"] = 1 if level == selected_edu else 0

# Dropdown for Marital Status
selected_status = st.selectbox("Marital Status", marital_status_options)
for status in marital_status_options:
    user_input[f"marital_status_{status}"] = 1 if status == selected_status else 0

# Convert user input into a dataframe
input_df = pd.DataFrame([[user_input[feat] for feat in feature_names]], columns=feature_names)

# Optional: Scale features (if used during training)
input_scaled = scaler.transform(input_df)

# Predict button
if st.button("🔮 Predict"):
    # Predict with the trained model
    prediction = model.predict(input_scaled)
    pred_label = "Responded to Campaign" if prediction[0] == 1 else "Did Not Respond"
    st.success(f"✅ Prediction: {pred_label}")
"""
with open("customer_response_app.py", "w", encoding="utf-8") as file:
    file.write(streamlit_code)

print("✅ Streamlit app saved as 'customer_response_app.py'")


✅ Streamlit app saved as 'customer_response_app.py'


In [None]:
!streamlit run customer_response_app.py