In [245]:
import pandas as pd
import numpy as np
from dash import Dash, dcc, html, Input, Output, ctx
import plotly.express as px
from dash import dcc, html
from dash.dependencies import Input, Output
import plotly.express as px
from sklearn.ensemble import RandomForestClassifier
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, accuracy_score


from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import seaborn as sns
import plotly.figure_factory as ff

In [246]:
def load_data():
    url = 'marketing_campaign.csv'  
    df = pd.read_csv(url, delimiter='\t') 

    df.columns = df.columns.str.strip() 
    print("DataFrame Columns:", df.columns.tolist()) 

    return df

In [247]:
def train_random_forest(df):

    features = df[['Income', 'MntWines', 'MntFruits']]  
    target = df['Marital_Status']  

    X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=0)

    rf_model = RandomForestClassifier(random_state=0)
    rf_model.fit(X_train, y_train)

    rf_predictions = rf_model.predict(X_test)
    
    return rf_model, rf_predictions

In [248]:
def apply_kmeans(df, n_clusters=3):
    features = df[['Income', 'MntWines', 'MntFruits']]  
    scaler = StandardScaler()
    scaled_features = scaler.fit_transform(features)

    kmeans = KMeans(n_clusters=n_clusters, random_state=0)
    df['Cluster'] = kmeans.fit_predict(scaled_features)
    
    return kmeans, df


In [249]:
def apply_kmeans(data):
    features = data[['Income', 'MntWines', 'MntFruits']]
    scaler = StandardScaler()
    scaled_features = scaler.fit_transform(features)

    kmeans = KMeans(n_clusters=3, random_state=0)
    data['Cluster'] = kmeans.fit_predict(scaled_features)
    return kmeans, data 

In [251]:
def train_random_forest(data):
    features = data[['Income', 'MntWines', 'MntFruits']]  
    labels = data['Marital_Status'] 
    rf_model = RandomForestClassifier(random_state=0)
    rf_model.fit(features, labels)
    predictions = rf_model.predict(features)  
    return rf_model, predictions 


In [252]:
df = load_data()

df.dropna(subset=['Income', 'MntWines', 'MntFruits', 'MntMeatProducts', 'MntFishProducts', 'MntSweetProducts', 'MntGoldProds'], inplace=True)

DataFrame Columns: ['ID', 'Year_Birth', 'Education', 'Marital_Status', 'Income', 'Kidhome', 'Teenhome', 'Dt_Customer', 'Recency', 'MntWines', 'MntFruits', 'MntMeatProducts', 'MntFishProducts', 'MntSweetProducts', 'MntGoldProds', 'NumDealsPurchases', 'NumWebPurchases', 'NumCatalogPurchases', 'NumStorePurchases', 'NumWebVisitsMonth', 'AcceptedCmp3', 'AcceptedCmp4', 'AcceptedCmp5', 'AcceptedCmp1', 'AcceptedCmp2', 'Complain', 'Z_CostContact', 'Z_Revenue', 'Response']


In [253]:
app = Dash(__name__)

In [254]:
# Layout of the dashboard
app.layout = html.Div([
    html.Div([
        html.H1("Customer Insights Dashboard"),
        html.Label("Select Education Level:"),
        dcc.Dropdown(
            id='education-dropdown',
            options=[{'label': i, 'value': i} for i in df['Education'].unique()],
            multi=True,
            value=[df['Education'].unique()[0]],
            style={'padding': '5px', 'border-radius': '8px', 'backgroundColor': '#e6e6fa'}
            
        ),  
        html.Label("Select Marital Status:"),
        dcc.Dropdown(
            id='marital-status-dropdown', style={'padding': '5px', 'border-radius': '8px', 'backgroundColor': '#e6e6fa'},
            options=[{'label': i, 'value': i} for i in df['Marital_Status'].unique()],
            multi=True,
            value=[df['Marital_Status'].unique()[0]],
        ),
        html.Button('Update Graphs', id='update-button', n_clicks=0),
    ], style={'width': '25%', 'display': 'inline-block', 'vertical-align': 'top'}), 

    html.Div([
        dcc.Graph(id='income-distribution'),
        dcc.Graph(id='customer-count'),

        # Average Income and Total Spending
        html.Div([
            dcc.Graph(id='avg-income-education', style={'width': '48%', 'display': 'inline-block'}),
            dcc.Graph(id='total-spending', style={'width': '48%', 'display': 'inline-block'}),
        ], style={'display': 'flex', 'justify-content': 'space-between'}), 

        dcc.Graph(id='customer-segmentation'),  # K-Means Clustering
        dcc.Graph(id='random-forest-output'),  # Confusion Matrix
        dcc.Graph(id='feature-importance-plot'),  # Feature Importance
    ], style={'width': '75%', 'display': 'inline-block'})  
])


In [257]:
@app.callback(
    [Output('income-distribution', 'figure'),
     Output('customer-count', 'figure'),
     Output('avg-income-education', 'figure'),
     Output('total-spending', 'figure'),
     Output('customer-segmentation', 'figure'),
     Output('random-forest-output', 'figure'),
     Output('feature-importance-plot', 'figure')],
    [Input('update-button', 'n_clicks'),
     Input('education-dropdown', 'value'),
     Input('marital-status-dropdown', 'value')]
)
def update_graphs(n_clicks, education, marital_status):
    filtered_data = df[df['Education'].isin(education) & df['Marital_Status'].isin(marital_status)]
    
    # Income Distribution
    income_fig = px.histogram(
    filtered_data, 
    x='Income', 
    nbins=30, 
    title='Income Distribution', 
    marginal='box', 
    color_discrete_sequence=['#1f77b4']  
)


    # Customer Count by Marital Status
    customer_count_fig = px.histogram(
    filtered_data, 
    x='Marital_Status', 
    title='Count of Customers by Marital Status',
    color='Marital_Status',  
    color_discrete_sequence=px.colors.qualitative.Plotly
)

    # Average Income by Education Level
    avg_income_fig = px.bar(
    df.groupby('Education')['Income'].mean().reset_index(),
    x='Education', 
    y='Income', 
    title='Average Income by Education Level',
    color='Income', 
    color_continuous_scale=px.colors.sequential.Plasma
)


    # Total Spending by Category
    spending_categories = ['MntWines', 'MntFruits', 'MntMeatProducts', 'MntFishProducts', 'MntSweetProducts', 'MntGoldProds']
    total_spending = filtered_data[spending_categories].sum().reset_index()
    total_spending.columns = ['Category', 'Total Spending']
    total_spending_fig = px.pie(total_spending, values='Total Spending', names='Category', title='Total Spending by Category')

    # K-Means Clustering
    kmeans_model, clustered_data = apply_kmeans(filtered_data)
    clustering_fig = px.scatter(clustered_data, x='Income', y='MntWines', color='Cluster', title='K-Means Clustering', color_continuous_scale=px.colors.qualitative.Plotly)


    # Random Forest Model Output
    rf_model, rf_predictions = train_random_forest(filtered_data)
    
    # Confusion Matrix
    cm = confusion_matrix(filtered_data['Marital_Status'], rf_predictions)
    cm_fig = px.imshow(cm, text_auto=True, title='Confusion Matrix', labels={'x': 'Predicted', 'y': 'Actual'})

    # Feature Importance
    feature_importances = rf_model.feature_importances_
    feature_names = filtered_data[['Income', 'MntWines', 'MntFruits']].columns
    feature_importance_fig = px.bar(
    x=feature_names, 
    y=feature_importances, 
    title='Feature Importance from Random Forest',
    color=feature_importances, 
    color_continuous_scale=px.colors.sequential.Viridis
)


    return income_fig, customer_count_fig, avg_income_fig, total_spending_fig, clustering_fig, cm_fig, feature_importance_fig


In [256]:
if __name__ == '__main__':
    app.run_server(debug=True)


A single label was found in 'y_true' and 'y_pred'. For the confusion matrix to have the correct shape, use the 'labels' parameter to pass all known labels.


A single label was found in 'y_true' and 'y_pred'. For the confusion matrix to have the correct shape, use the 'labels' parameter to pass all known labels.


A single label was found in 'y_true' and 'y_pred'. For the confusion matrix to have the correct shape, use the 'labels' parameter to pass all known labels.


A single label was found in 'y_true' and 'y_pred'. For the confusion matrix to have the correct shape, use the 'labels' parameter to pass all known labels.


A single label was found in 'y_true' and 'y_pred'. For the confusion matrix to have the correct shape, use the 'labels' parameter to pass all known labels.

