### Deep Learning Lab - II
### Name: Durva Deshpande
### Batch: A1
### Roll No: 4
### Practical 2

In [5]:

import os
import itertools
import pandas as pd
import numpy as np

from pgmpy.models import DiscreteBayesianNetwork
from pgmpy.estimators import BayesianEstimator, MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination

import matplotlib.pyplot as plt
import networkx as nx

# =============== 1) Load data ===============================

# Location of the campaign dataset. Set the MEERA_GEMS_CSV environment variable to
# run the notebook on another machine; without it the original local path is used.
CSV_PATH = os.environ.get("MEERA_GEMS_CSV", r"c:/Users/HP/Downloads/meera_gems.csv")

df = pd.read_csv(CSV_PATH)

# Allowed labels for every variable used in the network
expected_states = {
    "Gender": ["Male","Female"],
    "Age_Group": [">35","<=35"],
    "Business": ["Yes","No"],
    "Employed": ["Yes","No"],
    "Marital_Status": ["Married","Single"],
    "Success": ["Yes","No"],
}

# Normalize values (cast to str and strip surrounding spaces)
for c in expected_states:
    df[c] = df[c].astype(str).str.strip()

# Enforce the expected discrete sets. astype(str) turns a missing value into the
# literal "nan", which would otherwise be learned as an extra state of the variable,
# so any label outside the catalogue is reported instead of being passed on.
unexpected_labels = {
    col: sorted(set(df[col]) - set(states))
    for col, states in expected_states.items()
    if not df[col].isin(states).all()
}
if unexpected_labels:
    raise ValueError(f"Unexpected labels in {CSV_PATH}: {unexpected_labels}")


In [6]:
# =============== 2) Define BN structure =====================

# Each key is a parent node; its list holds the nodes it points to.
# Flattening the mapping in insertion order yields the directed edge list:
#   Age_Group → Marital_Status, Age_Group → Success, Gender → Success,
#   Business → Employed, Employed → Success, Marital_Status → Success
children_of = {
    "Age_Group": ["Marital_Status", "Success"],
    "Gender": ["Success"],
    "Business": ["Employed"],
    "Employed": ["Success"],
    "Marital_Status": ["Success"],
}

edges = [
    (parent, child)
    for parent, children in children_of.items()
    for child in children
]
model = DiscreteBayesianNetwork(edges)


In [7]:
# =============== 3) Fit CPDs ================================

# Parameters are estimated with BayesianEstimator under a BDeu prior
# (equivalent sample size 10) so that no probability is estimated as exactly zero.
# For plain maximum likelihood, pass MaximumLikelihoodEstimator as the estimator instead.
bn_columns = ["Gender","Age_Group","Business","Employed","Marital_Status","Success"]
model.fit(
    df[bn_columns],
    estimator=BayesianEstimator,
    prior_type="BDeu",
    equivalent_sample_size=10,
)

# Stop here if the fitted structure/CPDs are inconsistent
assert model.check_model(), "Model failed check_model()"
print("Model validation: PASSED (check_model)\n")

# Show every learned CPD, each followed by a blank line
print("=== Learned CPDs (tables truncated if long) ===")
for cpd in model.get_cpds():
    print(cpd, end="\n\n")

INFO:pgmpy: Datatype (N=numerical, C=Categorical Unordered, O=Categorical Ordered) inferred from data: 
 {'Gender': 'C', 'Age_Group': 'C', 'Business': 'C', 'Employed': 'C', 'Marital_Status': 'C', 'Success': 'C'}


Model validation: PASSED (check_model)

=== Learned CPDs (tables truncated if long) ===
+-----------------+----------+
| Age_Group(<=35) | 0.563636 |
+-----------------+----------+
| Age_Group(>35)  | 0.436364 |
+-----------------+----------+

+-------------------------+--------------------+--------------------+
| Age_Group               | Age_Group(<=35)    | Age_Group(>35)     |
+-------------------------+--------------------+--------------------+
| Marital_Status(Married) | 0.5403225806451613 | 0.4479166666666667 |
+-------------------------+--------------------+--------------------+
| Marital_Status(Single)  | 0.4596774193548387 | 0.5520833333333334 |
+-------------------------+--------------------+--------------------+

+----------------+-------------------------+-----+-------------------------+------------------------+
| Age_Group      | Age_Group(<=35)         | ... | Age_Group(>35)          | Age_Group(>35)         |
+----------------+-------------------------+-----+-----------

In [8]:
# =============== 4) Draw & save BN diagram ==================
# Best-effort step: a plotting failure is reported but does not stop the analysis.
try:
    # Use an explicit Figure/Axes and hand the Axes to nx.draw. When nx.draw has to
    # create its own axes, the later tight_layout() call emits a warning (visible in
    # this cell's captured output, pointing at plt.tight_layout()).
    fig, ax = plt.subplots(figsize=(7, 5))
    try:
        G = nx.DiGraph()
        G.add_nodes_from(["Age_Group","Gender","Business","Employed","Marital_Status","Success"])
        G.add_edges_from(edges)
        pos = nx.spring_layout(G, seed=7)  # fixed seed keeps the layout reproducible
        nx.draw(G, pos, ax=ax, with_labels=True, node_size=2200, font_size=10, arrows=True)
        ax.set_title("Meera Campaign Bayesian Network")
        fig.tight_layout()
        fig.savefig("bn_structure.png", dpi=160)
    finally:
        # Release the figure even when drawing or saving fails
        plt.close(fig)
    print("Saved BN diagram to: bn_structure.png\n")
except Exception as e:
    print(f"Could not save BN diagram: {e}\n")

  plt.tight_layout()


Saved BN diagram to: bn_structure.png



In [9]:
# =============== 5) Inference helper ========================

# Exact-inference engine over the fitted network, shared by the task cells below
infer = VariableElimination(model)

def bn_prob_success_given(evidence_dict):
    """Query the network for Success given `evidence_dict` and return the
    probability of the "Yes" state as a plain float."""
    posterior = infer.query(variables=["Success"], evidence=evidence_dict, show_progress=False)
    # Look the "Yes" state up by name rather than assuming its position
    yes_position = posterior.state_names["Success"].index("Yes")
    return float(posterior.values[yes_position])

def empirical_conditional_prob(df, target, given, states=None):
    """Compute the empirical distribution of `target` among rows matching `given`.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to condition on; any index is accepted.
    target : str
        Column whose relative frequencies are returned.
    given : dict
        Mapping column -> required value; a row must match every entry.
    states : sequence, optional
        Levels of `target` that must appear in the result. When omitted, the
        notebook-level `expected_states[target]` is used.

    Returns
    -------
    (dict, int)
        Relative frequency per level of `target`, and the number of matching rows.
        Levels from `states` that never occur get 0.0. When no row matches, every
        level in `states` maps to NaN and the count is 0, so callers can always
        unpack two values.
    """
    if states is None:
        states = expected_states[target]

    # Build the mask on df's own index so the element-wise AND below stays aligned
    # even when df does not carry a default 0..n-1 index.
    mask = pd.Series(True, index=df.index)
    for col, val in given.items():
        mask &= (df[col] == val)
    sub = df.loc[mask]
    n_rows = len(sub)

    if n_rows == 0:
        # Same (dict, count) shape as the normal path
        return {s: np.nan for s in states}, 0

    probs = sub[target].value_counts(normalize=True).to_dict()
    # Make sure every requested level is present, even if unobserved in the subset
    for s in states:
        probs.setdefault(s, 0.0)
    return probs, n_rows

def print_section(title):
    """Print a blank line, then `title` framed by '=' rules of the title's length."""
    rule = "=" * len(title)
    print("", rule, title, rule, sep="\n")

In [10]:
# =============== 6) TASKS ===================================

# TASK 1: Explain influence — one line of rationale per edge of the network
influence_summary = """- Age_Group → Success: Age may directly affect receptiveness to the campaign (e.g., spending power/interest).
- Gender → Success: Gender differences can influence product appeal and response.
- Employed → Success: Employment status often correlates with disposable income and purchase intent.
- Marital_Status → Success: Married vs Single can shape jewellery-buying occasions (gifts, weddings, festivals).
- Age_Group → Marital_Status: Older customers more likely to be married in aggregate.
- Business → Employed: Owning a business influences employment status (business owners may be 'Employed' by their own firm or recorded differently).
"""

print_section("Task 1: Influence of attributes on Success (by BN structure)")
print(influence_summary)


Task 1: Influence of attributes on Success (by BN structure)
- Age_Group → Success: Age may directly affect receptiveness to the campaign (e.g., spending power/interest).
- Gender → Success: Gender differences can influence product appeal and response.
- Employed → Success: Employment status often correlates with disposable income and purchase intent.
- Marital_Status → Success: Married vs Single can shape jewellery-buying occasions (gifts, weddings, festivals).
- Age_Group → Marital_Status: Older customers more likely to be married in aggregate.
- Business → Employed: Owning a business influences employment status (business owners may be 'Employed' by their own firm or recorded differently).



In [11]:
# TASK 2: BN diagram — the image file is written by the drawing cell above;
# this cell only reports where it was saved.
diagram_file = "bn_structure.png"
print_section("Task 2: BN Diagram")
print(f"Saved as '{diagram_file}' in the current directory.")



Task 2: BN Diagram
Saved as 'bn_structure.png' in the current directory.


In [12]:
# TASK 3: P(Success=Yes | Gender=Female)
# The network's answer is printed next to the raw proportion observed in the data.
female_evidence = {"Gender": "Female"}

print_section("Task 3: P(Success=Yes | Gender=Female)")
bn_p = bn_prob_success_given(dict(female_evidence))
emp_counts, n_emp = empirical_conditional_prob(df, "Success", dict(female_evidence))
if isinstance(emp_counts, dict):
    emp_p = emp_counts["Yes"]
else:
    emp_p = np.nan
print(f"Bayesian Network: P(Success=Yes | Gender=Female) = {bn_p:.4f}")
print(f"Empirical (from data): P(Success=Yes | Gender=Female) = {emp_p:.4f}  (n={n_emp})")


Task 3: P(Success=Yes | Gender=Female)
Bayesian Network: P(Success=Yes | Gender=Female) = 0.5390
Empirical (from data): P(Success=Yes | Gender=Female) = 0.5660  (n=53)


In [13]:
# TASK 4: share of successful outcomes among customers in the ">35" age group
# (computed directly from the data, not from the network)
print_section("Task 4: Among Age>35, proportion Success=Yes")
mask = df["Age_Group"].eq(">35")
if mask.any():
    prop_yes_agegt35 = df.loc[mask, "Success"].eq("Yes").mean()
else:
    # No row in this age group: report NaN rather than a proportion
    prop_yes_agegt35 = np.nan
n_agegt35 = mask.sum()
print(f"Empirical proportion: P(Success=Yes | Age_Group=>35) = {prop_yes_agegt35:.4f}  (n={n_agegt35})")


Task 4: Among Age>35, proportion Success=Yes
Empirical proportion: P(Success=Yes | Age_Group=>35) = 0.5349  (n=43)


In [14]:
# TASK 5: Success between Employed vs Unemployed
# Only the raw success rates per employment level are compared here;
# no significance test is computed in this cell.
print_section("Task 5: Success rates: Employed vs Unemployed")
tab = pd.crosstab(df["Employed"], df["Success"])

# Rate = successes in the row / row total; NaN when the employment level is absent
if "Yes" in tab.index:
    rate_emp_yes = tab.loc["Yes", "Yes"] / tab.loc["Yes"].sum()
else:
    rate_emp_yes = np.nan

if "No" in tab.index:
    rate_emp_no = tab.loc["No", "Yes"] / tab.loc["No"].sum()
else:
    rate_emp_no = np.nan

print("Contingency table (rows=Employed, cols=Success):\n", tab, "\n")
print(f"Success rate (Employed=Yes): {rate_emp_yes:.4f}")
print(f"Success rate (Employed=No):  {rate_emp_no:.4f}")


Task 5: Success rates: Employed vs Unemployed
Contingency table (rows=Employed, cols=Success):
 Success   No  Yes
Employed         
No        17   30
Yes       27   26 

Success rate (Employed=Yes): 0.4906
Success rate (Employed=No):  0.6383


In [15]:
# TASK 6: P(Success | Business=Yes, Marital_Status=Married)
# Network estimate and observed proportion for married business owners.
business_married = {"Business": "Yes", "Marital_Status": "Married"}

print_section("Task 6: P(Success=Yes | Business=Yes, Marital_Status=Married)")
bn_p_6 = bn_prob_success_given(dict(business_married))
emp_counts6, n6 = empirical_conditional_prob(df, "Success", dict(business_married))
if isinstance(emp_counts6, dict):
    emp_p_6 = emp_counts6["Yes"]
else:
    emp_p_6 = np.nan
print(f"Bayesian Network: P(Success=Yes | Business=Yes, Marital_Status=Married) = {bn_p_6:.4f}")
print(f"Empirical (from data): P(Success=Yes | Business=Yes, Marital_Status=Married) = {emp_p_6:.4f}  (n={n6})")


Task 6: P(Success=Yes | Business=Yes, Marital_Status=Married)
Bayesian Network: P(Success=Yes | Business=Yes, Marital_Status=Married) = 0.5816
Empirical (from data): P(Success=Yes | Business=Yes, Marital_Status=Married) = 0.6296  (n=27)


In [16]:
# TASK 7: Joint probability of Male, <=35, Employed, Married achieving Success
print_section("Task 7: Joint P(Male, <=35, Employed=Yes, Married, Success=Yes)")
# Chain rule: P(G=Male, A=<=35, Employed=Yes, Marital=Married) * P(Success=Yes | that evidence).
# First obtain the joint factor over the four evidence variables.
evidence_variables = ["Gender","Age_Group","Employed","Marital_Status"]
q_joint = infer.query(variables=evidence_variables, joint=True, show_progress=False)
# Helper: read a single entry of a factor by state names
def get_factor_value(factor, assign):
    """Return, as a float, the entry of `factor` selected by `assign`.

    `assign` maps variable name -> state name. The variables are visited in the
    order given by factor.scope(), and each state name is translated to its
    position via factor.state_names before indexing factor.values.
    """
    position = tuple(
        factor.state_names[var].index(assign[var])
        for var in factor.scope()
    )
    return float(factor.values[position])

# Profile of interest: male, aged <=35, employed, married
evidence_assign = dict(
    Gender="Male",
    Age_Group="<=35",
    Employed="Yes",
    Marital_Status="Married",
)

# P(evidence) comes from the joint factor; P(Success=Yes | evidence) from a
# conditional query; their product is the requested joint probability.
p_evidence = get_factor_value(q_joint, evidence_assign)
p_success_given_evidence = bn_prob_success_given(evidence_assign)
joint_prob = p_evidence * p_success_given_evidence

print(
    f"P(evidence) = {p_evidence:.6f}",
    f"P(Success=Yes | evidence) = {p_success_given_evidence:.6f}",
    f"Joint probability = {joint_prob:.6f}",
    sep="\n",
)


Task 7: Joint P(Male, <=35, Employed=Yes, Married, Success=Yes)
P(evidence) = 0.075910
P(Success=Yes | evidence) = 0.268116
Joint probability = 0.020353


In [17]:
# NOTE(review): this install step has a later execution count than the cell that
# imports pgmpy, so on a fresh kernel it would have to run before that import.
# Consider moving it to the first cell and pinning a version — TODO confirm which
# pgmpy release the notebook was run with.
%pip install pgmpy

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


### Conclusion
In this lab, we successfully constructed a Bayesian Network model to understand the probabilistic relationships among demographic and occupational attributes influencing customer success in a marketing campaign. Using the pgmpy library, we defined the structure of the network based on domain knowledge and trained it using the BayesianEstimator on the provided dataset.

Key takeaways include:

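Inference using the model was close to the empirical proportions (for example, 0.539 from the network versus 0.566 in the data for P(Success=Yes | Gender=Female)), which is consistent with the chosen network structure and parameter estimates.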

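Factors like employment status, age group, and marital status appeared to influence the likelihood of campaign success in this sample; no formal significance test was run, so these differences should be read as descriptive.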

We visualized the BN structure, computed both conditional and joint probabilities, and interpreted real-world implications from the model’s outputs.

This exercise deepened our understanding of probabilistic graphical models and how Bayesian inference can aid decision-making using uncertain, real-world data.