## Step 1: Creating Sample Data

In [None]:
import pandas as pd

# Toy survey, written one respondent per row: (age group, interested in sports?)
respondents = [
    ("Teen", "Yes"),
    ("Teen", "No"),
    ("Teen", "Yes"),
    ("Adult", "No"),
    ("Adult", "No"),
    ("Senior", "Yes"),
    ("Senior", "Yes"),
    ("Senior", "No"),
]

# Pivot the rows into the column-oriented mapping that pandas consumes
age_groups, sports_answers = zip(*respondents)
data = {
    "Age_Group": list(age_groups),
    "Sports_Interest": list(sports_answers),
}

df = pd.DataFrame(data)

# Show the full table (only eight rows)
print(df)


  Age_Group Sports_Interest
0      Teen             Yes
1      Teen              No
2      Teen             Yes
3     Adult              No
4     Adult              No
5    Senior             Yes
6    Senior             Yes
7    Senior              No


## Step 2: Calculating Joint Probability

In [None]:
# One boolean flag per row for each of the two events
is_teen = df['Age_Group'] == 'Teen'
is_interested = df['Sports_Interest'] == 'Yes'

# Rows where both events hold at once
joint_count = int((is_teen & is_interested).sum())

# Size of the whole sample (the denominator for a joint probability)
total_count = len(df)

# P(Teen and Yes) = matching rows / all rows
joint_probability = joint_count / total_count

print(f"Joint Probability (Teen and Sports Interest Yes): {joint_probability}")


Joint Probability (Teen and Sports Interest Yes): 0.25


## Step 3: Calculating Conditional Probability

In [None]:
# Restrict the sample to the conditioning event: Age_Group = "Teen"
teen_data = df.loc[df['Age_Group'] == 'Teen']

# Within that restricted sample, count the rows with Sports_Interest = "Yes"
teen_is_interested = teen_data['Sports_Interest'] == 'Yes'
conditional_count = int(teen_is_interested.sum())

# P(Yes | Teen) = interested teens / all teens
teen_count = len(teen_data)
conditional_probability = conditional_count / teen_count

print(f"Conditional Probability (Sports Interest Yes | Age Group Teen): {conditional_probability:.3f}")


Conditional Probability (Sports Interest Yes | Age Group Teen): 0.667


## Step 4: Generalizing with Functions

In [None]:
def calculate_joint_probability(df, condition1, condition2):
    """Estimate P(A and B) as the share of rows in ``df`` that satisfy both conditions.

    Parameters
    ----------
    df : pandas.DataFrame
        Observations, one per row.
    condition1, condition2 : sequence
        ``(column_name, value)`` pairs. A row satisfies a condition when
        ``df[column_name] == value``.

    Returns
    -------
    float
        Number of rows matching both conditions divided by ``len(df)``.
        ``nan`` when ``df`` has no rows, because the probability is undefined
        for an empty sample (previously this raised ``ZeroDivisionError``).

    Raises
    ------
    KeyError
        If either column name is missing from ``df``.
    """
    # Build the mask before the empty check so a bad column name still raises KeyError.
    both = (df[condition1[0]] == condition1[1]) & (df[condition2[0]] == condition2[1])
    total_count = len(df)
    if total_count == 0:
        return float("nan")
    # Summing the boolean mask counts matches without copying the filtered rows.
    return int(both.sum()) / total_count

def calculate_conditional_probability(df, given_condition, target_condition):
    """Estimate P(target | given) from the rows of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Observations, one per row.
    given_condition : sequence
        ``(column_name, value)`` pair defining the conditioning event; only rows
        where ``df[column_name] == value`` are considered.
    target_condition : sequence
        ``(column_name, value)`` pair defining the event whose probability is
        measured within those rows.

    Returns
    -------
    float
        Rows matching both conditions divided by rows matching ``given_condition``.
        ``nan`` when no row matches ``given_condition``, because the conditional
        probability is undefined there (previously this raised
        ``ZeroDivisionError``).

    Raises
    ------
    KeyError
        If either column name is missing from ``df``.
    """
    subset = df[df[given_condition[0]] == given_condition[1]]
    # Evaluate the target column before the empty check so a bad name still raises KeyError.
    target_hits = subset[target_condition[0]] == target_condition[1]
    given_count = len(subset)
    if given_count == 0:
        return float("nan")
    return int(target_hits.sum()) / given_count


In [None]:
# The two events of interest, each expressed as a (column, value) pair
teen = ("Age_Group", "Teen")
interested = ("Sports_Interest", "Yes")

# P(Teen and Sports_Interest = Yes)
joint_prob = calculate_joint_probability(df, teen, interested)
print(f"Joint Probability (Teen and Sports Interest Yes): {joint_prob}")

# P(Sports_Interest = Yes | Age_Group = Teen)
conditional_prob = calculate_conditional_probability(
    df,
    given_condition=teen,
    target_condition=interested,
)
print(f"Conditional Probability (Sports Interest Yes | Age Group Teen): {conditional_prob:.3f}")


Joint Probability (Teen and Sports Interest Yes): 0.25
Conditional Probability (Sports Interest Yes | Age Group Teen): 0.667
