In [11]:
import pandas as pd
import numpy as np
from scipy import stats

# Compare pregnancy-related mortality rates between two BMI subcategories with
# an independent-samples t-test, then write a plain-text summary of the result.

# File locations and the two 'Subcategory' labels under comparison.
DATA_PATH = 'Data-Pregnancy-Related-Mortality.csv'
OUTPUT_PATH = 'bmi_hypothesis_test_results.txt'
NORMAL_WEIGHT_LABEL = 'Underweight/Normal weight (<25)'
OBESE_III_LABEL = 'Obese III (40+)'
ALPHA = 0.05  # significance threshold used in the interpretation line


def percent_change(baseline, value):
    """Return the change from *baseline* to *value* as a percentage of *baseline*.

    Returns NaN when *baseline* is zero (the ratio is undefined); NaN inputs
    propagate to a NaN result through ordinary float arithmetic.
    """
    if baseline == 0:
        return float('nan')
    return ((value - baseline) / baseline) * 100


def describe_difference(percent_difference):
    """Return the sentence describing the relative difference between groups.

    The direction word follows the sign of *percent_difference* so a negative
    value reads as "lower" instead of a negative percentage "higher".
    """
    if pd.isna(percent_difference):
        return ('The percentage difference could not be computed '
                '(missing data or a zero baseline rate).')
    direction = 'higher' if percent_difference >= 0 else 'lower'
    return (
        f'The pregnancy-related mortality rate for {OBESE_III_LABEL} is '
        f'{abs(percent_difference):.2f}% {direction} than {NORMAL_WEIGHT_LABEL}.'
    )


def interpret(p_value, alpha=ALPHA):
    """Return the interpretation sentence for *p_value* at level *alpha*.

    A NaN p-value is reported as inconclusive: comparing NaN with *alpha* is
    always False and would otherwise be reported as "not significant".
    """
    if pd.isna(p_value):
        return ('The test statistic could not be computed, so no conclusion '
                'about significance can be drawn.')
    if p_value < alpha:
        return ('The difference in mortality rates between the two groups '
                'is statistically significant.')
    return ('The difference in mortality rates between the two groups '
            'is not statistically significant.')


def build_summary(t_statistic, p_value, mean_normal, mean_obese_iii,
                  percent_difference):
    """Return the full plain-text report for the hypothesis test."""
    return f"""Hypothesis Test: Comparison of Pregnancy-Related Mortality Rates
{OBESE_III_LABEL} vs. {NORMAL_WEIGHT_LABEL}

Test: Independent Samples t-test

Results:
t-statistic: {t_statistic:.4f}
p-value: {p_value:.4e}

Mean Rates:
{NORMAL_WEIGHT_LABEL}: {mean_normal:.2f}
{OBESE_III_LABEL}: {mean_obese_iii:.2f}

Percentage Difference:
{describe_difference(percent_difference)}

Interpretation:
{interpret(p_value)}
"""


# A notebook cell or a direct script run executes with __name__ == '__main__',
# so the analysis still runs there and defines the same global names as before.
if __name__ == '__main__':
    # Load the data
    df = pd.read_csv(DATA_PATH)

    # Filter for the two categories we're interested in
    categories = [NORMAL_WEIGHT_LABEL, OBESE_III_LABEL]
    filtered_df = df[df['Subcategory'].isin(categories)].copy()  # .copy() avoids SettingWithCopyWarning

    # Convert 'Rate' to numeric; non-numeric entries become NaN and are
    # removed by the dropna() calls below.
    filtered_df['Rate'] = pd.to_numeric(filtered_df['Rate'], errors='coerce')

    # Separate the data into two groups
    normal_weight = filtered_df.loc[
        filtered_df['Subcategory'] == NORMAL_WEIGHT_LABEL, 'Rate'
    ].dropna()
    obese_iii = filtered_df.loc[
        filtered_df['Subcategory'] == OBESE_III_LABEL, 'Rate'
    ].dropna()

    # Perform the t-test.
    # NOTE(review): ttest_ind defaults to equal_var=True (Student's t-test).
    # If the two groups' variances differ, Welch's test (equal_var=False) may
    # be the better choice -- confirm before relying on the p-value.
    t_statistic, p_value = stats.ttest_ind(obese_iii, normal_weight)

    # Calculate mean rates
    mean_normal = normal_weight.mean()
    mean_obese_iii = obese_iii.mean()

    # Calculate the percentage difference relative to the normal-weight group
    percent_difference = percent_change(mean_normal, mean_obese_iii)

    # Prepare the summary
    summary = build_summary(
        t_statistic, p_value, mean_normal, mean_obese_iii, percent_difference
    )

    # Save the summary to a text file
    with open(OUTPUT_PATH, 'w', encoding='utf-8') as f:
        f.write(summary)

    print('Results have been saved to bmi_hypothesis_test_results.txt')
    print('Here are the contents of the file:')
    print(summary)


Results have been saved to bmi_hypothesis_test_results.txt
Here are the contents of the file:
Hypothesis Test: Comparison of Pregnancy-Related Mortality Rates
Obese III (40+) vs. Underweight/Normal weight (<25)

Test: Independent Samples t-test

Results:
t-statistic: 7.8183
p-value: 1.6592e-07

Mean Rates:
Underweight/Normal weight (<25): 8.15
Obese III (40+): 45.15

Percentage Difference:
The pregnancy-related mortality rate for Obese III (40+) is 453.73% higher than Underweight/Normal weight (<25).

Interpretation:
The difference in mortality rates between the two groups is statistically significant.

