In [57]:
import random
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
from scipy.stats import ttest_ind
import pandas as pd

In [58]:
# Load the drug-safety dataset; the path is relative to the notebook's working directory.
file_path = "drug_safety.csv"
df = pd.read_csv(file_path)
# Last expression in the cell: shows a truncated preview of the frame.
df

Unnamed: 0,age,sex,trx,week,wbc,rbc,adverse_effects,num_effects
0,62,male,Drug,0,7.3,5.1,No,0
1,62,male,Drug,1,,,No,0
2,62,male,Drug,12,5.6,5.0,No,0
3,62,male,Drug,16,,,No,0
4,62,male,Drug,2,6.6,5.1,No,0
...,...,...,...,...,...,...,...,...
16098,78,male,Placebo,16,,,Yes,1
16099,78,male,Placebo,2,7.5,4.9,No,0
16100,78,male,Placebo,20,,,Yes,1
16101,78,male,Placebo,4,6.4,4.8,No,0


In [59]:
# Summary statistics for the numeric columns; the count row shows which columns have missing values.
df.describe()

Unnamed: 0,age,week,wbc,rbc,num_effects
count,16103.0,16103.0,9128.0,9127.0,16103.0
mean,64.117556,7.74098,7.340557,4.672784,0.101596
std,8.783207,6.9435,1.996652,0.45852,0.323181
min,39.0,0.0,1.8,2.1,0.0
25%,58.0,1.0,6.0,4.4,0.0
50%,65.0,4.0,7.1,4.7,0.0
75%,71.0,12.0,8.4,5.0,0.0
max,84.0,20.0,26.5,7.6,3.0


In [60]:
# Column dtypes and non-null counts of the raw frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16103 entries, 0 to 16102
Data columns (total 8 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   age              16103 non-null  int64  
 1   sex              16103 non-null  object 
 2   trx              16103 non-null  object 
 3   week             16103 non-null  int64  
 4   wbc              9128 non-null   float64
 5   rbc              9127 non-null   float64
 6   adverse_effects  16103 non-null  object 
 7   num_effects      16103 non-null  int64  
dtypes: float64(2), int64(3), object(3)
memory usage: 1006.6+ KB


In [61]:
def convert_adverse_effects_to_int(value):
    """Encode an adverse-effects flag as 0/1.

    Args:
        value: Either a raw label ("Yes" marks an adverse effect; any other
            string counts as no effect) or an already-encoded flag (1/0).

    Returns:
        1 if the value denotes an adverse effect, otherwise 0.
    """
    if isinstance(value, str):
        return 1 if value == "Yes" else 0
    # Already-encoded values pass through unchanged, so applying this function
    # a second time (e.g. re-running the cell) does not zero out the column.
    return 1 if value == 1 else 0

# Encode the adverse_effects labels as integers so the column can be summarised and tested numerically.
df['adverse_effects'] = df['adverse_effects'].map(convert_adverse_effects_to_int)
df['adverse_effects']

0        0
1        0
2        0
3        0
4        0
        ..
16098    1
16099    0
16100    1
16101    0
16102    0
Name: adverse_effects, Length: 16103, dtype: int64

In [62]:
# Re-check the summary: adverse_effects is numeric now, so it appears alongside the other numeric columns.
df.describe()

Unnamed: 0,age,week,wbc,rbc,adverse_effects,num_effects
count,16103.0,16103.0,9128.0,9127.0,16103.0,16103.0
mean,64.117556,7.74098,7.340557,4.672784,0.095386,0.101596
std,8.783207,6.9435,1.996652,0.45852,0.293756,0.323181
min,39.0,0.0,1.8,2.1,0.0,0.0
25%,58.0,1.0,6.0,4.4,0.0,0.0
50%,65.0,4.0,7.1,4.7,0.0,0.0
75%,71.0,12.0,8.4,5.0,0.0,0.0
max,84.0,20.0,26.5,7.6,1.0,3.0


In [63]:
# Keep only rows in which every column is populated (listwise deletion).
# NOTE(review): rows lacking a wbc/rbc measurement are dropped as well, so they are
# also excluded from the adverse-effect comparisons below — confirm that is intended.
df_cleaned = df.dropna(axis="index", how="any")
df_cleaned


Unnamed: 0,age,sex,trx,week,wbc,rbc,adverse_effects,num_effects
0,62,male,Drug,0,7.3,5.1,0,0
2,62,male,Drug,12,5.6,5.0,0,0
4,62,male,Drug,2,6.6,5.1,0,0
6,62,male,Drug,4,6.9,5.2,1,1
7,62,male,Drug,8,7.1,5.0,1,1
...,...,...,...,...,...,...,...,...
16095,78,male,Placebo,0,7.2,5.0,0,0
16097,78,male,Placebo,12,6.5,4.9,0,0
16099,78,male,Placebo,2,7.5,4.9,0,0
16101,78,male,Placebo,4,6.4,4.8,0,0


In [64]:
# Verify that no null values remain after dropping incomplete rows.
df_cleaned.info()

<class 'pandas.core.frame.DataFrame'>
Index: 9127 entries, 0 to 16102
Data columns (total 8 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   age              9127 non-null   int64  
 1   sex              9127 non-null   object 
 2   trx              9127 non-null   object 
 3   week             9127 non-null   int64  
 4   wbc              9127 non-null   float64
 5   rbc              9127 non-null   float64
 6   adverse_effects  9127 non-null   int64  
 7   num_effects      9127 non-null   int64  
dtypes: float64(2), int64(4), object(2)
memory usage: 641.7+ KB


In [65]:
# Summary statistics of the cleaned frame, for comparison with the raw summary.
df_cleaned.describe()

Unnamed: 0,age,week,wbc,rbc,adverse_effects,num_effects
count,9127.0,9127.0,9127.0,9127.0,9127.0,9127.0
mean,64.00767,4.985428,7.340331,4.672784,0.094664,0.1008
std,8.847711,4.375397,1.996645,0.45852,0.292767,0.322178
min,39.0,0.0,1.8,2.1,0.0,0.0
25%,58.0,2.0,6.0,4.4,0.0,0.0
50%,65.0,4.0,7.1,4.7,0.0,0.0
75%,71.0,8.0,8.4,5.0,0.0,0.0
max,84.0,20.0,26.5,7.6,1.0,3.0


In [66]:
# Rows of the cleaned data with trx == 'Drug', restricted to the four metrics under comparison.
drug_group = df_cleaned.loc[
    df_cleaned['trx'] == 'Drug',
    ['wbc', 'rbc', 'num_effects', 'adverse_effects'],
]
drug_group

Unnamed: 0,wbc,rbc,num_effects,adverse_effects
0,7.3,5.1,0,0
2,5.6,5.0,0,0
4,6.6,5.1,0,0
6,6.9,5.2,1,1
7,7.1,5.0,1,1
...,...,...,...,...
16059,9.1,4.6,1,1
16068,8.6,4.6,2,1
16072,7.2,4.2,0,0
16074,6.5,4.5,0,0


In [67]:
# Per-metric summary statistics for the Drug group.
drug_group.describe()

Unnamed: 0,wbc,rbc,num_effects,adverse_effects
count,6011.0,6011.0,6011.0,6011.0
mean,7.330461,4.67913,0.102479,0.095991
std,2.008941,0.45499,0.325529,0.294603
min,1.8,2.7,0.0,0.0
25%,6.0,4.4,0.0,0.0
50%,7.0,4.7,0.0,0.0
75%,8.4,5.0,0.0,0.0
max,26.5,7.5,3.0,1.0


In [68]:
# Rows of the cleaned data with trx == 'Placebo', restricted to the four metrics under comparison.
placebo_group = df_cleaned.loc[
    df_cleaned['trx'] == 'Placebo',
    ['wbc', 'rbc', 'num_effects', 'adverse_effects'],
]
placebo_group

Unnamed: 0,wbc,rbc,num_effects,adverse_effects
32,7.2,4.7,0,0
34,7.8,4.7,0,0
36,7.6,4.6,0,0
38,7.3,4.6,0,0
39,8.2,4.6,0,0
...,...,...,...,...
16095,7.2,5.0,0,0
16097,6.5,4.9,0,0
16099,7.5,4.9,0,0
16101,6.4,4.8,0,0


In [69]:
# Per-metric summary statistics for the Placebo group.
placebo_group.describe()

Unnamed: 0,wbc,rbc,num_effects,adverse_effects
count,3116.0,3116.0,3116.0,3116.0
mean,7.359371,4.660542,0.097561,0.092105
std,1.97289,0.465083,0.315639,0.289221
min,3.0,2.1,0.0,0.0
25%,6.0,4.4,0.0,0.0
50%,7.15,4.7,0.0,0.0
75%,8.4,5.0,0.0,0.0
max,23.799999,7.6,3.0,1.0


#### Null Hypothesis (H₀ - "H naught"):
There is no difference between the population means of the Drug and Placebo groups for the given metric.
Mathematically:
$H_0: \mu_{\text{Drug}} = \mu_{\text{Placebo}}$
 
This means any observed difference is due to random variation and not due to an actual effect of the drug.

#### Alternative Hypothesis (Hₐ - "H a"):
The population means of the Drug and Placebo groups differ for the given metric.
Mathematically:
$H_a: \mu_{\text{Drug}} \neq \mu_{\text{Placebo}}$
 
This suggests that the drug has an effect (either increasing or decreasing the metric).

#### How the p-value Helps Decide:

If p-value < significance level → We reject the null hypothesis (H₀) and conclude that there is a significant difference.

If p-value ≥ significance level → We fail to reject H₀, meaning there is not enough evidence that the Drug and Placebo group means differ (this does not prove that they are equal).

#### alternative argument : 

1. alternative='two-sided':

This tests if the means of the Drug and Placebo groups are significantly different in either direction (higher or lower).

When to use: If we do not have a prior assumption about whether the drug increases or decreases the metric.

2. alternative='greater':

This tests if the mean of the first group (Drug) is significantly greater than the second group (Placebo).

When to use: If we expect the drug to increase the metric (e.g., higher WBC count due to treatment).

3. alternative='less':

This tests if the mean of the first group (Drug) is significantly less than the second group (Placebo).

When to use: If we expect the drug to decrease the metric (e.g., if the drug is expected to reduce adverse effects).


#### Interpretation of t_stat:
If t_stat is close to 0 → The two groups have similar means (no strong evidence of difference).

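If t_stat is large in absolute value (positive or negative) → The observed difference between the sample means is large relative to its standard error, so the p-value is small and the difference is more likely to be statistically significant.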

Positive t_stat → The Drug group has a higher mean than the Placebo group.

Negative t_stat → The Drug group has a lower mean than the Placebo group.


#### equal_var Argument in ttest_ind:
1. equal_var=True (Default) → Student’s t-test:

Assumes that the variances of the two groups are equal.
Uses pooled variance (combining both groups' variances into a single estimate).
More powerful when the assumption is true, but can give misleading results if the variances are actually different.

2. equal_var=False → Welch’s t-test:

Does not assume equal variance.
Each group’s variance is treated separately.
More robust when the groups have different sample sizes or variances.
This is the preferred method in real-world data where variances may differ.

In [70]:
# Compare the Drug and Placebo groups on each metric with a two-sided Welch t-test
# and report the decision at every significance level.
metrics = ['wbc', 'rbc', 'num_effects', 'adverse_effects']
significant_levels = [0.05, 0.1]
results = {}

for metric in metrics:
    # equal_var=False -> Welch's t-test (no equal-variance assumption);
    # alternative='two-sided' -> detect a difference in either direction.
    t_stat, p_value = ttest_ind(
        drug_group[metric],
        placebo_group[metric],
        equal_var=False,
        alternative='two-sided',
    )
    print(f"p_value for metric {metric} : {p_value}")

    results[metric] = {'t-statistic': t_stat, 'p-value': p_value}

    for significant_level in significant_levels:
        print(f"with significant level {significant_level}:")
        # Reject only on a definite p < alpha. A NaN p-value (ttest_ind propagates
        # NaN by default) compares False here and therefore falls into the
        # "cannot reject" branch instead of being reported as a significant result.
        if p_value < significant_level:
            print(f"we reject the H0. So {metric} average is significantly difference in drug and placebo groups\n")
        else:
            print(f"we cant reject the H0. So there is not enough evidence that {metric} average is significantly difference in drug and placebo groups\n")

# One row per metric holding its t-statistic and p-value.
results_df = pd.DataFrame(results).T
# Print the label and the table separately: passing both to a single print() puts a
# separator space after the newline, which shifts the header row out of alignment.
print("Results:")
print(results_df)


p_value for metric wbc : 0.5094775322580585
with significant level 0.05:
we cant reject the H0. So there is not enough evidence that wbc average is significantly difference in drug and placebo groups

with significant level 0.1:
we cant reject the H0. So there is not enough evidence that wbc average is significantly difference in drug and placebo groups

p_value for metric rbc : 0.06821193346979416
with significant level 0.05:
we cant reject the H0. So there is not enough evidence that rbc average is significantly difference in drug and placebo groups

with significant level 0.1:
we reject the H0. So rbc average is significantly difference in drug and placebo groups

p_value for metric num_effects : 0.48503538645811195
with significant level 0.05:
we cant reject the H0. So there is not enough evidence that num_effects average is significantly difference in drug and placebo groups

with significant level 0.1:
we cant reject the H0. So there is not enough evidence that num_effects averag

#### Questions (10% Bonus)
An engineer is monitoring the pressure inside an oil pipeline. Due to varying flow rates and environmental conditions, the pressure in the pipeline fluctuates slightly with time. The true average pressure of the pipeline is unknown. Pressure measurements, 𝑋1, 𝑋2, ..., 𝑋𝑛 satisfy the following model:
𝑋𝑖 = µ + ϵ𝑖 

where µ is the unknown true average pressure, and ϵ𝑖 represents random error. The errors are
i.i.d. with mean 0 and unknown standard deviation σ. The pipeline’s pressure is measured
100 times. If we construct an approximate 95% confidence interval for µ, this interval was
constructed for one of the following purposes. Indicate which is correct and explain why:

1. To estimate the average of the 100 pressure measurements and give ourselves some
room for error in the estimate.
2. To estimate the true average pressure of the pipeline and give ourselves some room
for error in the estimate.
3. To provide a range in which 95 of the 100 pressure measurements are likely to have
fallen.
4. To provide a range in which 95% of all possible pressure measurements are likely to
fall


### answer

The correct answer is:

2. "To estimate the true average pressure of the pipeline and give ourselves some room for error in the estimate."

Explanation:

A confidence interval (CI) for 𝜇 is constructed to estimate the true population mean (the unknown average pressure 𝜇 of the pipeline). The interval provides a range where we are 95% confident that the true mean lies, given the sample data.
A 95% confidence interval (CI) means that if we repeated this sampling process many times, 95% of the computed confidence intervals would contain the true mean 𝜇. However, any single confidence interval does not guarantee that it contains 𝜇, just that the method used is correct 95% of the time.

Law of Large Numbers (LLN):

As the sample size (n) increases, the sample mean gets closer to the true mean (μ). With 100 pressure measurements, the sample mean is likely to be a good estimate of μ.

1. "To estimate the average of the 100 pressure measurements and give ourselves some room for error in the estimate."

❌ Incorrect – The sample mean is just a statistic based on the observed data. A confidence interval does not estimate the sample mean itself but rather estimates the true mean μ from which the data was drawn.

3. "To provide a range in which 95 of the 100 pressure measurements are likely to have fallen."

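❌ Incorrect – A confidence interval is about estimating the true mean μ, not about where individual data points fall. The interval extends roughly ±2s/√n around the sample mean (here ±2s/10), whereas a range covering about 95 of the 100 measurements would have to extend roughly ±2s (if the errors are approximately normal), so the confidence interval is far too narrow to contain 95 of the measurements. This statement confuses the confidence interval with an interval for individual measurements.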

4. "To provide a range in which 95% of all possible pressure measurements are likely to fall."

❌ Incorrect – Again, this describes a range for individual measurements (a prediction or tolerance interval) rather than a confidence interval. A confidence interval estimates where the true mean μ is likely to be, and it becomes narrower as n grows; the spread of individual measurements does not shrink with n.