# Statistical Methods Demo
## Advanced Statistical Analysis Toolkit

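This notebook walks through a typical two-group analysis on a sample clinical-trial dataset: loading the data, computing descriptive statistics, plotting score distributions by group, running an independent-samples t-test, and estimating the effect size with Cohen's d.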

In [None]:
# Import required libraries
import sys
sys.path.insert(0, '../python')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Set plotting style
# NOTE(review): 'seaborn-v0_8' is presumably only available in newer matplotlib
# releases — confirm the minimum matplotlib version this notebook supports.
plt.style.use('seaborn-v0_8')
# Use seaborn's 'husl' palette as the default colour cycle for the plots below
sns.set_palette('husl')

# Visible confirmation that the setup cell ran to completion
print("✓ Libraries imported successfully")

## 1. Load Sample Data

In [None]:
# Read the clinical-trial sample dataset into a DataFrame
csv_path = '../data/sample_datasets/clinical_trial.csv'
data = pd.read_csv(csv_path)

# Report the table dimensions, then preview the top of the table
print(f"Dataset shape: {data.shape}")
print("\nFirst few rows:")
data.head()

## 2. Descriptive Statistics

In [None]:
# Summary statistics for the loaded dataset (pandas' default describe() output);
# left as the cell's last expression so the notebook renders it as a table
data.describe()

In [None]:
# Per-group summary of the score column
scores_by_group = data.groupby('group')['score']
scores_by_group.describe()

## 3. Data Visualization

In [None]:
# Create visualizations: a boxplot of score by group next to overlaid
# per-group histograms of the same scores
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
box_ax, hist_ax = axes

# Boxplot
sns.boxplot(data=data, x='group', y='score', ax=box_ax)
box_ax.set_title('Score by Group')
box_ax.set_ylabel('Score')

# Histogram
# Compute ONE set of bin edges from all scores. Letting each group pick its
# own 15 bins gives every group different edges and bar widths, so the
# overlaid distributions cannot be compared bar-for-bar.
all_scores = data['score'].dropna()
bin_edges = np.histogram_bin_edges(all_scores, bins=15)

for group in data['group'].unique():
    # Drop missing scores: NaN values make the histogram range undefined
    subset = data.loc[data['group'] == group, 'score'].dropna()
    hist_ax.hist(subset, alpha=0.6, label=group, bins=bin_edges)

hist_ax.set_xlabel('Score')
hist_ax.set_ylabel('Frequency')
hist_ax.set_title('Distribution of Scores')
hist_ax.legend()

plt.tight_layout()
plt.show()

## 4. Statistical Tests

In [None]:
from scipy import stats

# Extract groups. Missing scores are dropped up front so the printed means
# and the t-test below are computed on exactly the same observations.
treatment = data.loc[data['group'] == 'treatment', 'score'].dropna()
control = data.loc[data['group'] == 'control', 'score'].dropna()

# Perform t-test (independent samples; SciPy's default assumes equal variances)
t_stat, p_value = stats.ttest_ind(treatment, control)

print("Independent Samples T-Test")
print("="*40)
print(f"Treatment mean: {treatment.mean():.2f}")
print(f"Control mean: {control.mean():.2f}")
print(f"\nt-statistic: {t_stat:.4f}")
print(f"p-value: {p_value:.4f}")

# A NaN p-value (e.g. an empty or single-observation group, typically caused
# by a group label that does not match) is neither "significant" nor "not
# significant". Report it explicitly instead of letting it fall through to
# the else branch and print a false conclusion.
if np.isnan(p_value):
    print("\n→ Test could not be computed (check group labels and sample sizes)")
elif p_value < 0.05:
    print("\n→ Significant difference detected (p < 0.05)")
else:
    print("\n→ No significant difference (p ≥ 0.05)")

## 5. Effect Size

In [None]:
# Calculate Cohen's d
def cohens_d(group1, group2):
    """Return Cohen's d for two independent samples using the pooled SD.

    d = (mean(group1) - mean(group2)) / pooled_std, where pooled_std is the
    square root of the sample-size-weighted average of the two unbiased
    (ddof=1) variances.

    Parameters
    ----------
    group1, group2 : array-like of numbers
        Observations for each group (pandas Series, NumPy arrays, or lists).
        NaN entries are treated as missing and ignored.

    Returns
    -------
    float
        The standardized mean difference. NaN if either group has fewer than
        two non-missing observations (the sample variance is undefined).
        If both groups have zero variance the result is +/-inf (different
        means) or NaN (equal means).
    """
    x1 = np.asarray(group1, dtype=float).ravel()
    x2 = np.asarray(group2, dtype=float).ravel()

    # Drop missing values BEFORE counting: the sample sizes used to weight the
    # pooled variance must match the observations the variances and means are
    # actually computed from.
    x1 = x1[~np.isnan(x1)]
    x2 = x2[~np.isnan(x2)]

    n1, n2 = x1.size, x2.size
    if n1 < 2 or n2 < 2:
        return float('nan')

    var1, var2 = x1.var(ddof=1), x2.var(ddof=1)
    pooled_std = np.sqrt(((n1 - 1) * var1 + (n2 - 1) * var2) / (n1 + n2 - 2))

    # pooled_std can be exactly 0 for constant groups; let the division
    # produce inf/NaN quietly rather than emitting a RuntimeWarning.
    with np.errstate(divide='ignore', invalid='ignore'):
        return float((x1.mean() - x2.mean()) / pooled_std)

d = cohens_d(treatment, control)

print(f"Cohen's d: {d:.4f}")

# Interpret |d| with Cohen's conventional benchmarks: 0.2 = small,
# 0.5 = medium, 0.8 = large. Values below 0.2 are therefore negligible,
# not "small", and a NaN d (undefined effect) must not be reported as large.
magnitude = abs(d)
if np.isnan(magnitude):
    effect = "Undefined"
elif magnitude < 0.2:
    effect = "Negligible"
elif magnitude < 0.5:
    effect = "Small"
elif magnitude < 0.8:
    effect = "Medium"
else:
    effect = "Large"

print(f"Effect size: {effect}")

## 6. Conclusion

This notebook demonstrated:
- Loading and exploring data
- Calculating descriptive statistics
- Creating visualizations
- Performing hypothesis tests
- Calculating effect sizes

For more examples, see the `examples/` directory.