## 1. Python’s Built-in Statistics Module

In [1]:
import statistics as stats

# Two small numeric samples to compare
data1 = [10, 12, 14, 15, 18, 20, 22]
data2 = [16, 18, 20, 21, 22, 24, 26]

# Arithmetic mean of each sample, computed in one pass over both
mean1, mean2 = (stats.mean(sample) for sample in (data1, data2))

# How far the second sample's mean lies above the first's
mean_diff = mean2 - mean1

# One print call; sep="\n" puts each message on its own line
print(
    f"Mean of data1: {mean1}",
    f"Mean of data2: {mean2}",
    f"Mean difference: {mean_diff}",
    sep="\n",
)

Mean of data1: 15.857142857142858
Mean of data2: 21
Mean difference: 5.142857142857142


## 2. NumPy

In [5]:
import numpy as np

# Synthetic data: y = 2x + Gaussian noise (sd 0.2), with x drawn uniformly from [0, 1).
# A fixed seed makes the draw, and therefore the fitted coefficients, repeatable.
SEED = 42
rng = np.random.default_rng(SEED)
x = rng.random(100)
y = 2 * x + rng.standard_normal(100) * 0.2

# Design matrix: a column of ones (intercept term) followed by the feature
X = np.column_stack([np.ones(len(x)), x])

# Ordinary least squares. lstsq solves the least-squares problem directly
# instead of forming inv(X.T @ X), which loses accuracy when X.T @ X is
# ill-conditioned. rcond=None selects the machine-precision-based cutoff.
beta, *_ = np.linalg.lstsq(X, y, rcond=None)

print(f"Intercept: {beta[0]:.2f}")
print(f"Coefficient: {beta[1]:.2f}")

Intercept: 0.07
Coefficient: 1.89


## 3. SciPy

In [6]:
from scipy import stats

# Two independent samples of equal size
data1 = [10, 11, 14, 15, 18, 19, 21]
data2 = [16, 18, 20, 21, 22, 24, 26]

# Independent two-sample t-test, called with SciPy's default options
result = stats.ttest_ind(data1, data2)
t_stat = result.statistic
p_val = result.pvalue

# Report both figures to two decimal places
print(f"T-statistic: {t_stat:.2f}")
print(f"P-value: {p_val:.2f}")

T-statistic: -2.76
P-value: 0.02


## 4. Statsmodels

In [7]:
import statsmodels.api as sm
import numpy as np

# Synthetic data: y = 2x + Gaussian noise (sd 0.3), with x drawn uniformly from [0, 1).
# A fixed seed makes the draw, and therefore the regression summary, repeatable.
SEED = 42
rng = np.random.default_rng(SEED)
x = rng.random(100)
y = 2 * x + rng.standard_normal(100) * 0.3

# add_constant prepends a column of ones so the model estimates an intercept
X = sm.add_constant(x)

# Fit ordinary least squares of y on [const, x]
model = sm.OLS(y, X).fit()

# Full regression table (coefficients, standard errors, fit statistics)
print(model.summary())


                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.812
Model:                            OLS   Adj. R-squared:                  0.810
Method:                 Least Squares   F-statistic:                     423.9
Date:                Wed, 14 Aug 2024   Prob (F-statistic):           2.27e-37
Time:                        09:57:59   Log-Likelihood:                -14.206
No. Observations:                 100   AIC:                             32.41
Df Residuals:                      98   BIC:                             37.62
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0420      0.053      0.800      0.4

## 5. Pingouin

In [2]:
! pip install pingouin

Collecting pingouin
  Downloading pingouin-0.5.4-py2.py3-none-any.whl.metadata (1.1 kB)
Collecting pandas-flavor (from pingouin)
  Downloading pandas_flavor-0.6.0-py3-none-any.whl.metadata (6.3 kB)
Downloading pingouin-0.5.4-py2.py3-none-any.whl (198 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 198.9/198.9 kB 5.9 MB/s eta 0:00:00
Downloading pandas_flavor-0.6.0-py3-none-any.whl (7.2 kB)
Installing collected packages: pandas-flavor, pingouin
Successfully installed pandas-flavor-0.6.0 pingouin-0.5.4


In [3]:
import pingouin as pg
import pandas as pd

# Twelve observations (1..12) split unevenly across five groups, A through E
values = list(range(1, 13))
groups = ['A'] * 4 + ['B'] * 3 + ['C'] * 2 + ['D'] * 2 + ['E']
data = pd.DataFrame({'Value': values, 'Group': groups})

# ANOVA with 'Value' as the dependent variable and 'Group' as the single between factor
anova = pg.anova(dv='Value', between='Group', data=data)

print(anova)

  Source  ddof1  ddof2         F     p-unc       np2
0  Group      4      7  29.53125  0.000178  0.944056
