<a href="https://colab.research.google.com/github/aglucaci/Bioinformatics-For-All/blob/master/Bioinformatics_For_All_Common_statistical_tests.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import scipy.stats as stats

# Draw 10,000 samples from a standard normal distribution.
data = np.random.randn(10000)

# Summary statistics of the sample.
# NOTE(review): the sample is continuous, so values are unlikely to repeat and
# the reported mode is probably not a meaningful "most common" value - confirm
# whether a mode is wanted here at all.
mean, median = data.mean(), np.median(data)
mode = stats.mode(data, keepdims=True).mode
variance, std_dev = data.var(), data.std()

# Emit everything on a single comma-separated line.
print(
    f"Mean: {mean}",
    f"Median: {median}",
    f"Mode: {mode}",
    f"Variance: {variance}",
    f"Std Dev: {std_dev}",
    sep=", ",
)


Mean: 0.005436865559930527, Median: 0.0028202779724284975, Mode: [-4.08181511], Variance: 1.0297581626106909, Std Dev: 1.0147700047846757


In [None]:
#!pip install pymc3

In [None]:
"""
# Common Statistical Tests in Python

This script runs common statistical tests using `numpy`, `scipy.stats`, and `statsmodels`.
It covers descriptive statistics, normality tests, t-tests, ANOVA, non-parametric tests,
correlation tests, chi-square tests, and regression analysis. It also defines a bootstrap
confidence-interval helper and contains a Bayesian A/B-testing example that is commented
out because it requires `pymc3`.
"""

import numpy as np
import scipy.stats as stats
import statsmodels.api as sm
#import pymc3 as pm
import matplotlib.pyplot as plt

# """
# ## 1. Descriptive Statistics
# Compute mean, median, mode, variance, and standard deviation.
# """

# Fresh sample: 100 draws from a standard normal distribution.
data = np.random.randn(100)

# Each summary statistic is printed on its own line as "<label>: <value>".
summary = (
    ("Mean", data.mean()),
    ("Median", np.median(data)),
    ("Mode", stats.mode(data, keepdims=True)[0]),
    ("Variance", data.var()),
    ("Standard Deviation", data.std()),
)
for label, value in summary:
    print(f"{label}:", value)

# """
# ## 2. Normality Tests
# """
# Shapiro-Wilk test of normality on `data`; only the p-value is reported.
shapiro_result = stats.shapiro(data)
shapiro_stat, shapiro_p = shapiro_result.statistic, shapiro_result.pvalue
print("Shapiro-Wilk Test: p-value:", shapiro_p)

# Kolmogorov-Smirnov test of `data` against scipy's 'norm' distribution
# (no location/scale arguments are passed, so scipy's defaults apply).
ks_result = stats.kstest(data, 'norm')
ks_stat, ks_p = ks_result.statistic, ks_result.pvalue
print("KS Test: p-value:", ks_p)

# """
# ## 3. T-Tests
# """
# One-sample t-test: compare the mean of `data` with a hypothesised mean of 0.
one_sample_result = stats.ttest_1samp(data, popmean=0)
t_stat, t_p = one_sample_result.statistic, one_sample_result.pvalue
print("One-Sample T-test: p-value:", t_p)

# Independent (unpaired) t-test between two separately drawn samples of 50;
# the second sample is shifted upwards by 0.5.
group1 = np.random.randn(50)
group2 = np.random.randn(50) + 0.5
independent_result = stats.ttest_ind(group1, group2)
t_stat_ind, t_p_ind = independent_result.statistic, independent_result.pvalue
print("Independent T-test: p-value:", t_p_ind)

# Paired (Dependent) t-test
# A paired test needs two measurements of the SAME subjects. Each "after"
# value is therefore built from its own "before" value plus a small amount of
# noise (standard deviation 0.1), rather than from an unrelated random draw,
# which would make the two samples independent and the pairing meaningless.
before = np.random.randn(30)
after = before + np.random.randn(30) * 0.1
t_stat_rel, t_p_rel = stats.ttest_rel(before, after)
print("Paired T-test: p-value:", t_p_rel)

# """
# ## 4. ANOVA (Comparison of 3+ Groups)
# """
# Third sample of 50, shifted downwards by 0.5, to compare with the two
# existing groups in a one-way ANOVA.
group3 = np.random.randn(50) - 0.5
anova_result = stats.f_oneway(group1, group2, group3)
anova_stat, anova_p = anova_result.statistic, anova_result.pvalue
print("ANOVA: p-value:", anova_p)

# """
# ## 5. Non-Parametric Tests
# """
# Mann-Whitney U test on the two independent groups.
mw_result = stats.mannwhitneyu(group1, group2)
mw_stat, mw_p = mw_result.statistic, mw_result.pvalue
print("Mann-Whitney U Test: p-value:", mw_p)

# Wilcoxon signed-rank test on the before/after samples.
wilcoxon_result = stats.wilcoxon(before, after)
w_stat, w_p = wilcoxon_result.statistic, wilcoxon_result.pvalue
print("Wilcoxon Test: p-value:", w_p)

# Kruskal-Wallis test across all three groups.
kruskal_result = stats.kruskal(group1, group2, group3)
kw_stat, kw_p = kruskal_result.statistic, kruskal_result.pvalue
print("Kruskal-Wallis Test: p-value:", kw_p)

# """
# ## 6. Correlation Analysis
# """
# Two separately drawn samples of 100 values; `y` is shifted upwards by 0.5.
x = np.random.randn(100)
y = np.random.randn(100) + 0.5

# Print the full result objects (statistic and p-value) for both coefficients.
pearson_result = stats.pearsonr(x, y)
print("Pearson Correlation:", pearson_result)
spearman_result = stats.spearmanr(x, y)
print("Spearman Correlation:", spearman_result)

# """
# ## 7. Chi-Square Tests
# """
# Goodness of fit: observed counts in four categories compared with an
# expected count of 25 per category.
observed = np.array([30, 14, 34, 22])
expected = np.array([25] * 4)
goodness_result = stats.chisquare(observed, f_exp=expected)
chi_stat, chi_p = goodness_result.statistic, goodness_result.pvalue
print("Chi-Square Goodness of Fit: p-value:", chi_p)

# Chi-Square Test for Independence on a 2x3 contingency table.
# Only the first two returned values (statistic and p-value) are kept.
table = np.array([[10, 20, 30], [6, 9, 17]])
chi2_stat, chi2_p = stats.chi2_contingency(table)[:2]
print("Chi-Square Test for Independence: p-value:", chi2_p)

# """
# ## 8. Regression Analysis
# """
# Simulate a linear relationship: response = 2 * predictor + noise, where the
# noise is a standard normal draw scaled by 0.5.
predictor = np.random.randn(100)
Y = 2 * predictor + np.random.randn(100) * 0.5

# Add a constant column to the design matrix, fit ordinary least squares,
# and print the fitted model's summary.
X = sm.add_constant(predictor)
model = sm.OLS(Y, X).fit()
print(model.summary())

# """
# ## 9. Bayesian A/B Testing (Two-Proportion Test)
# """
#with pm.Model():
#    p1, p2 = pm.Beta("p1", 1, 1), pm.Beta("p2", 1, 1)
#    trace = pm.sample(1000, return_inferencedata=True)
#pm.plot_posterior(trace)
#plt.show()

# """
# ## 10. Bootstrapping (For Confidence Intervals)
# """
def bootstrap_mean(data, n_bootstrap=1000, confidence=95.0, rng=None):
    """Return a percentile-bootstrap confidence interval for the mean of *data*.

    The data are resampled with replacement ``n_bootstrap`` times, each
    resample having the same length as *data*. The interval bounds are the
    lower and upper percentiles of the resample means.

    Parameters
    ----------
    data : array-like, 1-D
        Observations to resample. Must contain at least one value.
    n_bootstrap : int, optional
        Number of bootstrap resamples (default 1000). Must be at least 1.
    confidence : float, optional
        Confidence level in percent, strictly between 0 and 100. The default
        of 95 gives the 2.5th and 97.5th percentiles.
    rng : numpy.random.Generator, optional
        Generator used for resampling. Pass a seeded generator for
        reproducible intervals. When omitted, numpy's global random state is
        used.

    Returns
    -------
    numpy.ndarray
        Two-element array ``[lower, upper]``.

    Raises
    ------
    ValueError
        If *data* is empty or not one-dimensional, if ``n_bootstrap`` is less
        than 1, or if ``confidence`` is outside the open interval (0, 100).
    """
    sample = np.asarray(data)
    if sample.ndim != 1:
        raise ValueError("data must be one-dimensional")
    if sample.size == 0:
        # Resampling nothing would only produce NaN bounds; fail clearly instead.
        raise ValueError("data must contain at least one observation")
    if n_bootstrap < 1:
        raise ValueError("n_bootstrap must be at least 1")
    if not 0 < confidence < 100:
        raise ValueError("confidence must be between 0 and 100 (exclusive)")

    # Fall back to the global random state so existing callers are unaffected.
    choose = np.random.choice if rng is None else rng.choice
    means = [
        np.mean(choose(sample, size=sample.size, replace=True))
        for _ in range(n_bootstrap)
    ]

    # Split the excluded probability mass evenly between the two tails.
    tail = (100 - confidence) / 2
    return np.percentile(means, [tail, 100 - tail])

#ci = bootstrap_mean(data)
#print("Bootstrap 95% Confidence Interval:", ci)

Mean: -0.08103005930446333
Median: 0.006438416656661483
Mode: [-2.37468441]
Variance: 0.8494151217318198
Standard Deviation: 0.9216371963694932
Shapiro-Wilk Test: p-value: 0.7639485254264737
KS Test: p-value: 0.6499116387686261
One-Sample T-test: p-value: 0.38380555667534055
Independent T-test: p-value: 0.7283313818701433
Paired T-test: p-value: 0.06995404469981129
ANOVA: p-value: 0.0002913141945740584
Mann-Whitney U Test: p-value: 0.8442392855727232
Wilcoxon Test: p-value: 0.1459994912147522
Kruskal-Wallis Test: p-value: 0.0007539552940358881
Pearson Correlation: PearsonRResult(statistic=np.float64(-0.08469182607970105), pvalue=np.float64(0.40215595981383695))
Spearman Correlation: SignificanceResult(statistic=np.float64(-0.11908790879087908), pvalue=np.float64(0.23795578993145822))
Chi-Square Goodness of Fit: p-value: 0.023978301316485748
Chi-Square Test for Independence: p-value: 0.873028283380073
                            OLS Regression Results                            
Dep. Va