# The _SciPy.Stats_ Module
* The **_scipy.stats_** module is a powerful part of the SciPy library for statistical analysis and for working with probability distributions in Python.
* It provides a wide range of functions and classes for various statistical calculations, hypothesis testing, and generating random variables from different distributions. 

## 1. Random Variables
### Generating Random Numbers
Uniform, Normal, Exponential, etc.

In [None]:
from scipy.stats import uniform, norm, expon

# Draw 100 samples from each distribution using its default parameters.
# The draws happen in the same order as before (uniform, normal, exponential).
data1 = uniform.rvs(size=100)  # Example: Uniform distribution
data2 = norm.rvs(size=100)     # Example: Normal distribution
data3 = expon.rvs(size=100)    # Example: Exponential distribution

# Show the three samples, separated by a blank line.
print(data1, data2, data3, sep="\n\n")

## 2. Probability Distributions
### Continuous Distributions
Normal, Exponential, Chi-Square, etc.

In [None]:
from scipy.stats import norm, expon, chi2

# Example: Normal distribution
data = norm.rvs(loc=0, scale=1, size=1000)  # Generate random numbers
print(data)

print()

# norm.fit returns the fitted (loc, scale) pair, i.e. the mean and the
# STANDARD DEVIATION -- not the variance. Square the scale to get the variance.
mean, std_dev = norm.fit(data)  # Fit distribution to data
variance = std_dev ** 2

print("The mean is", mean)

print()

print("The variance is", variance)

### Discrete Distributions
Poisson, Binomial, Geometric, etc.

In [None]:
from scipy.stats import poisson, binom, geom

# Example: Binomial distribution
# Draw 1000 values, each the number of successes in n=10 trials with p=0.5.
successes = binom.rvs(n=10, p=0.5, size=1000)
print(successes, end="\n\n")

# Theoretical mean and variance of the same distribution.
mean, variance = binom.stats(n=10, p=0.5)
print("The mean is", mean, end="\n\n")
print("The variance is", variance)

## 3. Statistical Functions
### Descriptive Statistics
Mean, Minimum, Maximum, Variance, Skewness, Kurtosis, etc.

In [None]:
from scipy.stats import describe, norm
import numpy as np

# Sample 100 points from a normal distribution with loc=0 and scale=1.
data = norm.rvs(loc=0, scale=1, size=100)
print(data, end="\n\n")

# describe() bundles the summary statistics into a single result object.
desc = describe(data)
print(desc, end="\n\n")

# Report each statistic on its own labelled line.
print("Descriptive Statistics:")
for label, value in (
    ("Mean:", desc.mean),
    ("Minimum:", desc.minmax[0]),
    ("Maximum:", desc.minmax[1]),
    ("Variance:", desc.variance),
    ("Skewness:", desc.skewness),
    ("Kurtosis:", desc.kurtosis),
):
    print(label, value)

### Hypothesis Testing
t-tests, ANOVA, Kolmogorov-Smirnov test, etc.

In [None]:
# norm is imported here as well so the cell runs on its own, without relying
# on an earlier cell having been executed.
from scipy.stats import ttest_ind, f_oneway, ks_2samp, norm

# Example: t-test
# Two independent samples of 100 points whose means differ by 0.5.
group1 = norm.rvs(loc=0, scale=1, size=100)
print(group1)

print()

group2 = norm.rvs(loc=0.5, scale=1, size=100)
print(group2)

print()

# Independent two-sample t-test on the two groups.
t_stat, p_value = ttest_ind(group1, group2)
print("Test statistic:", t_stat)
print("P-value:", p_value)

## 4. Statistical Models
### Linear Regression

In [None]:
# numpy is imported here so the cell runs on its own, without relying on an
# earlier cell having been executed.
import numpy as np
from scipy.stats import linregress

# Example: Linear regression
x = np.array([0, 1, 2, 3, 4])
y = np.array([0, 0.8, 0.9, 0.1, -0.8])

# Least-squares fit of y = slope * x + intercept.
slope, intercept, r_value, p_value, std_err = linregress(x, y)

print("slope: ", slope)
print("intercept: ", intercept)
print("r_value: ", r_value)
print("p_value: ", p_value)
print("std_err: ", std_err)

$y=-0.23x+0.66$

In [None]:
# numpy is imported here so the cell runs on its own, without relying on an
# earlier cell having been executed.
import numpy as np
from scipy.stats import linregress

# Example: Linear regression
# Ten paired scores; the math score is the predictor, the physics score the response.
math_score = np.array([70, 92, 45, 48, 76, 65, 67, 83, 54, 29])
phys_score = np.array([87, 58, 64, 56, 88, 43, 72, 53, 72, 55])

slope, intercept, r_value, p_value, std_err = linregress(math_score, phys_score)

print("slope: ", slope)
print("intercept: ", intercept)
print("p_value: ", p_value)

$\text{Physics Score}=(0.1298\times \text{Math Score})+56.6353$

## 5. Probability Density Functions and Cumulative Distribution Functions
### PDFs and CDFs
Compute probability density function (PDF) and cumulative distribution function (CDF) for various distributions.

In [None]:
# numpy is imported here so the cell runs on its own, without relying on an
# earlier cell having been executed.
import numpy as np
from scipy.stats import norm

# Example: Normal distribution PDF and CDF
# Evaluate both functions on 100 evenly spaced points in [-5, 5].
x = np.linspace(-5, 5, 100)

pdf_values = norm.pdf(x, loc=0, scale=1)  # PDF values
cdf_values = norm.cdf(x, loc=0, scale=1)  # CDF values

print("pdf_values: ", pdf_values, "\n")
print("cdf_values: ", cdf_values)

## 6. Multivariate Distributions and Tests:
### Multivariate Normal Distribution
Generate samples from a multivariate normal distribution with a given mean vector and covariance matrix.

In [None]:
from scipy.stats import multivariate_normal

# Example: Multivariate normal distribution
# Two-dimensional case: zero mean, unit variances, covariance 0.5.
mean = [0, 0]
cov = [
    [1, 0.5],
    [0.5, 1],
]

# Freeze the distribution with these parameters, then draw 100 samples.
mv_normal = multivariate_normal(mean=mean, cov=cov)
samples = mv_normal.rvs(size=100)

print("samples: ", samples)

## 7. Non-parametric Methods
### Kernel Density Estimation (KDE)
Estimate the probability density function of a random variable.

In [None]:
# numpy is imported here so the cell runs on its own, without relying on an
# earlier cell having been executed.
import numpy as np
from scipy.stats import gaussian_kde

# Example: Kernel Density Estimation (KDE)
data = np.random.normal(size=1000)

# Build a Gaussian kernel density estimate from the sample.
kde = gaussian_kde(data)

# Generate a grid of points for evaluation, spanning the observed range.
# The ndarray min/max methods avoid iterating the array element by element
# in Python, as the builtin min()/max() would.
x_grid = np.linspace(data.min(), data.max(), 100)

estimated_density = kde.evaluate(x_grid)  # Evaluate density on a grid

print("data: ", data, "\n")
print("x_grid: ", x_grid, "\n")
print("estimated_density: ", estimated_density)

## 8. Statistical Tests and Measures:
### Goodness-of-fit Tests
Test if data follows a specific distribution (e.g., chi-square test).

In [None]:
# numpy is imported here so the cell runs on its own, without relying on an
# earlier cell having been executed.
import numpy as np
from scipy.stats import chisquare

# Example: Chi-square goodness-of-fit test
# Observed and expected counts per category; both sum to 50.
observed = np.array([10, 15, 8, 12, 5])
expected = np.array([10, 10, 10, 10, 10])

# Pass the expected counts by keyword so the argument roles are explicit.
chi2_stat, p_value = chisquare(observed, f_exp=expected)

print("chi2_stat: ", chi2_stat)
print("p_value: ", p_value)

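## 9. Correlation Measures
### Point-Biserial Correlation
Measure the association between a binary variable and a continuous variable. (Note: `scipy.stats` does not provide spatial point-pattern analysis such as nearest-neighbor analysis; the example below is a correlation test.)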

In [None]:
# numpy is imported here so the cell runs on its own, without relying on an
# earlier cell having been executed.
import numpy as np
from scipy.stats import pointbiserialr

# Example: Point biserial correlation
# A continuous sample and an independent 0/1 sample of the same length.
data = np.random.normal(size=100)
binary_data = np.random.randint(0, 2, size=100)

# The binary variable is passed first, the continuous variable second.
correlation, p_value = pointbiserialr(binary_data, data)

print("correlation: ", correlation)
print("p_value: ", p_value)

In [None]:
import numpy as np
from scipy.stats import norm
import matplotlib.pyplot as plt

# Plot the standard normal PDF between its 1st and 99th percentiles.
fig, ax = plt.subplots(1, 1)
x = np.linspace(norm.ppf(0.01),
                norm.ppf(0.99), 100)
ax.plot(x, norm.pdf(x),
        'r-', lw=5, alpha=0.6, label='norm pdf')
ax.legend()  # without a legend the label= given above is never displayed
plt.show()