<a href="https://colab.research.google.com/github/Vishu52/9d9b93d3-2a90-4e7f-96c1-7607df7a178f/blob/main/Statice_Question.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#Q1 List of 100 integers (90–130) + Statistical Measures

import numpy as np
import random
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns

# ------------------------------
# PART 1: LIST OF 100 INTEGERS
# ------------------------------
# 100 pseudo-random integers, each drawn with random.randint from the closed
# range [90, 130]. The list is regenerated (unseeded) every time this runs.
int_list = [random.randint(90, 130) for _ in range(100)]

def calculate_mean(lst):
    """Return the arithmetic mean of the values in ``lst``.

    Raises:
        ZeroDivisionError: if ``lst`` is empty.
    """
    total = sum(lst)
    count = len(lst)
    return total / count

def calculate_median(lst):
    """Return the median of ``lst`` without modifying it.

    For an odd number of values the middle element of the sorted data is
    returned as-is; for an even number the two middle elements are averaged.
    """
    ordered = sorted(lst)
    half, has_middle = divmod(len(lst), 2)
    if has_middle:
        return ordered[half]
    lower, upper = ordered[half - 1], ordered[half]
    return (lower + upper) / 2

def calculate_mode(lst):
    """Return the most frequent value in ``lst``.

    Delegates to ``scipy.stats.mode`` (which also decides how ties are
    resolved) and unwraps the single value from the ``mode`` array.
    """
    mode_result = stats.mode(lst, keepdims=True)
    most_common = mode_result.mode
    return most_common[0]

def weighted_mean(values, weights):
    """Return the mean of ``values`` where each value counts ``weights[i]`` times.

    Thin wrapper around ``np.average``; ``values`` and ``weights`` must have
    compatible lengths.
    """
    averaged = np.average(a=values, weights=weights)
    return averaged

def geometric_mean(lst):
    """Return the geometric mean of the values in ``lst``.

    The result is computed in log space, ``exp(sum(log(x)) / n)``, instead of
    ``prod(x) ** (1 / n)``. Multiplying the raw values overflows quickly: the
    product of 100 integers around 100 is far beyond the int64 range, and
    NumPy integer arithmetic wraps around silently instead of raising.

    Values are expected to be positive. A zero anywhere in the data yields
    0.0; negative values yield ``nan``.

    Raises:
        ZeroDivisionError: if ``lst`` is empty.
    """
    arr = np.asarray(lst, dtype=float)
    # Evaluate 1/n with plain Python numbers first so that empty input keeps
    # raising ZeroDivisionError instead of quietly producing nan.
    exponent = 1 / len(arr)
    # log(0) is -inf, which correctly drives the result to 0.0; silence only
    # that specific warning.
    with np.errstate(divide="ignore"):
        log_total = np.log(arr).sum()
    return np.exp(exponent * log_total)

def harmonic_mean(lst):
    return len(lst) / sum(1/x for x in lst)

def midrange(lst):
    """Return the midpoint between the smallest and largest value in ``lst``."""
    smallest = min(lst)
    largest = max(lst)
    return (smallest + largest) / 2

def trimmed_mean(lst, percent):
    """Return the mean of ``lst`` after trimming ``percent`` % from each tail.

    ``percent`` is given on a 0-100 scale and converted to the 0-1 proportion
    that ``scipy.stats.trim_mean`` expects.
    """
    proportion = percent / 100
    return stats.trim_mean(lst, proportiontocut=proportion)

# ------------------------------
# PART 2: LIST OF 500 INTEGERS
# ------------------------------
# 500 pseudo-random integers, each drawn with random.randint from the closed
# range [200, 300]. The list is regenerated (unseeded) every time this runs.
int_list2 = [random.randint(200, 300) for _ in range(500)]

def visualize_data(lst):
    """Draw histogram and KDE views of ``lst``, calling ``plt.show()`` three times.

    The three plots are, in order:
      1. a histogram with a red KDE curve drawn over it,
      2. a KDE curve on its own,
      3. a KDE curve with a histogram drawn over it.
    """
    # NOTE(review): histplot uses its default statistic here while kdeplot
    # draws a density estimate on the same axes; if the default is counts the
    # KDE line will look nearly flat against the bars. Consider passing
    # stat="density" to histplot — confirm the intended look before changing.
    sns.histplot(lst, kde=False)
    sns.kdeplot(lst, color="red")
    plt.show()
    # KDE only.
    sns.kdeplot(lst)
    plt.show()
    # Same pair as the first plot but drawn in the opposite order and with
    # default colors.
    sns.kdeplot(lst)
    sns.histplot(lst, kde=False)
    plt.show()

def data_range(lst):
    """Return the spread of ``lst``: largest value minus smallest value."""
    largest = max(lst)
    smallest = min(lst)
    return largest - smallest

def variance(lst):
    """Return the sample variance of ``lst`` (divides by ``n - 1``)."""
    bessel_correction = 1
    return np.var(lst, ddof=bessel_correction)

def std_dev(lst):
    """Return the sample standard deviation of ``lst`` (divides by ``n - 1``)."""
    bessel_correction = 1
    return np.std(lst, ddof=bessel_correction)

def iqr(lst):
    """Return the interquartile range of ``lst`` (75th minus 25th percentile)."""
    lower_quartile, upper_quartile = np.percentile(lst, [25, 75])
    return upper_quartile - lower_quartile

def coefficient_of_variation(lst):
    """Return the sample standard deviation of ``lst`` divided by its mean.

    The result is a plain ratio; it is not scaled to a percentage.
    """
    spread = std_dev(lst)
    center = calculate_mean(lst)
    return spread / center

def mad(lst):
    """Return the mean absolute deviation of ``lst`` around its mean."""
    center = calculate_mean(lst)
    abs_deviations = [abs(value - center) for value in lst]
    return np.mean(abs_deviations)

def quartile_deviation(lst):
    """Return the quartile deviation: half the distance between Q3 and Q1."""
    lower_quartile, upper_quartile = np.percentile(lst, [25, 75])
    return (upper_quartile - lower_quartile) / 2

def coefficient_of_dispersion(lst):
    """Return ``(max - min) / (max + min)`` for the values in ``lst``.

    Raises:
        ZeroDivisionError: if the largest and smallest values sum to zero.
    """
    largest = max(lst)
    smallest = min(lst)
    return (largest - smallest) / (largest + smallest)

# ------------------------------
# RANDOM VARIABLE CLASS
# ------------------------------
class DiscreteRV:
    """A discrete random variable given by outcomes and their probabilities.

    ``values[i]`` is an outcome and ``probabilities[i]`` is its probability.
    The inputs are stored unchanged; no normalization or validation is done.
    """

    def __init__(self, values, probabilities):
        self.values = values
        self.prob = probabilities

    def expected_value(self):
        """Return the probability-weighted sum of the outcomes."""
        outcomes = np.asarray(self.values)
        weights = np.asarray(self.prob)
        return np.sum(outcomes * weights)

    def variance(self):
        """Return the probability-weighted squared deviation from the mean."""
        center = self.expected_value()
        outcomes = np.asarray(self.values)
        weights = np.asarray(self.prob)
        squared_deviation = (outcomes - center) ** 2
        return np.sum(squared_deviation * weights)

# ------------------------------
# DISTRIBUTIONS & SAMPLES
# ------------------------------

def simulate_die(n=10000):
    """Simulate ``n`` rolls of a fair six-sided die.

    Returns:
        A ``(mean, variance)`` tuple of the simulated rolls; the variance is
        the population variance (NumPy's default ``ddof=0``).
    """
    outcomes = np.random.randint(low=1, high=7, size=n)
    average = outcomes.mean()
    spread = outcomes.var()
    return average, spread

def sample_distribution(dist, size=1000, **kwargs):
    """Draw ``size`` samples from a named distribution and summarize them.

    Args:
        dist: ``"binomial"`` (requires keyword arguments ``n`` and ``p``) or
            ``"poisson"`` (requires keyword argument ``lam``).
        size: number of samples to draw.
        **kwargs: the distribution parameters listed above.

    Returns:
        A ``(mean, variance)`` tuple of the samples; the variance is the
        population variance (NumPy's default ``ddof=0``).

    Raises:
        ValueError: if ``dist`` is not a supported distribution name.
        KeyError: if a required distribution parameter is missing.
    """
    if dist == "binomial":
        samples = np.random.binomial(kwargs["n"], kwargs["p"], size)
    elif dist == "poisson":
        samples = np.random.poisson(kwargs["lam"], size)
    else:
        # Without this branch an unknown name fell through to the return
        # statement and failed with an unhelpful UnboundLocalError.
        raise ValueError(
            f"Unsupported distribution {dist!r}; expected 'binomial' or 'poisson'"
        )
    return samples.mean(), samples.var()

def normal_pdf(x, mean, sd):
    """Return the normal density at ``x`` for the given mean and standard deviation."""
    density = stats.norm.pdf(x, loc=mean, scale=sd)
    return density

def exponential_cdf(x, lam):
    """Return the exponential-distribution CDF ``P(X <= x)`` with rate ``lam``.

    Args:
        x: point(s) at which to evaluate the CDF; a scalar or array-like.
        lam: rate parameter of the distribution (expected to be positive).

    Returns:
        ``1 - exp(-lam * x)`` for ``x >= 0`` and ``0`` for ``x < 0``. The
        exponential distribution has no mass below zero, so negative ``x``
        must not produce the negative "probabilities" that the bare formula
        gives.
    """
    # Clamp negative inputs to 0, where the CDF evaluates to exactly 0.
    clamped = np.maximum(x, 0)
    # -expm1(-t) equals 1 - exp(-t) but keeps precision when t is tiny.
    return -np.expm1(-lam * clamped)

def poisson_pmf(k, lam):
    """Return the Poisson probability of exactly ``k`` events at mean rate ``lam``."""
    probability = stats.poisson.pmf(k, mu=lam)
    return probability

# ------------------------------
# TIPS DATASET ANALYSIS
# ------------------------------
# Load seaborn's "tips" example dataset when this code runs.
# NOTE(review): load_dataset presumably needs network access or a local
# cache the first time — confirm before running this offline.
tips = sns.load_dataset("tips")
# Module-level shortcuts to two columns. The functions below that take
# parameters named x and y use their own arguments, not these globals.
x = tips["total_bill"]
y = tips["tip"]

def skewness(col):
    """Return the skewness of ``col`` as computed by ``scipy.stats.skew``."""
    skew_value = stats.skew(col)
    return skew_value

def skew_type(col, threshold=0.0):
    """Classify the skewness of ``col`` as positive, negative or symmetric.

    Args:
        col: the data to classify; passed to ``skewness``.
        threshold: non-negative tolerance around zero. Skewness within
            ``[-threshold, threshold]`` is reported as approximately
            symmetric. The default of 0.0 keeps the original behavior, where
            only an exactly-zero skewness counts as symmetric; pass a small
            positive value (for example 0.5) for a practical
            "approximately symmetric" band.

    Returns:
        ``"Positive Skew"``, ``"Negative Skew"`` or
        ``"Approximately Symmetric"``. A ``nan`` skewness fails both
        comparisons and is therefore reported as approximately symmetric.
    """
    s = skewness(col)
    if s > threshold:
        return "Positive Skew"
    if s < -threshold:
        return "Negative Skew"
    return "Approximately Symmetric"

def covariance(x, y):
    """Return the sample covariance between ``x`` and ``y``.

    Taken from the off-diagonal entry of the 2x2 matrix that ``np.cov``
    returns for the two inputs.
    """
    cov_matrix = np.cov(x, y)
    return cov_matrix[0][1]

def pearson_corr(x, y):
    """Return the Pearson correlation coefficient between ``x`` and ``y``.

    Only the coefficient is returned; the p-value that
    ``scipy.stats.pearsonr`` also computes is discarded.
    """
    coefficient, _p_value = stats.pearsonr(x, y)
    return coefficient

def plot_scatter(x, y, xlabel="Total Bill", ylabel="Tip"):
    """Show a scatter plot of ``y`` against ``x``.

    Args:
        x: values for the horizontal axis.
        y: values for the vertical axis.
        xlabel: label for the horizontal axis. Defaults to ``"Total Bill"``,
            matching the tips dataset this helper was written for.
        ylabel: label for the vertical axis. Defaults to ``"Tip"``.
    """
    plt.scatter(x, y)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.show()

# ------------------------------
# Z-TEST FUNCTIONS
# ------------------------------

def z_test_two_proportions(p1, p2, n1, n2):
    """Two-sided z-test for the difference between two sample proportions.

    Args:
        p1: observed proportion in the first sample.
        p2: observed proportion in the second sample.
        n1: size of the first sample.
        n2: size of the second sample.

    Returns:
        A ``(z, p_value)`` tuple, where ``z`` uses the pooled proportion for
        the standard error and ``p_value`` is two-sided.
    """
    p_pool = (p1 * n1 + p2 * n2) / (n1 + n2)
    std_err = np.sqrt(p_pool * (1 - p_pool) * (1 / n1 + 1 / n2))
    z = (p1 - p2) / std_err
    # Use the survival function rather than 1 - cdf: for large |z| the cdf
    # rounds to exactly 1.0 and the subtraction would report a p-value of 0.
    p_val = 2 * stats.norm.sf(abs(z))
    return z, p_val

def z_test_mean(before, after):
    """Two-sided paired z-test on the mean change from ``before`` to ``after``.

    Args:
        before: measurements taken first; any array-like, including lists.
        after: measurements taken second, paired with ``before`` by position
            and of the same length.

    Returns:
        A ``(z, p_value)`` tuple. ``z`` is the mean of ``after - before``
        divided by its standard error (sample standard deviation of the
        differences over ``sqrt(n)``); ``p_value`` is two-sided.
    """
    # Convert up front: plain Python lists do not support elementwise
    # subtraction, which previously made list input fail with TypeError.
    before_arr = np.asarray(before, dtype=float)
    after_arr = np.asarray(after, dtype=float)
    differences = after_arr - before_arr
    mean_diff = differences.mean()
    sd = differences.std(ddof=1)
    z = mean_diff / (sd / np.sqrt(len(differences)))
    # Survival function avoids the p-value rounding to 0 for large |z|.
    p_val = 2 * stats.norm.sf(abs(z))
    return z, p_val

# ------------------------------
# T-TEST FUNCTIONS
# ------------------------------

def t_test_two_samples(a, b):
    """Run an independent two-sample t-test on ``a`` and ``b``.

    Returns the result object of ``scipy.stats.ttest_ind`` (statistic and
    p-value) with that function's default settings.
    """
    outcome = stats.ttest_ind(a, b)
    return outcome

def t_test_paired(before, after):
    """Run a paired (related-samples) t-test on ``before`` and ``after``.

    Returns the result object of ``scipy.stats.ttest_rel`` (statistic and
    p-value); the statistic's sign follows ``before - after``.
    """
    outcome = stats.ttest_rel(before, after)
    return outcome

# ------------------------------
# CHI-SQUARE TEST
# ------------------------------

def chi_square_test(table):
    """Run a chi-square test of independence on a contingency ``table``.

    Returns:
        A plain ``(chi2, p_value, dof, expected)`` tuple unpacked from
        ``scipy.stats.chi2_contingency``.
    """
    outcome = stats.chi2_contingency(table)
    return tuple(outcome)

# ------------------------------
# ANOVA TEST
# ------------------------------

def anova_test(*groups):
    """Run a one-way ANOVA across the given sample groups.

    Each positional argument is one group of observations. Returns the
    result object of ``scipy.stats.f_oneway`` (F statistic and p-value).
    """
    outcome = stats.f_oneway(*groups)
    return outcome

