In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats


# Sample observations reused by most of the sections below.
data = [12, 15, 14, 10, 18, 20, 18, 12, 15, 22]

# 1. Mean, Median, Mode
print("Mean:", np.mean(data))
print("Median:", np.median(data))
# `data` is multimodal: 12, 15 and 18 each occur twice. scipy's stats.mode
# returns only the smallest of the tied values, so collect every value that
# reaches the highest frequency instead.
unique_values, frequencies = np.unique(data, return_counts=True)
modes = unique_values[frequencies == frequencies.max()].tolist()
print("Mode:", modes)

# 2. Variance and Standard Deviation
# NumPy's default is ddof=0, i.e. the population variance / SD.
print("Variance:", np.var(data))
print("Standard Deviation:", np.std(data))

# 3. Data types
nominal = ['Red', 'Blue', 'Green']   # categories without an inherent order
ordinal = ['Low', 'Medium', 'High']  # categories with a meaningful order
interval = [0, 10, 20, 30]  # No true zero
# Example ratio-scale columns, reused by the covariance/correlation heatmaps.
df_ratio = pd.DataFrame({'Weight': [50, 60, 70], 'Height': [150, 160, 170]})

# 4. Sampling techniques
population = np.arange(1, 101)
# Simple random sample: 10 distinct members drawn without replacement.
random_sample = np.random.choice(population, 10, replace=False)

# Stratified Sampling Example
# Two strata ('A' and 'B') of 50 rows each with random integer values.
df = pd.DataFrame({'Group': ['A'] * 50 + ['B'] * 50, 'Value': np.random.randint(1, 100, 100)})
# Draw 5 rows from every stratum. GroupBy.sample keeps the 'Group' column in
# the result, whereas groupby(...).apply(lambda g: g.sample(5)) is deprecated
# on pandas 2.2+ because apply operates on the grouping column.
stratified_sample = df.groupby('Group').sample(n=5)

# 5. Function to calculate range
def calc_range(dataset):
    """Return the spread of *dataset*: its largest value minus its smallest."""
    largest = np.max(dataset)
    smallest = np.min(dataset)
    return largest - smallest
data_range = calc_range(data)
print("Range:", data_range)

# 6. Histogram to visualize skewness
# Five equal-width bins over the observations in `data`.
plt.hist(data, bins=5)
plt.title('Histogram')
plt.show()

# 7. Skewness and Kurtosis
# stats.kurtosis uses Fisher's definition by default (a normal
# distribution scores 0).
skewness = stats.skew(data)
print("Skewness:", skewness)
excess_kurtosis = stats.kurtosis(data)
print("Kurtosis:", excess_kurtosis)

# 8. Positive & Negative skew
pos_skewed = [1, 2, 3, 4, 5, 50]      # one large value stretches the right tail
neg_skewed = [50, 45, 40, 35, 30, 1]  # one small value stretches the left tail
for label, sample in (
    ("Positive Skewness:", pos_skewed),
    ("Negative Skewness:", neg_skewed),
):
    print(label, stats.skew(sample))

# 9. Covariance
x = [1, 2, 3, 4, 5]
y = [2, 4, 6, 8, 10]
# np.cov returns the 2x2 covariance matrix; the off-diagonal entry is cov(x, y).
cov_xy = np.cov(x, y)[0, 1]
print("Covariance:", cov_xy)

# 10. Correlation
# Same layout for np.corrcoef: entry [0, 1] is the correlation of x with y.
corr_xy = np.corrcoef(x, y)[0, 1]
print("Correlation Coefficient:", corr_xy)

# 11. Scatter plot
# Draw y against x on the current pyplot axes, label the figure and both
# axes, then display it. These calls act on pyplot's implicit "current
# figure", so their order matters.
plt.scatter(x, y)
plt.title('Scatter Plot')
plt.xlabel('X')
plt.ylabel('Y')
plt.show()

# 12. Simple Random and Systematic Sampling
# Simple random sample: 10 distinct members, each equally likely.
simple_random = np.random.choice(population, size=10, replace=False)
# Systematic sample: every 10th member, starting with the first one.
sampling_step = 10
systematic_sample = population[::sampling_step]
print("Simple Random Sample:", simple_random)
print("Systematic Sample:", systematic_sample)

# 13. Grouped data central tendency
# Bin the observations into class intervals and count how many fall in each.
# NOTE(review): only the frequency table is produced here; no grouped
# mean/median is computed despite the section title.
class_edges = [0, 10, 15, 20, 25]
binned = pd.cut(data, bins=class_edges)
grouped_data = binned.value_counts().sort_index()
print("Grouped Data:", grouped_data)

# 14. Simulate and calculate central tendency & dispersion
# 100 draws from a normal distribution with mean 50 and SD 10.
sim_data = np.random.normal(loc=50, scale=10, size=100)
sim_mean = np.mean(sim_data)
print("Simulated Mean:", sim_mean)
sim_sd = np.std(sim_data)
print("Simulated SD:", sim_sd)

# 15. Summary stats
# Wrap the simulated values in a one-column frame so describe() can
# tabulate its summary statistics.
df_sim = pd.DataFrame({'Simulated': sim_data})
summary = df_sim.describe()
print(summary)

# 16. Boxplot & Outliers
# Pass the values by keyword: the meaning of boxplot's first positional
# argument differs between seaborn releases (`x`, with a FutureWarning, up to
# 0.11; `data` from 0.12 on), so a bare positional call is ambiguous.
sns.boxplot(x=data)
plt.title('Boxplot')
plt.show()

# 17. IQR
# Interquartile range: distance between the 75th and 25th percentiles.
q75, q25 = np.percentile(data, [75, 25])
print("IQR:", q75 - q25)

# 18. Z-score normalization
def z_score_normalize(arr):
    """Standardize *arr* to zero mean and unit (population) standard deviation.

    Args:
        arr: Sequence or array of numbers.

    Returns:
        The z-scores ``(arr - mean) / std`` computed with NumPy's default
        ``ddof=0``. When every value is identical (standard deviation of
        zero) an array of zeros is returned instead of dividing 0 by 0,
        which would yield NaN and a RuntimeWarning.
    """
    mean = np.mean(arr)
    std = np.std(arr)
    # np.subtract accepts plain lists as well as arrays.
    deviations = np.subtract(arr, mean)
    if std == 0:
        # Constant input: every deviation is zero, so the z-scores are too.
        return np.zeros_like(deviations, dtype=float)
    return deviations / std
z_scores = z_score_normalize(data)
print("Z-scores:", z_scores)

# 19. Compare SDs
# Second sample, used to compare its spread with that of `data`.
data2 = [5, 7, 9, 10, 11, 13, 14, 18]
for sd_label, observations in (("SD of data1:", data), ("SD of data2:", data2)):
    print(sd_label, np.std(observations))

# 20. Covariance heatmap
# DataFrame.cov() yields the same sample covariances (ddof=1) as
# np.cov(df_ratio.T) but keeps the column names, so the heatmap axes are
# labelled 'Weight'/'Height' just like the correlation heatmap below.
cov_matrix = df_ratio.cov()
sns.heatmap(cov_matrix, annot=True, cmap="coolwarm")
plt.title("Covariance Heatmap")
plt.show()

# 21. Correlation matrix with Seaborn
# Pairwise Pearson correlations of the df_ratio columns, annotated per cell.
sns.heatmap(df_ratio.corr(), annot=True, cmap='Blues')
plt.title("Correlation Matrix")
plt.show()

# 22. Variance & SD computation
# Population variance by hand: mean of the squared deviations from the mean.
# The mean is computed once (not once per element), and the loop variable is
# not called `x`, to avoid confusion with the module-level list `x`.
data_mean = np.mean(data)
manual_variance = sum((value - data_mean) ** 2 for value in data) / len(data)
print("Manual Variance:", manual_variance)
# The SD is the square root of the variance computed above.
print("Manual SD:", np.sqrt(manual_variance))

# 23. Visualize Skewness & Kurtosis
# Histogram of `data` with a kernel density estimate drawn over it.
sns.histplot(data, kde=True)
plt.title('Distribution with Skew & Kurtosis')
plt.show()

# 24. Pearson and Spearman Correlation
# Both functions return (statistic, p-value); only the statistic is reported.
pearson_r, _pearson_p = stats.pearsonr(x, y)
print("Pearson:", pearson_r)
spearman_rho, _spearman_p = stats.spearmanr(x, y)
print("Spearman:", spearman_rho)

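# Cell output: ModuleNotFoundError: No module named 'scipy'
# (SciPy was not installed in the environment that ran this cell; install it, e.g. `pip install scipy`, and re-run.)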