<a href="https://colab.research.google.com/github/YoussefDiaa1/ITI-ripo/blob/main/Probability.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Manual vs NumPy Mean

In [1]:
# Sample of ages used for the mean comparison.
ages = [18, 21, 20, 19, 22, 23, 20, 21, 22, 19]

# Arithmetic mean by hand: total of the values divided by how many there are.
age_total = sum(ages)
age_count = len(ages)
manual_mean = age_total / age_count
print("Manual Mean:", manual_mean)

Manual Mean: 20.5


In [2]:
import numpy as np  # later cells use `np` without importing it again
# Mean of the same `ages` list, computed by NumPy to compare with the manual value.
numpy_mean = np.mean(ages)
print("NumPy Mean:", numpy_mean)

NumPy Mean: 20.5


## Manual vs NumPy Median

In [3]:
import random

# Fixed seed: without it every Restart & Run All draws different numbers, so
# none of the results derived from the random data can be reproduced.
SEED = 42
random.seed(SEED)

# 15 random integers, each drawn from the inclusive range [10, 50].
random_list = [random.randint(10, 50) for _ in range(15)]

In [4]:
# The built-in sorted() orders the values; only the median pick is done by hand.
sorted_list = sorted(random_list)
n = len(sorted_list)

# Odd length: the single middle element. Even length: mean of the two middle ones.
half, has_single_middle = divmod(n, 2)
manual_median = (
    sorted_list[half]
    if has_single_middle
    else (sorted_list[half - 1] + sorted_list[half]) / 2
)
print("Manual Median:", manual_median)

Manual Median: 29


In [5]:
# Median of the same `random_list`, computed by NumPy to compare with the manual value.
numpy_median = np.median(random_list)
print("NumPy Median:", numpy_median)

NumPy Median: 29.0


## Range and Data Spread

In [6]:
# Extremes via the built-ins; the range is the distance between them.
min_val, max_val = min(random_list), max(random_list)
data_range = max_val - min_val
print(f"Manual Min: {min_val}, Max: {max_val}, Range: {data_range}")

Manual Min: 16, Max: 47, Range: 31


In [7]:
# Same extremes and range, this time taken from NumPy's reductions.
np_min, np_max = np.min(random_list), np.max(random_list)
np_range = np_max - np_min
print(f"NumPy Min: {np_min}, Max: {np_max}, Range: {np_range}")

NumPy Min: 16, Max: 47, Range: 31


## Variance & Standard Deviation

In [8]:
data = [5, 7, 3, 7, 9, 10, 15, 7]
n_obs = len(data)
mean = sum(data) / n_obs

# Squared deviation of every observation from the mean.
squared_diffs = list(map(lambda value: (value - mean) ** 2, data))
sum_of_squares = sum(squared_diffs)

population_var = sum_of_squares / n_obs    # divide by N
sample_var = sum_of_squares / (n_obs - 1)  # divide by N - 1
std_dev = population_var ** 0.5            # square root of the population variance

print("Manual Population Variance:", population_var)
print("Manual Sample Variance:", sample_var)
print("Manual Standard Deviation:", std_dev)

Manual Population Variance: 11.359375
Manual Sample Variance: 12.982142857142858
Manual Standard Deviation: 3.370367190678191


In [9]:
# NumPy equivalents: the default call gives the population form, ddof=1 the sample form.
np_population_var = np.var(data)
np_sample_var = np.var(data, ddof=1)
np_std_dev = np.std(data)

print("NumPy Population Variance:", np_population_var)
print("NumPy Sample Variance:", np_sample_var)
print("NumPy Standard Deviation:", np_std_dev)

NumPy Population Variance: 11.359375
NumPy Sample Variance: 12.982142857142858
NumPy Standard Deviation: 3.370367190678191


## Quartiles & IQR

In [10]:
# 25th, 50th (median) and 75th percentiles in a single call.
q1, q2, q3 = np.percentile(data, [25, 50, 75])

# Interquartile range: width of the middle half of the data.
iqr = q3 - q1

print(f"Q1: {q1}, Q2: {q2}, Q3: {q3}, IQR: {iqr}")

Q1: 6.5, Q2: 7.0, Q3: 9.25, IQR: 2.75


## Z-Score Normalization

In [11]:
def z_score_normalize(data):
    """Return the z-score of every value in ``data``.

    Each value is centred on the mean and divided by the population standard
    deviation (variance divided by N, not N - 1).

    Args:
        data: Iterable of numbers. It is copied into a list first, so one-shot
            iterables such as generators are accepted.

    Returns:
        list[float]: One z-score per input value, in input order. An empty
        input gives ``[]``. When the computed standard deviation is exactly
        zero (for example, all values equal) the z-score is undefined, and
        ``0.0`` is returned for each value instead of dividing by zero.
    """
    # Materialise once: the values are traversed several times below.
    values = list(data)
    if not values:
        return []

    mean = sum(values) / len(values)
    variance = sum((x - mean) ** 2 for x in values) / len(values)
    std_dev = variance ** 0.5

    # No spread at all: report "no deviation from the mean" for every value.
    if std_dev == 0:
        return [0.0] * len(values)

    return [(x - mean) / std_dev for x in values]

# Same eight values as the variance section.
data = [5, 7, 3, 7, 9, 10, 15, 7]
z_scores_manual = z_score_normalize(data)

# Rounded to four decimals for display only; z_scores_manual keeps full precision.
rounded_scores = list(map(lambda z: round(z, 4), z_scores_manual))
print("Manual Z-Scores:", rounded_scores)

Manual Z-Scores: [-0.853, -0.2596, -1.4464, -0.2596, 0.3338, 0.6305, 2.114, -0.2596]


## Moving Average

In [12]:
# 20 random integers, each drawn from the inclusive range [1, 100].
sample_size = 20
low, high = 1, 100
data = [random.randint(low, high) for _ in range(sample_size)]

In [13]:
# Mean of every run of three consecutive values; the window slides one step at a time.
moving_avg_manual = [
    sum(triple) / 3
    for triple in zip(data, data[1:], data[2:])
]
print("Manual Moving Average:", moving_avg_manual)

Manual Moving Average: [51.333333333333336, 24.333333333333332, 39.333333333333336, 56.0, 83.0, 84.66666666666667, 82.33333333333333, 80.0, 77.66666666666667, 61.0, 46.666666666666664, 42.666666666666664, 56.333333333333336, 69.33333333333333, 66.66666666666667, 77.33333333333333, 67.33333333333333, 51.666666666666664]


In [14]:
kernel_size = 3
# Uniform kernel: equal weights that sum to 1.
window = np.ones(kernel_size) / kernel_size
# mode='valid' keeps only the positions where the window fully overlaps the data.
moving_avg_numpy = np.convolve(data, window, mode='valid')
print("NumPy Moving Average:", moving_avg_numpy)

NumPy Moving Average: [51.33333333 24.33333333 39.33333333 56.         83.         84.66666667
 82.33333333 80.         77.66666667 61.         46.66666667 42.66666667
 56.33333333 69.33333333 66.66666667 77.33333333 67.33333333 51.66666667]


## Outlier Detection (IQR Method)

In [15]:
# 25 random integers in the inclusive range [1, 50], followed by two much larger values.
typical_values = [random.randint(1, 50) for _ in range(25)]
extreme_values = [300, 400]
data = typical_values + extreme_values

In [16]:
# First and third quartiles in one call; the IQR is the distance between them.
q1, q3 = np.percentile(data, [25, 75])
iqr = q3 - q1

# Fences sit 1.5 IQRs outside the quartiles.
fence = 1.5 * iqr
lower_bound = q1 - fence
upper_bound = q3 + fence

# Anything outside [lower_bound, upper_bound] is reported as an outlier.
outliers = [x for x in data if not (lower_bound <= x <= upper_bound)]
print("Outliers:", outliers)

Outliers: [300, 400]
