In [2]:

import numpy as np
import pandas as pd


np.random.seed(42)  # legacy global seed so the synthetic data is reproducible

# Number of synthetic observations. This name was previously never defined in
# the notebook, so the cell raised NameError on a fresh kernel; 10 matches the
# ten rows of the recorded output. Must be at least 8 because rows 2 and 7 are
# overwritten with NaN below.
n = 10

# Ages are integers in [20, 60); incomes are integers in [20000, 80000) cast to
# float so that missing values can be stored as NaN.
ages = np.random.randint(20, 60, n)
incomes = np.random.randint(20000, 80000, n).astype(float)

# Inject missing incomes at fixed positions to exercise NaN handling downstream.
incomes[[2, 7]] = np.nan

data = pd.DataFrame({
    "Age": ages,
    "Income": incomes
})

print("Synthetic Dataset:")
print(data)




Synthetic Dataset:
   Age   Income
0   58  67191.0
1   48  64131.0
2   34      NaN
3   27  61090.0
4   40  21685.0
5   58  20769.0
6   38  79735.0
7   42      NaN
8   30  22433.0
9   30  25311.0


In [4]:
# Problem 1: Mean, Median, and Age-Weighted Mean of Income


# (a) Mean -- pandas skips NaN values by default
mean_income = data["Income"].mean()

# (b) Median -- NaN values are skipped here as well
median_income = data["Income"].median()

# (c) Age-weighted mean. np.average does not skip NaN, so incomplete rows are
# removed first. The NaN check is limited to the two columns actually used:
# other cells add columns to `data`, and a NaN in an unrelated column must not
# silently remove rows from this calculation when the cell is re-run.
valid_data = data.dropna(subset=["Age", "Income"])
weighted_mean_income = np.average(valid_data["Income"], weights=valid_data["Age"])

print("\nProblem 1 Results:")
print(f"Mean Income = {mean_income:.2f}")
print(f"Median Income = {median_income:.2f}")
print(f"Age-Weighted Mean Income = {weighted_mean_income:.2f}")

print("\nExplanation: Weighted mean is preferable when some data points "
      "carry more importance than others (e.g., age, credit hours, population weights).")




Problem 1 Results:
Mean Income = 45293.12
Median Income = 43200.50
Age-Weighted Mean Income = 46076.13

Explanation: Weighted mean is preferable when some data points carry more importance than others (e.g., age, credit hours, population weights).


In [5]:
# Problem 2: Standardize Income (Z-score) and Outlier Detection

from scipy.stats import zscore  # imported here but not used by the computation below

# Centre and scale Income by its own mean and standard deviation; both
# statistics are computed with NaN values skipped.
income_col = data["Income"]
income_mean = income_col.mean(skipna=True)
income_std = income_col.std(skipna=True)

# Rows with a missing Income end up with a NaN z-score.
data["Income_z"] = (income_col - income_mean) / income_std

# Flag rows whose absolute z-score exceeds 3; NaN compares False, so rows with
# a missing Income are never counted as outliers.
outliers = data["Income_z"].abs().gt(3)
num_outliers = outliers.sum()

print("\nProblem 2 Results:")
print(data[["Age", "Income", "Income_z"]])
print(f"Number of Outliers = {num_outliers}")


Problem 2 Results:
   Age   Income  Income_z
0   58  67191.0  0.878346
1   48  64131.0  0.755607
2   34      NaN       NaN
3   27  61090.0  0.633629
4   40  21685.0 -0.946946
5   58  20769.0 -0.983688
6   38  79735.0  1.381499
7   42      NaN       NaN
8   30  22433.0 -0.916943
9   30  25311.0 -0.801504
Number of Outliers = 0


In [6]:
# Problem 3: Age Binning and Statistics


# With right=False the intervals are left-closed / right-open:
# [18, 25), [25, 35), [35, 45), [45, 60)
bins = [18, 25, 35, 45, 60]
labels = ["18-25", "25-35", "35-45", "45-60"]

data["Age_bin"] = pd.cut(data["Age"], bins=bins, labels=labels, right=False)

# Compute required stats per age bin ("count" counts non-NaN incomes only).
# observed=False is passed explicitly: Age_bin is categorical, and relying on
# the implicit default emits a FutureWarning because that default is changing.
# Keeping observed=False preserves the row for bins that contain no ages.
bin_stats = data.groupby("Age_bin", observed=False).agg(
    count_obs=("Income", "count"),
    mean_income=("Income", "mean"),
    median_income=("Income", "median")
).reset_index()

print("\nProblem 3 Results:")
print(bin_stats.sort_values("Age_bin"))


Problem 3 Results:
  Age_bin  count_obs  mean_income  median_income
0   18-25          0          NaN            NaN
1   25-35          3      36278.0        25311.0
2   35-45          2      50710.0        50710.0
3   45-60          3      50697.0        64131.0


  bin_stats = data.groupby("Age_bin").agg(


In [7]:
# Problem 4: Array Operations


# 2-D array with two rows and three columns, used by the array demos
arr = np.array([
    [1, 2, 3],
    [4, 5, 6],
])

print("\nProblem 4 Results:")

# Basic inspection: the array itself, its shape, element count, transpose,
# and a flattened 1-D copy. Each pair is printed as "<label> <value>".
inspection = (
    ("Original Array:\n", arr),
    ("Shape:", arr.shape),
    ("Size:", arr.size),
    ("Transpose:\n", arr.transpose()),
    ("Flatten:", arr.flatten()),
)
for label, value in inspection:
    print(label, value)




Problem 4 Results:
Original Array:
 [[1 2 3]
 [4 5 6]]
Shape: (2, 3)
Size: 6
Transpose:
 [[1 4]
 [2 5]
 [3 6]]
Flatten: [1 2 3 4 5 6]


In [8]:
# Negative indexing: -1 addresses the last row, then the last element of that row
print("Negative indexing arr[-1][-1] =", arr[-1][-1])

# Deliberately request a column index that is out of bounds for arr
# to show the error NumPy reports.
try:
    print(arr[:, 3])  # This will raise an IndexError
except IndexError as e:
    # Catch only the expected error so that unrelated failures
    # (for example, arr not being defined) are not hidden.
    print("Error in slicing:", e)




Negative indexing arr[-1][-1] = 6
Error in slicing: index 3 is out of bounds for axis 1 with size 3


In [9]:
# Arithmetic operations - broadcasting: the scalar 10 is added to every element
shifted = arr + 10
print("Broadcasting (arr + 10):\n", shifted)




Broadcasting (arr + 10):
 [[11 12 13]
 [14 15 16]]


In [10]:
# Matrix product of arr with its own transpose
gram = arr.dot(arr.T)
print("Dot Product (arr @ arr.T):\n", gram)



Dot Product (arr @ arr.T):
 [[14 32]
 [32 77]]


In [11]:
# Linear algebra on a small square matrix: determinant and inverse
sq_arr = np.array([
    [2, 1],
    [7, 4],
])
det, inv = np.linalg.det(sq_arr), np.linalg.inv(sq_arr)

# The determinant is computed in floating point, so the printed value may
# differ from the exact integer result by rounding error.
for heading, result in (
    ("Square Matrix:\n", sq_arr),
    ("Determinant:", det),
    ("Inverse:\n", inv),
):
    print(heading, result)

Square Matrix:
 [[2 1]
 [7 4]]
Determinant: 0.9999999999999996
Inverse:
 [[ 4. -1.]
 [-7.  2.]]
