# In [None]:
# ----------------------------------------
# Assignment-1 : PDF Learning
# Roll Number = 102303778
# ----------------------------------------

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# ----------------------------------------
# STEP 1 : Load Dataset
# ----------------------------------------

# Location of the input CSV; edit this if the file lives elsewhere.
file_path = "data.csv"

# The file is decoded as Latin-1 instead of pandas' default UTF-8.
df = pd.read_csv(file_path, encoding="latin1")

# Keep only the NO2 column: drop missing entries first, then cast what is
# left to float so the later arithmetic runs on a numeric Series.
no2_raw = df["no2"]
x = no2_raw.dropna().astype(float)

print("Total samples:", len(x))
print("NO2 Statistics:")
print(x.describe())

# ----------------------------------------
# STEP 2 : Compute Roll Number Parameters
# ----------------------------------------

r = 102303778

# Both coefficients are derived from remainders of the roll number:
#   a_r = 0.05 * (r mod 7)
#   b_r = 0.3  * (r mod 5 + 1)
rem_7 = r % 7
rem_5 = r % 5
a_r = 0.05 * rem_7
b_r = 0.3 * (rem_5 + 1)

print("\na_r =", a_r)
print("b_r =", b_r)

# ----------------------------------------
# STEP 3 : Transform x -> z
# ----------------------------------------

# z = x + a_r * sin(b_r * x), evaluated element-wise over the samples in x.
sine_term = np.sin(b_r * x)
z = x + a_r * sine_term

print("\nTransformed variable statistics:")
print(z.describe())

# ----------------------------------------
# STEP 4 : Learn PDF Parameters
# p(z) = c * exp(-lambda * (z - mu)^2)
# ----------------------------------------

# Matching p(z) to a normal density with mean mu and variance s^2 gives
#   lambda = 1 / (2 * s^2)   and   c = sqrt(lambda / pi),
# where c is the constant that makes p(z) integrate to 1.
mu = np.mean(z)
variance = np.var(z)
two_sigma_sq = 2 * variance
lambda_est = 1 / two_sigma_sq
c_est = np.sqrt(lambda_est / np.pi)

print("\nLearned Parameters:")
print("mu =", mu)
print("lambda =", lambda_est)
print("c =", c_est)

# ----------------------------------------
# STEP 5 : Plot Histogram and Fitted PDF
# ----------------------------------------

# Plot a fixed-seed random subset for speed. Series.sample draws without
# replacement by default and raises ValueError when asked for more rows than
# exist, so the request is capped at the number of available samples.
max_plot_samples = 50000
z_sample = z.sample(min(max_plot_samples, len(z)), random_state=1)

# Draw the density-normalised histogram once and reuse the bin edges that
# plt.hist returns, instead of computing the same 100-bin histogram twice.
plt.figure()
counts, bins, _ = plt.hist(z_sample, bins=100, density=True, alpha=0.5)
bin_centers = (bins[:-1] + bins[1:]) / 2

# Fitted curve p(z) = c * exp(-lambda * (z - mu)^2), evaluated at the bin
# centres so it lines up with the histogram bars.
pdf = c_est * np.exp(-lambda_est * (bin_centers - mu) ** 2)

plt.plot(bin_centers, pdf)
plt.title("Histogram of Transformed Variable z with Fitted PDF")
plt.xlabel("z")
plt.ylabel("Probability Density")
plt.show()
