# Title: Multivariate Analysis and Dimensionality Reduction in Atmospheric Data

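Description: Hands-on Python notebook that uses synthetic atmospheric data (temperature, O3, NO2, VOC) to explore correlations between variables, reduce dimensionality with PCA and factor analysis, cluster observations with K-Means, and fit a multivariate regression.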


In [None]:
# =======================
# 1. Import Required Libraries
# =======================
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA, FactorAnalysis
from sklearn.cluster import KMeans
from sklearn.linear_model import LinearRegression

In [None]:
# =======================
# 2. Generate Synthetic Atmospheric Data
# =======================
# Seed the global NumPy RNG so every run produces the same sample.
np.random.seed(42)
n_samples = 100

# Two independent drivers (temperature, NO2) and two variables derived from
# them plus Gaussian noise (O3 from temperature, VOC from NO2).
# NOTE: the four draws below must stay in this order; reordering them would
# change the generated values for the fixed seed.
temperature = np.random.normal(loc=25, scale=2, size=n_samples)
ozone_noise = np.random.normal(loc=0, scale=1, size=n_samples)
no2 = np.random.normal(loc=30, scale=5, size=n_samples)
voc_noise = np.random.normal(loc=0, scale=2, size=n_samples)

ozone = 0.5 * temperature + ozone_noise
voc = 0.8 * no2 + voc_noise

# Assemble the four series into one table, one column per variable.
df = pd.DataFrame(dict(Temperature=temperature, O3=ozone, NO2=no2, VOC=voc))

# Preview the first rows (rendered as the cell output).
df.head()

In [None]:
# =======================
# 3. Correlation Analysis
# =======================
# Pairwise correlation matrix of all columns (pandas default method: Pearson).
corr = df.corr()

fig, ax = plt.subplots(figsize=(7, 6))
# Pin the colour scale to the full correlation range [-1, 1]. Without this,
# seaborn scales the colormap to the min/max of the data, so the neutral
# midpoint of the diverging 'coolwarm' map would not correspond to r = 0 and
# weak correlations could be drawn in a strongly saturated colour.
sns.heatmap(corr, annot=True, cmap='coolwarm', vmin=-1, vmax=1, ax=ax)
ax.set_title('Correlation Matrix of Atmospheric Variables')
plt.show()

In [None]:
# =======================
# 4. Principal Component Analysis (PCA)
# =======================
# Put every variable on a comparable scale before PCA; X_scaled is reused by
# the factor-analysis and clustering cells further down.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(df)

# Project the standardized data onto its first two principal components.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)

print("Explained variance ratio:", pca.explained_variance_ratio_)

# Scatter plot of the samples in the PC1/PC2 plane.
pc1_scores, pc2_scores = X_pca[:, 0], X_pca[:, 1]
fig, ax = plt.subplots(figsize=(7, 6))
ax.scatter(pc1_scores, pc2_scores)
ax.set_xlabel('PC1')
ax.set_ylabel('PC2')
ax.set_title('PCA of Atmospheric Variables')
plt.show()

# Component weights per original variable: rows are variables, columns are
# components (pca.components_ is stored components-by-variables, hence the
# transpose).
loadings = pd.DataFrame(
    pca.components_,
    index=['PC1', 'PC2'],
    columns=df.columns,
).T
print("PCA Loadings:\n", loadings)



In [None]:
# =======================
# 5. Factor Analysis (Optional)
# =======================
# Fit a two-factor model on the standardized data and get per-sample scores.
fa = FactorAnalysis(n_components=2, random_state=42)
X_fa = fa.fit_transform(X_scaled)

# Scatter plot of the samples in the space of the two estimated factors.
factor1_scores, factor2_scores = X_fa[:, 0], X_fa[:, 1]
fig, ax = plt.subplots(figsize=(7, 6))
ax.scatter(factor1_scores, factor2_scores)
ax.set_xlabel('Factor 1')
ax.set_ylabel('Factor 2')
ax.set_title('Factor Analysis of Atmospheric Variables')
plt.show()

In [None]:
# =======================
# 6. Clustering Analysis
# =======================
# Cluster the samples in the full standardized variable space (X_scaled),
# not in the reduced PCA space.
# n_init is set explicitly: its default changed across scikit-learn releases
# (10 restarts -> 'auto'), so leaving it unset makes the number of restarts
# version-dependent and triggers a FutureWarning on some releases.
kmeans = KMeans(n_clusters=2, n_init=10, random_state=42)
clusters = kmeans.fit_predict(X_scaled)

# The first two principal components are used only as a 2-D canvas on which
# to display the cluster assignment of each sample.
fig, ax = plt.subplots(figsize=(7, 6))
ax.scatter(X_pca[:, 0], X_pca[:, 1], c=clusters, cmap='viridis')
ax.set_xlabel('PC1')
ax.set_ylabel('PC2')
# Title states what is actually shown: clusters fitted on the standardized
# variables, projected onto the PCA axes for display.
ax.set_title('K-Means Clusters Projected onto PCA Components')
plt.show()

In [None]:
# =======================
# 7. Multivariate Regression
# =======================
# Predictors and target for an ordinary least-squares fit of O3.
predictor_cols = ['Temperature', 'NO2', 'VOC']
X = df[predictor_cols]
y = df['O3']

# fit() returns the estimator itself, so construction and fitting can be chained.
model = LinearRegression().fit(X, y)

print("Regression coefficients:", model.coef_)
print("Intercept:", model.intercept_)

# Predictions on the same rows the model was fitted on (in-sample fit).
y_pred = model.predict(X)

# Observed vs. predicted scatter with a dashed 1:1 reference line spanning
# the observed O3 range; points on the line are perfect predictions.
o3_span = [y.min(), y.max()]
fig, ax = plt.subplots(figsize=(7, 6))
ax.scatter(y, y_pred)
ax.plot(o3_span, o3_span, 'r--')
ax.set_xlabel('Observed O3')
ax.set_ylabel('Predicted O3')
ax.set_title('Multivariate Regression Prediction')
plt.show()

In [None]:
# =======================
# 8. Summary
# =======================
# Recap of the steps carried out in this notebook. The leading and trailing
# empty entries reproduce the blank line before and after the printed text.
summary_lines = [
    "",
    "Summary:",
    "- Explored correlations among multiple atmospheric variables.",
    "- Applied PCA for dimensionality reduction and visualized principal components.",
    "- Performed factor analysis to extract latent factors.",
    "- Used K-Means clustering to identify patterns in atmospheric observations.",
    "- Built a multivariate regression model to predict O3 using Temperature, NO2, and VOC.",
    "- This notebook provides a foundation for analyzing high-dimensional atmospheric datasets.",
    "",
]
print("\n".join(summary_lines))