Step 1: Create a sample pavement dataset


In [None]:
import pandas as pd
import numpy as np
from scipy import stats

# Sample condition survey: one row per pavement section, with surface type,
# traffic class, age, and four distress/condition measures.
data = {
    'Section': ['S1','S2','S3','S4','S5','S6','S7','S8'],
    'Surface': ['Asphalt','Asphalt','Concrete','Asphalt','Concrete','Asphalt','Asphalt','Concrete'],
    'Traffic': ['High','Medium','Low','High','Medium','Low','Medium','High'],
    'Age': [12,8,5,15,10,7,9,20],
    'IRI': [4.2,3.1,1.8,4.8,2.5,3.0,3.3,4.5],
    'Crack': [35,25,5,40,10,18,20,32],
    # NOTE(review): the last PCI value (500, section S8) is outside the 0-100
    # PCI scale. It looks like a data-entry error, but the intended value
    # cannot be recovered from this file, so it is left as-is and reported
    # by the range check below. Confirm and correct at the source.
    'PCI': [45,60,88,40,80,70,68,500],
    'Rut': [14,10,5,16,6,8,9,12]
}

df = pd.DataFrame(data)
print("=== Pavement Dataset ===")
print(df, "\n")
print(df.shape)
print(df.dtypes)
# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() would emit an extra "None" line after the report.
df.info()

# Sanity check: the Pavement Condition Index is defined on a 0-100 scale.
# Out-of-range rows distort every PCI statistic computed later (mean, std,
# quartiles, skewness, correlation), so surface them right away.
pci_out_of_range = df.loc[~df['PCI'].between(0, 100), ['Section', 'PCI']]
if not pci_out_of_range.empty:
    print("\nWARNING: PCI values outside the valid 0-100 range (possible data-entry error):")
    print(pci_out_of_range.to_string(index=False))


Step 2: Basic descriptive statistics


In [None]:
# Summary statistics for the numeric condition columns only; the
# categorical columns (Section, Surface, Traffic) are left out.
numeric_columns = ['Age', 'IRI', 'Crack', 'PCI', 'Rut']
summary_table = df[numeric_columns].describe()

print("=== Descriptive Statistics ===")
print(summary_table, "\n")

Step 3: Mean, Median, Mode
What is the average roughness (IRI) of our network?
If one section is extremely poor, does it affect overall assessment?
Which surface type dominates the network?

In [None]:
# Central tendency of roughness (IRI) and the most common surface type.
mean_iri = df['IRI'].mean()
median_iri = df['IRI'].median()
# mode() returns a Series because several values can tie; [0] takes the first.
mode_surface = df['Surface'].mode()[0]
print(f"Mean IRI: {mean_iri:.2f}")
print(f"Median IRI: {median_iri:.2f}")
print(f"Mode Surface Type: {mode_surface}")
# The figures in the inference come from the values computed above, so the
# sentence stays correct when the dataset changes. The "moderate" label is
# still a fixed judgement and is not derived from the data.
print(
    f"Inference: The network has moderate roughness (mean IRI ~{mean_iri:.1f}), "
    f"mostly {mode_surface} pavements.\n"
)

Step 4: Standard Deviation and Variance
Is performance consistent across the network?

In [None]:
# Spread of PCI across the network, using pandas' default std()/var().
pci_series = df['PCI']
std_pci, var_pci = pci_series.std(), pci_series.var()

for label, value in (
    ("Standard Deviation of PCI", std_pci),
    ("Variance of PCI", var_pci),
):
    print(f"{label}: {value:.2f}")
print("Inference: High standard deviation indicates uneven performance; some pavements are failing while others are good.\n")

Step 5: Quartiles (Q1, Q3, IQR)
What proportion of roads are critical vs. acceptable?

In [None]:
# Quartiles of PCI: Q1 and Q3 bound the middle half of the network and
# IQR is the width of that band.
Q1 = df['PCI'].quantile(0.25)
Q3 = df['PCI'].quantile(0.75)
IQR = Q3 - Q1
print(f"Q1 (25th percentile): {Q1:.2f}")
print(f"Q3 (75th percentile): {Q3:.2f}")
print(f"IQR (Interquartile Range): {IQR:.2f}")
# Report the computed Q1 rather than a hard-coded threshold, so the
# inference cannot contradict the quartile printed just above.
print(f"Inference: 25% of pavements below PCI {Q1:.0f} need urgent rehabilitation.\n")

Step 6: Skewness and Kurtosis
Is the distribution of PCI biased toward poor or good pavements?
Are there extreme conditions in pavement performance?

In [None]:
# Shape of the PCI distribution: skewness shows which tail is longer and
# kurtosis shows how heavy the tails are.
skew_pci = stats.skew(df['PCI'])
# scipy.stats.kurtosis defaults to Fisher's definition (excess kurtosis),
# so a normal distribution scores 0 and the sign carries the meaning.
kurt_pci = stats.kurtosis(df['PCI'])
print(f"Skewness of PCI: {skew_pci:.2f}")
print(f"Kurtosis of PCI: {kurt_pci:.2f}")

if skew_pci < 0:
    print("Inference: Left-skewed → majority of pavements are in good condition, few are very poor.")
elif skew_pci > 0:
    print("Inference: Right-skewed → most pavements are poor, few are excellent.")
else:
    # A skew of exactly zero is neither left- nor right-skewed.
    print("Inference: Symmetric → PCI values are balanced around the mean.")

# The interpretation depends on the sign of the excess kurtosis, so the
# message is chosen from the computed value instead of always saying "peaked".
if kurt_pci > 0:
    print(f"Kurtosis ({kurt_pci:.2f}) → peaked, heavy-tailed distribution; most PCI values cluster near the mean but extreme values are present.\n")
elif kurt_pci < 0:
    print(f"Kurtosis ({kurt_pci:.2f}) → flat, light-tailed distribution; PCI values are spread out with few extremes.\n")
else:
    print(f"Kurtosis ({kurt_pci:.2f}) → tails comparable to a normal distribution.\n")

Step 7: Covariance and Correlation
Does older pavement tend to have higher roughness?
How strong is the relationship between Age and PCI?

In [None]:
# Linear association between pavement age and condition, plus the full
# correlation matrix of the numeric columns.
cov_age_pci = df['Age'].cov(df['PCI'])
corr_age_pci = df['Age'].corr(df['PCI'])
corr_matrix = df[['Age','IRI','Crack','PCI','Rut']].corr()

print(f"Covariance (Age vs PCI): {cov_age_pci:.2f}")
print(f"Correlation (Age vs PCI): {corr_age_pci:.2f}")

# Build the inference from the computed coefficient so it cannot contradict
# the value printed above. The 0.7 / 0.3 cut-offs are rule-of-thumb labels,
# not formal thresholds.
abs_corr = abs(corr_age_pci)
if abs_corr >= 0.7:
    strength = "strong"
elif abs_corr >= 0.3:
    strength = "moderate"
else:
    strength = "weak"

if corr_age_pci < 0:
    print(f"Inference: Negative correlation (r ≈ {corr_age_pci:.2f}, {strength}) shows that as age increases, PCI drops.\n")
elif corr_age_pci > 0:
    print(
        f"Inference: Positive correlation (r ≈ {corr_age_pci:.2f}, {strength}) means PCI rises with age in this data; "
        "pavements normally deteriorate with age, so check the PCI values for outliers or entry errors.\n"
    )
else:
    # Reached when r is exactly 0 or NaN (e.g. a constant column).
    print("Inference: No linear relationship between Age and PCI could be measured.\n")

print("=== Correlation Matrix ===")
print(corr_matrix, "\n")