In [48]:
import os
import numpy as np
import pandas as pd

# Decimal places kept when the generated values are rounded
ROUND_NO = 2
# Number of samples drawn for each synthetic dataset
N = 2_000_000

# Seed NumPy's global random generator so repeated runs give the same data
np.random.seed(seed=42)

# Folder, relative to the current working directory, that holds the CSV files
PATH = 'Lesson 4/'


def add_path(filename):
    """Return `filename` located inside the PATH folder under the current working directory."""
    return os.path.join(os.getcwd(), PATH, filename)

In [49]:
# ===================================================
# Part 1: Simple Linear Regression (data_beam.csv)
# deflection = 0.05 * load + 2.5 + noise
# ===================================================
print("Generating Beam Data...")
# Load (x): uniformly distributed between 0 and 500 kN
load_kN = np.random.uniform(0, 500, N)

# Noise: Gaussian, mean 0, std dev 1 mm
noise_beam = np.random.normal(0, 1, N)

# Deflection (y): a clear linear relationship
# y = 0.05x + b, with y-intercept b = 2.5
deflection_mm = (0.05 * load_kN) + 2.5 + noise_beam

# Create DataFrame
df_beam = pd.DataFrame({
    'load_kN': load_kN,
    'deflection_mm': deflection_mm
})
# Round with the vectorised DataFrame.round instead of calling a Python
# lambda once per cell, which is very slow with N rows per column.
df_beam = df_beam.round(ROUND_NO)

# Save to CSV
df_beam.to_csv(add_path('data_beam.csv'), index=False)
print("Saved data_beam.csv")

Generating Beam Data...
Saved data_beam.csv


In [50]:
# ===================================================
# Part 2: Polynomial Regression (data_drag.csv)
# drag_force = 1.2 * velocity^2 + noise
# ===================================================
print("Generating Drag Data...")
# Velocity (x): uniformly distributed between 0 and 40 m/s
velocity_mps = np.random.uniform(0, 40, N)

# Noise: Gaussian, mean 0, std dev 10 N
noise_drag = np.random.normal(0, 10, N)

# Drag Force (y): a clear quadratic relationship
# y = 1.2 * x^2 + noise (no intercept term)
drag_force_N = (1.2 * velocity_mps**2) + noise_drag

# Create DataFrame
df_drag = pd.DataFrame({
    'velocity_mps': velocity_mps,
    'drag_force_N': drag_force_N
})

# Round with the vectorised DataFrame.round instead of calling a Python
# lambda once per cell, which is very slow with N rows per column.
df_drag = df_drag.round(ROUND_NO)
# Save to CSV
df_drag.to_csv(add_path('data_drag.csv'), index=False)
print("Saved data_drag.csv")


Generating Drag Data...
Saved data_drag.csv


In [51]:
# ===================================================
# Part 3: Multiple Linear Regression (data_settlement.csv)
# settlement = 0.8*pressure + 5.0*thickness + 10.0 + noise
# ===================================================
print("Generating Settlement Data...")
# Pressure (x1): uniformly distributed between 50 and 500 kPa
pressure_kPa = np.random.uniform(50, 500, N)

# Thickness (x2): uniformly distributed between 2 and 10 m
thickness_m = np.random.uniform(2, 10, N)

# Noise: Gaussian, mean 0, std dev 10 mm
noise_settlement = np.random.normal(0, 10, N)

# Settlement (y): a clear multi-linear relationship
# y = 0.8*x1 + 5.0*x2 + b, with intercept b = 10.0
settlement_mm = (0.8 * pressure_kPa) + (5.0 * thickness_m) + 10.0 + noise_settlement

# Create DataFrame
df_settlement = pd.DataFrame({
    'pressure_kPa': pressure_kPa,
    'thickness_m': thickness_m,
    'settlement_mm': settlement_mm
})

# Round with the vectorised DataFrame.round instead of calling a Python
# lambda once per cell, which is very slow with N rows per column.
df_settlement = df_settlement.round(ROUND_NO)

# Save to CSV
df_settlement.to_csv(add_path('data_settlement.csv'), index=False)
print("Saved data_settlement.csv")

print("\nData generation complete.")

Generating Settlement Data...
Saved data_settlement.csv

Data generation complete.


## Testing

In [52]:
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import r2_score
from IPython.display import display, Latex

def regression(reg, x, y):
    """Fit `reg` on (x, y) and display its R^2 on that same data.

    The score is computed from predictions on the training inputs, formatted
    to two decimals and shown as a LaTeX snippet. Nothing is returned.
    """
    reg.fit(x, y)
    score = r2_score(y, reg.predict(x))
    display(Latex(f'$R^2$: {score:.2f}'))


# Part 1: read the beam CSV back and fit deflection against load
df = pd.read_csv(add_path('data_beam.csv'))

# Double brackets keep the feature as a 2-D frame; the target stays a Series
x, y = df[['load_kN']], df['deflection_mm']

reg = LinearRegression()
regression(reg, x, y)

<IPython.core.display.Latex object>

In [53]:
#Part 2
# The drag data is generated as a quadratic in velocity, so a straight-line
# fit on the raw velocity is the wrong model for this check. Fit on degree-2
# polynomial features instead.
df = pd.read_csv(add_path('data_drag.csv'))

x = df[['velocity_mps']]
y = df['drag_force_N']

# include_bias=False: LinearRegression already fits its own intercept,
# so the constant column would be redundant.
x_poly = PolynomialFeatures(degree=2, include_bias=False).fit_transform(x)

reg = LinearRegression()
regression(reg, x_poly, y)

<IPython.core.display.Latex object>

In [54]:
# Part 3: read the settlement CSV back and fit on both predictors
df = pd.read_csv(add_path('data_settlement.csv'))

# Two feature columns (pressure, thickness) against the settlement target
x, y = df[['pressure_kPa', 'thickness_m']], df['settlement_mm']

reg = LinearRegression()
regression(reg, x, y)

<IPython.core.display.Latex object>