
# Capstone: Fatigue Life Prediction of LPBF AlSi10Mg

**Final Capstone Notebook (Module 24.1)**  
Professional Certificate in Machine Learning & Artificial Intelligence  
University of California, Berkeley  

**Author:** Erfan Maleki, Ph.D.

This notebook contains the **complete, end-to-end analysis pipeline** used in the final capstone,
including data preparation, exploratory data analysis, dimensionality reduction, clustering,
regression, classification, and optimization.  
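Running all cells saves the figures (numbered within the range 01–35) as `Figure_XX.png` files in the `figures/` directory, which is created in the current working directory if it does not already exist.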


## Imports and Setup

In [None]:

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans, DBSCAN
from sklearn.metrics import r2_score
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVC

# Make sure the output folder exists before any figure is written to it.
os.makedirs("figures", exist_ok=True)

# Global plot appearance: seaborn "whitegrid" style, figures rendered at 120 dpi.
sns.set_style("whitegrid")
plt.rc("figure", dpi=120)


## Load and Clean Data

In [None]:

# Read the raw workbook; the path is relative to the working directory.
df = pd.read_excel("Capstone data- Fatigue of LPBF AlSi10Mg.xlsx")

# Normalise headers: trim whitespace, lower-case, spaces -> underscores.
df.columns = df.columns.str.strip().str.lower().str.replace(" ", "_")

# A row without a measured target carries no supervised signal. Drop such rows
# instead of median-filling them, so the models never train on (or get scored
# against) invented fatigue-life values.
df = df.dropna(subset=["fatigue_life"]).reset_index(drop=True)

numeric_cols = df.select_dtypes(include=np.number).columns

# Median-impute the numeric predictors only; the target is complete after the
# dropna above. NOTE(review): the medians are computed over all rows, i.e.
# before any train/test split.
feature_cols = numeric_cols.drop("fatigue_life")
df[feature_cols] = df[feature_cols].fillna(df[feature_cols].median())

# Feature matrix = every numeric column except the target; y = the target.
X = df[numeric_cols].drop(columns=["fatigue_life"])
y = df["fatigue_life"]


## Exploratory Data Analysis (Figures 01–09 and 17–20)

In [None]:

# Figure 01 - histogram of the target, 30 bins.
life_hist = sns.histplot(y, bins=30)
life_hist.set_title("Distribution of Fatigue Life")
life_hist.figure.savefig("figures/Figure_01.png")
plt.close(life_hist.figure)

# Figure 02 - histogram of the stress_amplitude column, 10 bins.
stress_hist = sns.histplot(df["stress_amplitude"], bins=10)
stress_hist.set_title("Distribution of Stress Amplitude")
stress_hist.figure.savefig("figures/Figure_02.png")
plt.close(stress_hist.figure)

# Figure 03 - box plot of the target on the vertical axis.
life_box = sns.boxplot(y=y)
life_box.set_title("Fatigue Life Boxplot")
life_box.figure.savefig("figures/Figure_03.png")
plt.close(life_box.figure)

# Figures 04-06 - the target scattered against one predictor column at a time.
# Each entry: (column on the x-axis, plot title, output file).
scatter_specs = [
    ("stress_amplitude", "Stress Amplitude vs Fatigue Life", "figures/Figure_04.png"),
    ("ra", "Surface Roughness vs Fatigue Life", "figures/Figure_05.png"),
    ("max_crs", "Residual Stress vs Fatigue Life", "figures/Figure_06.png"),
]
for column, heading, out_path in scatter_specs:
    sns.scatterplot(x=df[column], y=y)
    plt.title(heading)
    plt.savefig(out_path)
    plt.close()

# Figure 07 - box plot of the target for each value of surface_condition.
plt.figure(figsize=(10,5))
sns.boxplot(x=df["surface_condition"], y=y)
# Anchor the rotated labels at their right end so each one stays under its box.
plt.xticks(rotation=45, ha="right")
plt.title("Fatigue Life by Surface Treatment")
# Rotated category labels can extend past the default bottom margin and be cut
# off in the saved PNG; tight_layout resizes the axes so they fit.
plt.tight_layout()
plt.savefig("figures/Figure_07.png"); plt.close()

# Figure 08 - pairwise correlations of the numeric columns as a heatmap.
corr_matrix = df[numeric_cols].corr()
sns.heatmap(corr_matrix, cmap="coolwarm")
plt.title("Correlation Heatmap")
plt.savefig("figures/Figure_08.png")
plt.close()

# Figure 09 - one row of three scatter panels, target vs. each listed column.
fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(15, 4))
for panel, column in zip(ax, ["stress_amplitude", "ra", "max_crs"]):
    sns.scatterplot(x=df[column], y=y, ax=panel)
fig.savefig("figures/Figure_09.png")
plt.close(fig)

def _title_save_close(title, path):
    """Title the current axes, write the current figure to *path*, then close it."""
    plt.title(title)
    plt.savefig(path)
    plt.close()


# Figure 17 - box plot of the target for each distinct stress_amplitude value.
sns.boxplot(x=df["stress_amplitude"], y=y)
_title_save_close("Fatigue Life by Stress Level", "figures/Figure_17.png")

# Figure 18 - histogram of the base-10 logarithm of the target.
log_life = np.log10(y)
sns.histplot(log_life)
_title_save_close("Log-Scaled Fatigue Life Distribution", "figures/Figure_18.png")

# Figure 19 - target vs. the crs_depth column.
sns.scatterplot(x=df["crs_depth"], y=y)
_title_save_close("CRS Depth vs Fatigue Life", "figures/Figure_19.png")

# Figure 20 - target vs. the surface_hardness column.
sns.scatterplot(x=df["surface_hardness"], y=y)
_title_save_close("Surface Hardness vs Fatigue Life", "figures/Figure_20.png")


## PCA, Clustering, Regression, Classification, and Optimization (Figures 10–11, 21–26, 28–29, 34–35)

In [None]:

# Scaling - standardise every feature column.
# NOTE(review): the scaler is fitted on all rows, including those that the
# train/test split further down holds out for testing.
scaler = StandardScaler()
X_scaled = scaler.fit(X).transform(X)

# PCA - keep all components (no n_components given).
pca = PCA()
X_pca = pca.fit_transform(X_scaled)
variance_share = pca.explained_variance_ratio_

# Figure 10 - cumulative explained-variance ratio across components.
plt.plot(variance_share.cumsum())
plt.title("PCA Explained Variance")
plt.savefig("figures/Figure_10.png")
plt.close()

# Figure 11 - samples projected onto the first two principal components.
plt.scatter(X_pca[:, 0], X_pca[:, 1])
plt.title("PCA Projection")
plt.savefig("figures/Figure_11.png")
plt.close()

# Figure 26 - explained-variance ratio of each individual component.
plt.bar(range(variance_share.size), variance_share)
plt.title("PCA Component Variance")
plt.savefig("figures/Figure_26.png")
plt.close()

# Clustering - both algorithms operate on the first two principal components.
pc_plane = X_pca[:, :2]
pc1, pc2 = pc_plane[:, 0], pc_plane[:, 1]

# Figure 28 - K-Means with three clusters and a fixed seed.
# NOTE(review): n_init is left at the library default, which is believed to
# differ between scikit-learn releases - consider pinning it; TODO confirm.
kmeans = KMeans(n_clusters=3, random_state=42)
labels = kmeans.fit_predict(pc_plane)
plt.scatter(pc1, pc2, c=labels)
plt.title("K-Means Clustering")
plt.savefig("figures/Figure_28.png")
plt.close()

# Figure 29 - DBSCAN with eps=0.5 and every other setting at its default.
db = DBSCAN(eps=0.5)
db_labels = db.fit_predict(pc_plane)
plt.scatter(pc1, pc2, c=db_labels)
plt.title("DBSCAN Clustering")
plt.savefig("figures/Figure_29.png")
plt.close()

# Regression - hold out a test split (default proportions, fixed seed).
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, random_state=42)

# Fit the three regressors on the training split.
lr = LinearRegression()
lr.fit(X_train, y_train)
rf = RandomForestRegressor(random_state=42)
rf.fit(X_train, y_train)
gb = GradientBoostingRegressor(random_state=42)
gb.fit(X_train, y_train)

# Figures 21-23 - observed vs. predicted target on the test split, one figure
# per model. Each model predicts once; the same predictions feed the R² chart.
regression_plots = [
    (lr, "Linear Regression Performance", "figures/Figure_21.png"),
    (rf, "Random Forest Performance", "figures/Figure_22.png"),
    (gb, "Gradient Boosting Performance", "figures/Figure_23.png"),
]
test_r2 = []
for model, heading, out_path in regression_plots:
    predicted = model.predict(X_test)
    plt.scatter(y_test, predicted)
    plt.title(heading)
    plt.savefig(out_path)
    plt.close()
    test_r2.append(r2_score(y_test, predicted))

# Figure 24 - test-split R² of the three models side by side.
plt.bar(["LR","RF","GB"], test_r2)
plt.title("R² Model Comparison")
plt.savefig("figures/Figure_24.png")
plt.close()

# Feature importance
# Figure 25 - the fitted random forest's importances, one bar per feature.
sns.barplot(x=X.columns, y=rf.feature_importances_)
plt.xticks(rotation=90)
plt.title("Random Forest Feature Importance")
# Vertical feature names are taller than the default bottom margin and would
# be cut off in the saved PNG; tight_layout resizes the axes so they fit.
plt.tight_layout()
plt.savefig("figures/Figure_25.png"); plt.close()

# Classification
# Bin the target into three equal-frequency tiers and fit an SVC (default
# hyper-parameters) on the full scaled feature matrix.
tier_names = ["Low", "Medium", "High"]
df["fatigue_class"] = pd.qcut(y, 3, labels=tier_names)
svc = SVC().fit(X_scaled, df["fatigue_class"])

# svc.predict returns the tier names as strings. matplotlib's `c=` accepts only
# numbers or colour specifications, so passing those strings raises ValueError.
# Encode each prediction as its position in tier_names (0, 1, 2) instead.
# NOTE: predictions are made on the same rows the model was fitted on, so the
# figure shows the fitted partition rather than held-out performance.
predicted_codes = pd.Categorical(svc.predict(X_scaled), categories=tier_names).codes

# Figure 34 - PCA projection coloured by predicted tier. vmin/vmax pin the
# colour scale so the colourbar ticks line up even if a tier is never predicted.
tier_points = plt.scatter(
    X_pca[:,0], X_pca[:,1],
    c=predicted_codes, vmin=0, vmax=len(tier_names) - 1,
)
tier_bar = plt.colorbar(tier_points, ticks=list(range(len(tier_names))))
tier_bar.ax.set_yticklabels(tier_names)
plt.title("Fatigue Tier Classification")
plt.savefig("figures/Figure_34.png"); plt.close()

# Optimization
# Figure 35 - plots exp(-t) for 100 evenly spaced t in [0, 5].
# NOTE(review): this curve is a synthetic exponential decay; nothing in this
# cell runs an optimiser or records a loss, so the title describes an
# illustration rather than a measured convergence trace - confirm intent.
decay_steps = np.linspace(0, 5, 100)
decay_curve = np.exp(-decay_steps)
plt.plot(decay_curve)
plt.title("SGD Optimization Convergence")
plt.savefig("figures/Figure_35.png")
plt.close()
