In [38]:
# cell 0
import pandas as pd
from pathlib import Path

In [39]:
# Locate the raw CSV inputs; the path is relative to the current working directory.
raw_dir = Path("../data/raw")
csv_files = list(raw_dir.glob("*.csv"))
csv_files.sort()  # sorted paths give a stable load/display order

# Stop early, reporting the resolved absolute path, when there is nothing to load.
if len(csv_files) == 0:
    raise FileNotFoundError(f"No CSV files found in {raw_dir.resolve()}")


In [40]:

# Read all CSVs into a dict of DataFrames keyed by filename (without suffix)
dfs = {p.stem: pd.read_csv(p) for p in csv_files}

# Show every table under a "### name ###" heading, preferring a styled view
# with the row index hidden.
for name, df in dfs.items():
    print(f"### {name} ###")
    df_no_index = df.reset_index(drop=True)
    try:
        display(df_no_index.style.hide(axis="index"))
    except Exception as exc:
        # Best-effort fallback: keep the notebook running, but report why the
        # styled view was skipped instead of swallowing the error silently
        # (for example, the installed pandas may lack Styler support).
        print(f"(styled view unavailable for {name}: {exc!r}; showing plain table)")
        display(df_no_index)

### table1_model_performance ###


Unnamed: 0,Model,Architecture Type,Accuracy (%),Parameters,FLOPs,Dataset,Computational Cost
0,ResNet50,CNN,97.19,25.0M,4.1B,WM-811K,High
1,Swin Transformer,Transformer,97.47,28.3M,4.5B,MixedWM38,High
2,Vision Transformer (ViT),Transformer,96.77,86.3M,17.6B,MixedWM38,Very High
3,ViT-Tiny,Transformer,98.4,5.7M,1.2B,WM-38k,Medium
4,Autoencoder+CNN,Hybrid,98.56,23.8M,3.2B,WM-811K,High
5,ShuffleNet-v2,Lightweight CNN,96.93,1.3M,146M,WM-811K,Low
6,MobileNetV3,Lightweight CNN,98.0,3.2M,219M,WM-811K,Low
7,EfficientNet V2-S,CNN,97.8,21.5M,8.8B,WM-811K,High
8,Random Forest,Traditional ML,79.5,,,WM-811K,Very Low
9,SVM,Traditional ML,77.5,,,WM-811K,Very Low


### table2_hardware_speeds ###


Unnamed: 0,Model,Hardware,Training Speed (img/s),Inference Speed (img/s),Training Speed CPU (img/s),Inference Speed CPU (img/s),Parameters,FLOPs
0,ResNet18,GPU,66.8,212.4,2.7,7.5,11.2M,1.8B
1,ResNet50,GPU,45.2,180.3,1.9,5.2,25.0M,4.1B
2,MobileNetV3,GPU,480.1,1046.0,5.4,18.4,3.2M,219M
3,ShuffleNetV2,GPU,463.8,912.4,3.9,11.8,1.3M,146M
4,Swin Transformer,GPU,32.1,125.8,1.2,3.4,28.3M,4.5B
5,ViT-Base,GPU,28.5,98.2,0.8,2.1,86.3M,17.6B
6,ViT-Tiny,GPU,142.3,456.7,4.2,12.1,5.7M,1.2B
7,WaferSegClassNet,GPU,892.1,2510.5,25.1,89.3,0.51M,0.2B


### table3_datasets ###


Unnamed: 0,Dataset,Year,Total Samples,Classes,Source,Data Type,Resolution,Label Type,Class Balance
0,WM-811K,2018,811457,9,Real-world fab,Wafer Maps,Variable (6×21 to 300×202),Pass/Fail,14:1
1,MixedWM38,2021,38015,38,Real+Synthetic,Wafer Maps,52×52,Multi-label,Balanced
2,SECOM,2019,1567,2,Real-world fab,Sensor Data,590 features,Binary,14:1
3,MIIC,2020,25276,Multiple,Real SEM,SEM Images,High-resolution,Defect types,Imbalanced
4,Synthetic-22,2018,28600,22,Generated,Wafer Maps,Variable,Multi-class,Balanced


### table4_challenges ###


Unnamed: 0,Challenge,Category,Description,Current Solutions,Priority
0,Sub-5nm Detection,Technical,Stochastic defects below optical resolution li...,"EUV imaging, advanced computational techniques",High
1,Real-time Processing,Technical,Inference time incompatible with production th...,"Edge computing, model optimization, quantization",High
2,Mixed-type Classification,Algorithmic,Multiple overlapping defect patterns,"Multi-label approaches, hierarchical classific...",Medium
3,Class Imbalance,Data,Rare defective samples in production data,"Advanced sampling, data augmentation, loss mod...",High
4,False Positive Rates,Operational,Unnecessary wafer scrapping and yield loss,"Ensemble methods, confidence thresholding",High
5,Limited Labeled Data,Data,Cost and expertise for manual annotation,"Active learning, semi-supervised approaches",Medium
6,Domain Adaptation,Deployment,Models fail across different facilities/processes,"Transfer learning, domain adaptation strategies",Medium
7,Integration Complexity,Industrial,Legacy system compatibility,"Standardized APIs, modular architectures",High
8,Explainability,Regulatory,Black-box models lack interpretability,"Grad-CAM, attention visualization, inherent in...",Medium
9,Computational Cost,Economic,High-performance hardware requirements,"Lightweight models, model compression",High


### table5_future_research ###


Unnamed: 0,Research Direction,Timeline,Key Technologies,Expected Impact,Priority
0,Explainable AI,Short-term (1-2 years),"Grad-CAM integration, attention visualization",Model interpretability for regulatory compliance,High
1,Few-shot Learning,Short-term (1-2 years),"Prototypical networks, MAML",Rapid adaptation to novel defect types,High
2,Edge Computing,Medium-term (2-3 years),"Model compression, specialized accelerators",Real-time processing with low latency,High
3,Multimodal Fusion,Medium-term (2-3 years),"Cross-modal attention, sensor integration",Enhanced accuracy through diverse data sources,Medium
4,Generative Models,Medium-term (2-3 years),"Physics-informed GANs, controllable generation",Synthetic data augmentation for rare defects,Medium
5,Quantum Computing,Long-term (5+ years),Quantum machine learning algorithms,Exponential speedup for pattern recognition,Low
6,Neuromorphic Systems,Long-term (5+ years),"Event-driven processing, spike-based computing",Ultra-low power edge inference,Low
7,AutoML Integration,Short-term (1-2 years),"Neural architecture search, hyperparameter opt...",Automated model development for new processes,Medium
8,Federated Learning,Medium-term (2-3 years),Distributed training across facilities,Privacy-preserving collaborative learning,Medium
9,Digital Twins,Medium-term (2-3 years),"Virtual fab simulation, predictive modeling",Proactive defect prevention,High


### table6_implementation_phases ###


Unnamed: 0,Phase,Duration,Key Activities,Deliverable
0,Data Collection,1-2 months,"Define annotation standards, collect diverse s...",High-quality training data
1,Model Selection,2-4 weeks,Evaluate architectures based on requirements,Optimal accuracy-efficiency trade-off
2,Training Setup,1-2 weeks,"Configure hardware, implement data pipelines",Efficient training infrastructure
3,Validation Strategy,2-3 weeks,Design cross-validation considering temporal s...,Realistic performance estimates
4,Production Integration,1-3 months,"API development, monitoring systems",Seamless deployment
5,Performance Monitoring,Ongoing,"Track accuracy, throughput, resource utilization",Sustained production performance
6,Model Updates,Quarterly,"Continuous learning, retraining protocols",Adaptation to process changes
7,Quality Assurance,Ongoing,"A/B testing, fallback mechanisms",Reliable operation


### table7_evolution_technologies ###


Unnamed: 0,Era,Technology,Key Features,Throughput,Min. Defect Size
0,1980s,Rule-based Systems,"Statistical pattern matching, manual inspection",0.1-30 wph,3μm+
1,1990s,Automated Optical,"Die-to-die comparison, TDI detectors",1-8 wph,0.4μm
2,1995,E-beam SEM,High-resolution scanning electron microscopy,0.05 wph,100nm
3,2000s,Digital Image Processing,"Advanced algorithms, higher throughput",30-50 wph,60-150nm
4,2010s,Machine Learning,"SVM, Random Forest, feature engineering",Variable,Sub-100nm
5,2015,Deep Learning CNN,"ResNet, DenseNet architectures",Real-time,10-50nm
6,2018,Advanced CNN,"Lightweight models, transfer learning",Real-time,5-10nm
7,2021,Transformer Era,"Vision Transformers, attention mechanisms",Real-time,3-5nm
8,2024,Hybrid & VLM,"Multimodal, Vision-Language Models",Real-time,Sub-3nm


In [33]:
# Select the model-performance table from the loaded frames; the trailing bare
# name makes it the cell's displayed output.
df = dfs["table1_model_performance"]
df

Unnamed: 0,Model,Architecture Type,Accuracy (%),Parameters,FLOPs,Dataset,Computational Cost
0,ResNet50,CNN,97.19,25.0M,4.1B,WM-811K,High
1,Swin Transformer,Transformer,97.47,28.3M,4.5B,MixedWM38,High
2,Vision Transformer (ViT),Transformer,96.77,86.3M,17.6B,MixedWM38,Very High
3,ViT-Tiny,Transformer,98.4,5.7M,1.2B,WM-38k,Medium
4,Autoencoder+CNN,Hybrid,98.56,23.8M,3.2B,WM-811K,High
5,ShuffleNet-v2,Lightweight CNN,96.93,1.3M,146M,WM-811K,Low
6,MobileNetV3,Lightweight CNN,98.0,3.2M,219M,WM-811K,Low
7,EfficientNet V2-S,CNN,97.8,21.5M,8.8B,WM-811K,High
8,Random Forest,Traditional ML,79.5,,,WM-811K,Very Low
9,SVM,Traditional ML,77.5,,,WM-811K,Very Low


In [46]:
# Render the model-performance table as a Plotly table figure.
# pandas (`pd`) comes from the notebook's import cell, so only the
# Plotly-specific imports are needed here.
import plotly.graph_objects as go
import plotly.io as pio

# Default renderer for every figure shown after this point.
pio.renderers.default = "browser"  # or "iframe"

# Build the path from the shared `raw_dir` (defined in the data-discovery cell)
# rather than repeating the directory as a second hard-coded string.
df = pd.read_csv(raw_dir / "table1_model_performance.csv")

# One table column per DataFrame column, left-aligned, with a light-grey header.
fig = go.Figure(data=[go.Table(
    header=dict(values=list(df.columns), fill_color="#f5f5f5", align="left"),
    cells=dict(values=[df[col] for col in df.columns], align="left")
)])

# Height grows with the row count (120 + 24 per row) and is capped at 400;
# only a small top margin is kept.
fig.update_layout(height=min(400, 120 + 24 * len(df)), margin=dict(l=0, r=0, t=30, b=0))
fig.show()
