In [1]:
import pandas as pd
import numpy as np
import random

# 1. Setup Parameters
SEED = 42             # fixed seed so "Restart Kernel -> Run All" regenerates the same dataset
num_wafers = 20       # wafers generated per batch
dies_per_wafer = 500  # candidate die positions drawn per wafer, BEFORE the circular crop.
                      # Candidates are uniform over the 20x20 square, so roughly pi/4 (~78.5%)
                      # land on the wafer: expect about 3 * 20 * 500 * 0.785 ~= 23,500 rows.
batches = ['BATCH_A1', 'BATCH_A2', 'BATCH_B1']
machines = ['ASML_LITH_01', 'TEL_ETCH_04', 'AMAT_DEP_02']

WAFER_RADIUS = 10.0    # die coordinates span [-WAFER_RADIUS, WAFER_RADIUS] on both axes
EDGE_RADIUS = 8.5      # dies farther than this from the center belong to the "edge" zone
BASE_PASS_PROB = 0.92  # pass probability for dies inside EDGE_RADIUS
EDGE_PASS_PROB = 0.60  # pass probability in the edge zone (injected spatial defect)
DEFECT_TYPES = ['Scratch', 'Contamination', 'Particle', 'Photo_Error']
COLUMNS = ['BatchID', 'WaferID', 'MachineID', 'Die_X', 'Die_Y', 'Result', 'DefectType']
OUTPUT_CSV = 'penang_factory_yield.csv'


def generate_die_records(batches, machines, num_wafers, dies_per_wafer, seed=None):
    """Simulate per-die test results for every wafer of every batch.

    For each wafer one machine is picked at random, then ``dies_per_wafer``
    candidate (x, y) positions are drawn uniformly from the square
    [-WAFER_RADIUS, WAFER_RADIUS]^2. Candidates outside the circular wafer are
    discarded, so a wafer yields at most ``dies_per_wafer`` rows. Dies in the
    edge zone (distance > EDGE_RADIUS) pass with EDGE_PASS_PROB, all other dies
    with BASE_PASS_PROB. Failing dies get a random entry of DEFECT_TYPES.

    Args:
        batches: iterable of batch identifiers.
        machines: non-empty sequence of machine identifiers to choose from.
        num_wafers: number of wafers per batch (wafer ids run 1..num_wafers).
        dies_per_wafer: number of candidate positions drawn per wafer.
        seed: seed for the private random generator; the same seed always
            produces the same records. ``None`` seeds from system entropy.

    Returns:
        A list of rows ``[batch, wafer_label, machine, x, y, result, defect]``
        where x and y are rounded to 2 decimals, result is 1 (pass) or 0 (fail)
        and defect is the string "None" for passing dies.
    """
    # A private generator keeps the simulation reproducible without touching
    # the global `random` state that other cells might rely on.
    rng = random.Random(seed)
    records = []

    for batch in batches:
        for wafer_id in range(1, num_wafers + 1):
            # Every die on a wafer shares the machine selected here.
            machine = rng.choice(machines)

            for _ in range(dies_per_wafer):
                x = rng.uniform(-WAFER_RADIUS, WAFER_RADIUS)
                y = rng.uniform(-WAFER_RADIUS, WAFER_RADIUS)

                # Rejection step: positions outside the circular wafer are dropped.
                distance_from_center = np.sqrt(x**2 + y**2)
                if distance_from_center > WAFER_RADIUS:
                    continue

                # Injected spatial defect: lower pass probability near the wafer edge.
                if distance_from_center > EDGE_RADIUS:
                    pass_prob = EDGE_PASS_PROB
                else:
                    pass_prob = BASE_PASS_PROB

                result = 1 if rng.random() < pass_prob else 0

                # NOTE(review): the literal string "None" is written to the CSV; recent
                # pandas versions appear to parse it back as NaN in read_csv by default --
                # confirm this is what downstream consumers expect.
                defect = "None"
                if result == 0:
                    defect = rng.choice(DEFECT_TYPES)

                records.append(
                    [batch, f"W_{wafer_id}", machine, round(x, 2), round(y, 2), result, defect]
                )

    return records


data = generate_die_records(batches, machines, num_wafers, dies_per_wafer, seed=SEED)

# 2. Create DataFrame
df = pd.DataFrame(data, columns=COLUMNS)

# Sanity checks: the crop can only remove candidates, and a defect type is
# present exactly when the die failed.
assert len(df) <= len(batches) * num_wafers * dies_per_wafer
assert ((df['Result'] == 1) == (df['DefectType'] == "None")).all()

# 3. Save to CSV
df.to_csv(OUTPUT_CSV, index=False)
print("Success! 'penang_factory_yield.csv' has been created.")

Success! 'penang_factory_yield.csv' has been created.


In [2]:
# Preview the generated dataset; pandas abbreviates the display to the first and last rows.
df

Unnamed: 0,BatchID,WaferID,MachineID,Die_X,Die_Y,Result,DefectType
0,BATCH_A1,W_1,AMAT_DEP_02,-3.11,-6.47,1,
1,BATCH_A1,W_1,AMAT_DEP_02,1.58,3.73,1,
2,BATCH_A1,W_1,AMAT_DEP_02,-2.98,-5.87,0,Scratch
3,BATCH_A1,W_1,AMAT_DEP_02,2.37,-1.23,1,
4,BATCH_A1,W_1,AMAT_DEP_02,5.11,-1.69,1,
...,...,...,...,...,...,...,...
23576,BATCH_B1,W_20,AMAT_DEP_02,4.65,3.57,1,
23577,BATCH_B1,W_20,AMAT_DEP_02,-8.47,1.99,0,Contamination
23578,BATCH_B1,W_20,AMAT_DEP_02,1.22,0.81,1,
23579,BATCH_B1,W_20,AMAT_DEP_02,0.38,9.97,0,Photo_Error


In [3]:
# Summarize each column's dtype and non-null count, plus the frame's memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 23581 entries, 0 to 23580
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   BatchID     23581 non-null  object 
 1   WaferID     23581 non-null  object 
 2   MachineID   23581 non-null  object 
 3   Die_X       23581 non-null  float64
 4   Die_Y       23581 non-null  float64
 5   Result      23581 non-null  int64  
 6   DefectType  23581 non-null  object 
dtypes: float64(2), int64(1), object(4)
memory usage: 1.3+ MB
