## Phase 1: Simulating production logs
### 1. Generate dates for Q3 July-September and shifts
### 2. Generate unit IDs and product types
### 3. Generate product categories and types
### 4. Simulate processing times  


In [1]:
# Import required libraries
import pandas as pd
import numpy as np
import random 
from datetime import datetime, timedelta
import json

In [None]:
# Determine foundational information
# Define start and end dates
start_date = datetime(2025, 1, 1)
end_date = datetime(2025, 6, 1)

# Calculate the number of days 
num_days = (end_date - start_date).days + 1

# Iterate through the datesâ€¹
dates = [start_date + timedelta(days=i) for i in range(num_days)] 

# Define shift labels
shifts = ['Shift 1', 'Shift 2', 'Shift 3']
shift_times = [6, 14, 22]
# Define number of machines
machines = 20

# Create product_type column
type_options = ['Standard', 'Custom']

# Create product_category column
category_options = ['Doors', 'Walls', 'Flooring', 'Stairs', 'Partitions']

# Production Logs
production_logs = []

# Unit counter
unit_counter = 1

# Main Loop
for date in dates:
    for shift, hour in zip(shifts, shift_times):
        for machine in range(1, machines +1):
            for unit in range(1, 7):
                ts = datetime(date.year, date.month, date.day, hour)
                unit_id = f"G-{unit_counter:06d}"
                unit_counter +=1
                machine_id = f"M{machine:02d}"
                product_type = random.choice(type_options)
                product_category = random.choice(category_options)
                if product_type == 'Standard':
                    cutting_time = round(random.normalvariate(mu=0.75, sigma=0.2), 2)
                    tempering_time = round(random.normalvariate(mu=9.0, sigma=1.5), 2)
                    framing_time = round(random.normalvariate(mu=7.0, sigma=1.5), 2)
                elif product_type == 'Custom':
                    cutting_time = round(random.normalvariate(mu=10.0, sigma=2.0), 2)
                    tempering_time = round(random.normalvariate(mu=9.0, sigma=1.5), 2)
                    framing_time = round(random.normalvariate(mu=45.0, sigma=5), 2)
                if product_category in ['Flooring', 'Stairs']:
                    cutting_time += 2
                    framing_time += 15
            
                record = {
                    "timestamp": ts.isoformat(),
                    "unit_id": unit_id ,
                    "shift": shift,
                    "machine_id": machine_id,
                    "product_type": product_type,
                    "product_category": product_category,
                    "cutting_time": cutting_time,
                    "tempering_time": tempering_time,
                    "framing_time": framing_time
                }
                production_logs.append(record)
                
# Review amount of data generated
len(production_logs)

54720

In [4]:
# Review results
production_logs[53100]

{'timestamp': '2025-05-28T14:00:00',
 'unit_id': 'G-053101',
 'shift': 'Shift 2',
 'machine_id': 'M11',
 'product_type': 'Standard',
 'product_category': 'Stairs',
 'cutting_time': 2.54,
 'tempering_time': 8.3,
 'framing_time': 21.21}

In [5]:
# Save to NDJSON for SQL
with open("Production_Logs.json", 'w') as f:
    for item in production_logs:
       f.write(json.dumps(item) + '\n')

In [6]:
# Save to JSON for Python and CSV files
with open ('Production_Logs_Single.json', 'w') as f:
    json.dump(production_logs, f, indent=2)

## Phase 2: Simulating quality audit
### 1. Simulate QC Result and Rework Flag
### 2. Simulate downtime minutes
### 3. Simulate rework reasons

In [2]:
# Load production logs into pandas for QC generation 
production = pd.read_json('/Users/AndreaLopera/Desktop/Data Science Portfolio/Production-Line-Intelligence-Dashboard-main/data/Production_Logs_Single.json')
production.head()

Unnamed: 0,timestamp,unit_id,shift,machine_id,product_type,product_category,cutting_time,tempering_time,framing_time
0,2025-01-01 06:00:00,G-000001,Shift 1,M01,Custom,Flooring,13.21,10.0,59.27
1,2025-01-01 06:00:00,G-000002,Shift 1,M01,Custom,Flooring,14.7,9.12,62.67
2,2025-01-01 06:00:00,G-000003,Shift 1,M01,Standard,Stairs,2.67,9.14,21.46
3,2025-01-01 06:00:00,G-000004,Shift 1,M01,Custom,Walls,6.09,9.36,37.67
4,2025-01-01 06:00:00,G-000005,Shift 1,M01,Standard,Partitions,1.14,5.54,5.46


In [3]:
# Simulate QC Result and Rework Flag
n_rows = len(production)
# Quality check
qc_options = ['Pass', 'Fail']
# Rework reason
rework_reason_options = {
    "cutting_issues": ['Edge chip', 'Incorrect dimensions'],
    "tempering_issues": ['Contamination', 'Scratched glass'],
    "framing_issues": ['Misaligned frame','Incorrect dimensions']
   }

# Define machine_specific defect tendency
machine_risk = {
    "M01": +0.05,
    "M02": +0.02,
    "M03": +0.01,
    "M04": +0.02,
    "M05": +0.01,
    "M06": +0.02,
    "M07": -0.01,
    "M08": +0.03,
    "M09": +0.02,
    "M10": -0.02,
    "M11": +0.02,
    "M12": +0.03,
    "M13": +0.01,
    "M14": +0.02,
    "M15": +0.04,
    "M16": +0.05,
    "M17": -0.01,
    "M18": +0.02,
    "M19": +0.01,
    "M20": +0.02
}

# Add a shift_risk term
shift_risk = {
    "Shift 1": 0.00, # baseline
    "Shift 2": 0.02, # a bit more pressure
    "Shift 3": 0.05  # night shift always has highest risk
}
# Define time thresholds that flag process anomalies
# For cutting either if the time is under 0.35 or higher than 1.15
production['cut_process_flag'] = (production['cutting_time'] < 0.35) | (production['cutting_time'] > 1.15)
# For tempering either if the time is under 6.0 or higher than 12.0
production['temp_process_flag'] = (production['tempering_time'] < 6.0) | (production['tempering_time'] > 12.0)
# For framing either if the time is under 4.0 or higher than 11.0
production['frame_process_flag'] = (production['framing_time'] < 4.0) | (production['framing_time'] > 11.0)

# Set a base probability of failure
production['p_fail'] = 0.03

# Build the probability of failure per column (series wise)
production.loc[production['product_type'] == 'Custom', 'p_fail'] +=0.10
production.loc[production['product_category'].isin(['Stairs', 'Flooring']), 'p_fail'] +=0.08
production.loc[production['temp_process_flag'], 'p_fail'] +=0.12
production.loc[production['cut_process_flag'], 'p_fail'] +=0.10
production.loc[production['frame_process_flag'], 'p_fail'] +=0.08

# Machine + shift risk (vectorized)
production['p_fail'] += production['machine_id'].map(machine_risk)
production['p_fail'] += production['shift'].map(shift_risk)

# Clamp values so probabilities do not get out of proportion
# Fix high values to 80%, meaning: even the worst part fails at most 80% of the time.
production.loc[production['p_fail'] > 0.80, 'p_fail'] = 0.80
# Fix low values to 2%, meaning: even the best part has at least 2% chance to fail
production.loc[production['p_fail'] < 0.02, 'p_fail'] = 0.02

# Draw one random U(0,1) per row
rand_vals = np.random.rand(n_rows)
# QC Result and rework_flag
production['qc_result'] = np.where(rand_vals < production['p_fail'], 'Fail', 'Pass')
production['rework_flag'] = np.where(production['qc_result'] == 'Fail', 1, 0)

# Tie downtime to rework flag random arrays
pass_downtime = np.random.randint(0, 4, size=n_rows)
fail_downtime = np.random.randint(20, 61, size=n_rows)
# Downtime
production['downtime_minutes'] = np.where(
    production['rework_flag'] == 1, 
    fail_downtime, 
    pass_downtime)

# define a process to assign rework reasons
def choose_rework_reason(row):
    # Units that don't fail don't receive a reason
    if row['qc_result'] != 'Fail':
        return None
    # Add rework reason based on:
    # Machine malfunction
    if machine_risk[row['machine_id']] > 0.03:
       return "Machine Malfunction"
    # Human Error/Poor Hnadling
    if row['cut_process_flag']:
        return random.choice(rework_reason_options['cutting_issues'])
    if row['temp_process_flag']:
        return random.choice(rework_reason_options['tempering_issues'])
    if row['frame_process_flag']:
        return random.choice(rework_reason_options['framing_issues'])
    # Shift-related Human Error
    if shift_risk[row['shift']] >= 0.05:
        return "Fatigue / Shift Pressure"
    # Fallback: some generic reason if we don't know why
    all_reasons = (
        rework_reason_options['cutting_issues'] +
        rework_reason_options['tempering_issues'] +
        rework_reason_options['framing_issues']
    )
    return random.choice(all_reasons)

# Add rework series to the dataset
production['rework_reason'] = production.apply(choose_rework_reason, axis=1)

# Override rework reason to assign 0.10% to Material Defect / Design Issues
fail_mask = production['qc_result'] == 'Fail'
override_prob = 0.10 
rand_override = np.random.rand(len(production))
override_mask = fail_mask & (rand_override < override_prob)

n_override = override_mask.sum()
rand_choice = np.random.rand(n_override)

production.loc[override_mask, 'rework_reason'] = np.where(
    rand_choice < 0.5,
    'Material Defect',
    'Design / Specification Issue'
)

production = production.drop(columns=['p_fail'])
# Review results
production.head()

Unnamed: 0,timestamp,unit_id,shift,machine_id,product_type,product_category,cutting_time,tempering_time,framing_time,cut_process_flag,temp_process_flag,frame_process_flag,qc_result,rework_flag,downtime_minutes,rework_reason
0,2025-01-01 06:00:00,G-000001,Shift 1,M01,Custom,Flooring,13.21,10.0,59.27,True,False,True,Pass,0,1,
1,2025-01-01 06:00:00,G-000002,Shift 1,M01,Custom,Flooring,14.7,9.12,62.67,True,False,True,Pass,0,1,
2,2025-01-01 06:00:00,G-000003,Shift 1,M01,Standard,Stairs,2.67,9.14,21.46,True,False,True,Pass,0,1,
3,2025-01-01 06:00:00,G-000004,Shift 1,M01,Custom,Walls,6.09,9.36,37.67,True,False,True,Fail,1,23,Machine Malfunction
4,2025-01-01 06:00:00,G-000005,Shift 1,M01,Standard,Partitions,1.14,5.54,5.46,False,True,False,Pass,0,0,


In [4]:
production.tail()

Unnamed: 0,timestamp,unit_id,shift,machine_id,product_type,product_category,cutting_time,tempering_time,framing_time,cut_process_flag,temp_process_flag,frame_process_flag,qc_result,rework_flag,downtime_minutes,rework_reason
54715,2025-06-01 22:00:00,G-054716,Shift 3,M20,Custom,Doors,11.5,9.22,39.11,True,False,True,Fail,1,51,Incorrect dimensions
54716,2025-06-01 22:00:00,G-054717,Shift 3,M20,Standard,Flooring,2.53,9.09,24.87,True,False,True,Fail,1,25,Incorrect dimensions
54717,2025-06-01 22:00:00,G-054718,Shift 3,M20,Standard,Partitions,0.6,9.68,6.07,False,False,False,Pass,0,0,
54718,2025-06-01 22:00:00,G-054719,Shift 3,M20,Custom,Doors,10.06,7.88,39.65,True,False,True,Pass,0,1,
54719,2025-06-01 22:00:00,G-054720,Shift 3,M20,Standard,Partitions,0.79,9.9,6.9,False,False,False,Pass,0,1,


In [5]:
production.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 54720 entries, 0 to 54719
Data columns (total 16 columns):
 #   Column              Non-Null Count  Dtype         
---  ------              --------------  -----         
 0   timestamp           54720 non-null  datetime64[ns]
 1   unit_id             54720 non-null  object        
 2   shift               54720 non-null  object        
 3   machine_id          54720 non-null  object        
 4   product_type        54720 non-null  object        
 5   product_category    54720 non-null  object        
 6   cutting_time        54720 non-null  float64       
 7   tempering_time      54720 non-null  float64       
 8   framing_time        54720 non-null  float64       
 9   cut_process_flag    54720 non-null  bool          
 10  temp_process_flag   54720 non-null  bool          
 11  frame_process_flag  54720 non-null  bool          
 12  qc_result           54720 non-null  object        
 13  rework_flag         54720 non-null  int64     

In [7]:
production.columns

Index(['timestamp', 'unit_id', 'shift', 'machine_id', 'product_type',
       'product_category', 'cutting_time', 'tempering_time', 'framing_time',
       'cut_process_flag', 'temp_process_flag', 'frame_process_flag',
       'qc_result', 'rework_flag', 'downtime_minutes', 'rework_reason'],
      dtype='object')

In [9]:
# Create the Quality_Audit DataFrame
quality_audit = production[['unit_id', 'qc_result', 'rework_flag', 'downtime_minutes', 'rework_reason']]
quality_audit.head()

Unnamed: 0,unit_id,qc_result,rework_flag,downtime_minutes,rework_reason
0,G-000001,Pass,0,1,
1,G-000002,Pass,0,1,
2,G-000003,Pass,0,1,
3,G-000004,Fail,1,23,Machine Malfunction
4,G-000005,Pass,0,0,


In [10]:
quality_audit.tail()

Unnamed: 0,unit_id,qc_result,rework_flag,downtime_minutes,rework_reason
54715,G-054716,Fail,1,51,Incorrect dimensions
54716,G-054717,Fail,1,25,Incorrect dimensions
54717,G-054718,Pass,0,0,
54718,G-054719,Pass,0,1,
54719,G-054720,Pass,0,1,


In [10]:
# Save as CSV
quality_audit.to_csv('/Users/AndreaLopera/Desktop/Data Science Portfolio/Production-Line-Intelligence-Dashboard-main/data/Quality_Audit.csv', index=False)