# Simulating the Data
## Write a Python script that creates a DataFrame for the production line
### 1. Create a list of 100 production dates with three shifts per day (Shift 1, Shift 2, Shift 3)
### 2. Generate unit IDs and product types
### 3. Generate product categories
### 4. Simulate Processing Times  
### 5. Simulate QC Result and Rework Flag


In [1]:
# Import required libraries
import pandas as pd
import numpy as np
import random 
from datetime import datetime, timedelta

In [2]:
# Build the production calendar: 100 consecutive days beginning May 1, 2025
start_date = datetime(2025, 5, 1)
dates = [start_date + timedelta(days=offset) for offset in range(100)]

# Every day runs three shifts; shift_times holds the hour at which each one starts
shifts = ['Shift 1', 'Shift 2', 'Shift 3']
shift_times = [6, 14, 22]

# Expand to one record per (day, shift) pair. The day is the outer loop so the
# three shifts of a given day stay adjacent (100 days x 3 shifts = 300 records).
production_dates = [day.date() for day in dates for _ in shifts]
shift_labels = [label for _ in dates for label in shifts]
shift_datetimes = [
    datetime(day.year, day.month, day.day, start_hour)
    for day in dates
    for start_hour in shift_times
]

In [3]:
# Assemble the shift schedule into a DataFrame, one row per (day, shift) record
schedule_columns = {
    'production_date': production_dates,
    'shift': shift_labels,
    'shift_start_time': shift_datetimes,
}
df = pd.DataFrame(data=schedule_columns)

# Preview the first two days (three shifts each)
df.head(6)

Unnamed: 0,production_date,shift,shift_start_time
0,2025-05-01,Shift 1,2025-05-01 06:00:00
1,2025-05-01,Shift 2,2025-05-01 14:00:00
2,2025-05-01,Shift 3,2025-05-01 22:00:00
3,2025-05-02,Shift 1,2025-05-02 06:00:00
4,2025-05-02,Shift 2,2025-05-02 14:00:00
5,2025-05-02,Shift 3,2025-05-02 22:00:00


In [4]:
# Create unit_id column: one sequential ID per row ('G-0001', 'G-0002', ...).
# The count is taken from the DataFrame itself rather than a hard-coded 300, so
# the assignment stays valid if the number of days or shifts changes.
id_name = 'G-'
df['unit_id'] = [f"{id_name}{unit_number:04d}" for unit_number in range(1, len(df) + 1)]
df.tail()

Unnamed: 0,production_date,shift,shift_start_time,unit_id
295,2025-08-07,Shift 2,2025-08-07 14:00:00,G-0296
296,2025-08-07,Shift 3,2025-08-07 22:00:00,G-0297
297,2025-08-08,Shift 1,2025-08-08 06:00:00,G-0298
298,2025-08-08,Shift 2,2025-08-08 14:00:00,G-0299
299,2025-08-08,Shift 3,2025-08-08 22:00:00,G-0300


In [5]:
# Create product_type column: weighted random draw, 80% 'Standard' / 20% 'Custom'
type_options = ['Standard', 'Custom']
# A single choices() call returns one weighted sample per row; k follows len(df)
# so the column length always matches the DataFrame instead of a hard-coded 300.
df['product_type'] = random.choices(type_options, weights=[0.80, 0.20], k=len(df))
df.head()

Unnamed: 0,production_date,shift,shift_start_time,unit_id,product_type
0,2025-05-01,Shift 1,2025-05-01 06:00:00,G-0001,Standard
1,2025-05-01,Shift 2,2025-05-01 14:00:00,G-0002,Standard
2,2025-05-01,Shift 3,2025-05-01 22:00:00,G-0003,Standard
3,2025-05-02,Shift 1,2025-05-02 06:00:00,G-0004,Custom
4,2025-05-02,Shift 2,2025-05-02 14:00:00,G-0005,Custom


In [6]:
# Create product_category column: weighted random draw across five categories
category_options = ['Doors', 'Walls', 'Flooring', 'Stairs', 'Partitions']
# Weights are listed in the same order as category_options. One choices() call
# yields a sample per row; k follows len(df) rather than a hard-coded 300.
df['product_category'] = random.choices(
    category_options,
    weights=[0.40, 0.25, 0.15, 0.10, 0.10],
    k=len(df),
)
df.head()

Unnamed: 0,production_date,shift,shift_start_time,unit_id,product_type,product_category
0,2025-05-01,Shift 1,2025-05-01 06:00:00,G-0001,Standard,Doors
1,2025-05-01,Shift 2,2025-05-01 14:00:00,G-0002,Standard,Stairs
2,2025-05-01,Shift 3,2025-05-01 22:00:00,G-0003,Standard,Doors
3,2025-05-02,Shift 1,2025-05-02 06:00:00,G-0004,Custom,Walls
4,2025-05-02,Shift 2,2025-05-02 14:00:00,G-0005,Custom,Doors


In [7]:
# Simulate Processing Times
# Stations: Cutting, Tempering, and Framing
# Each station's time is drawn from a normal distribution described here as
# (mean, standard deviation) and rounded to two decimals.
station_time_params = {
    'cutting_time': (10.0, 2.0),
    'tempering_time': (15.0, 3.0),
    'framing_time': (12, 2.5),
}

# Columns are filled one station at a time, in the order listed above. The row
# count comes from len(df) so it cannot drift from the DataFrame length.
for time_column, (mean_time, time_spread) in station_time_params.items():
    df[time_column] = [
        round(random.normalvariate(mu=mean_time, sigma=time_spread), 2)
        for _ in range(len(df))
    ]

df.head()

Unnamed: 0,production_date,shift,shift_start_time,unit_id,product_type,product_category,cutting_time,tempering_time,framing_time
0,2025-05-01,Shift 1,2025-05-01 06:00:00,G-0001,Standard,Doors,5.59,13.34,14.78
1,2025-05-01,Shift 2,2025-05-01 14:00:00,G-0002,Standard,Stairs,11.71,16.15,9.78
2,2025-05-01,Shift 3,2025-05-01 22:00:00,G-0003,Standard,Doors,8.88,17.07,6.7
3,2025-05-02,Shift 1,2025-05-02 06:00:00,G-0004,Custom,Walls,10.43,18.25,13.03
4,2025-05-02,Shift 2,2025-05-02 14:00:00,G-0005,Custom,Doors,7.31,18.38,14.94


In [8]:
# Simulate QC Result and Rework Flag
qc_options = ['Pass', 'Fail']
# Weighted draw: 90% 'Pass' / 10% 'Fail'. One choices() call produces a result
# for every row; k follows len(df) rather than a hard-coded 300.
df['qc_result'] = random.choices(qc_options, weights=[0.90, 0.10], k=len(df))
# rework_flag mirrors qc_result: 1 where the unit failed QC, 0 otherwise
df['rework_flag'] = np.where(df['qc_result'] == 'Fail', 1, 0)

df.head()

Unnamed: 0,production_date,shift,shift_start_time,unit_id,product_type,product_category,cutting_time,tempering_time,framing_time,qc_result,rework_flag
0,2025-05-01,Shift 1,2025-05-01 06:00:00,G-0001,Standard,Doors,5.59,13.34,14.78,Pass,0
1,2025-05-01,Shift 2,2025-05-01 14:00:00,G-0002,Standard,Stairs,11.71,16.15,9.78,Pass,0
2,2025-05-01,Shift 3,2025-05-01 22:00:00,G-0003,Standard,Doors,8.88,17.07,6.7,Pass,0
3,2025-05-02,Shift 1,2025-05-02 06:00:00,G-0004,Custom,Walls,10.43,18.25,13.03,Pass,0
4,2025-05-02,Shift 2,2025-05-02 14:00:00,G-0005,Custom,Doors,7.31,18.38,14.94,Pass,0


In [9]:
# Tally both QC columns. A notebook cell only renders its final expression, so a
# bare value_counts() on the first line would be computed and then discarded;
# printing each tally makes both visible.
print(df['qc_result'].value_counts())
print(df['rework_flag'].value_counts())

rework_flag
0    273
1     27
Name: count, dtype: int64

In [10]:
# downtime_minutes
def _draw_downtime_minutes():
    """Return one simulated downtime value.

    Two candidates are drawn -- a short stop from range(0, 3) and a long stop
    from range(10, 30) -- and one of them is selected with weights 0.9 / 0.1.
    """
    short_stop = random.randrange(0, 3)
    long_stop = random.randrange(10, 30)
    return random.choices([short_stop, long_stop], weights=[0.9, 0.1])[0]


# One draw per row; sized from len(df) rather than a hard-coded 300
df['downtime_minutes'] = [_draw_downtime_minutes() for _ in range(len(df))]
df.head()

Unnamed: 0,production_date,shift,shift_start_time,unit_id,product_type,product_category,cutting_time,tempering_time,framing_time,qc_result,rework_flag,downtime_minutes
0,2025-05-01,Shift 1,2025-05-01 06:00:00,G-0001,Standard,Doors,5.59,13.34,14.78,Pass,0,0
1,2025-05-01,Shift 2,2025-05-01 14:00:00,G-0002,Standard,Stairs,11.71,16.15,9.78,Pass,0,0
2,2025-05-01,Shift 3,2025-05-01 22:00:00,G-0003,Standard,Doors,8.88,17.07,6.7,Pass,0,2
3,2025-05-02,Shift 1,2025-05-02 06:00:00,G-0004,Custom,Walls,10.43,18.25,13.03,Pass,0,0
4,2025-05-02,Shift 2,2025-05-02 14:00:00,G-0005,Custom,Doors,7.31,18.38,14.94,Pass,0,2


In [11]:
# rework reason
rework_reason_options = ['Misaligned frame', 'Edge chip', 'Scratched glass', 'Incorrect dimensions', 'Contamination']


def _pick_rework_reason(flag):
    """Return a random rework reason when flag is 1, otherwise None."""
    if flag != 1:
        return None
    return random.choice(rework_reason_options)


# Only rows flagged for rework receive a reason; every other row gets None
df['rework_reason'] = df['rework_flag'].apply(_pick_rework_reason)
df.head()

Unnamed: 0,production_date,shift,shift_start_time,unit_id,product_type,product_category,cutting_time,tempering_time,framing_time,qc_result,rework_flag,downtime_minutes,rework_reason
0,2025-05-01,Shift 1,2025-05-01 06:00:00,G-0001,Standard,Doors,5.59,13.34,14.78,Pass,0,0,
1,2025-05-01,Shift 2,2025-05-01 14:00:00,G-0002,Standard,Stairs,11.71,16.15,9.78,Pass,0,0,
2,2025-05-01,Shift 3,2025-05-01 22:00:00,G-0003,Standard,Doors,8.88,17.07,6.7,Pass,0,2,
3,2025-05-02,Shift 1,2025-05-02 06:00:00,G-0004,Custom,Walls,10.43,18.25,13.03,Pass,0,0,
4,2025-05-02,Shift 2,2025-05-02 14:00:00,G-0005,Custom,Doors,7.31,18.38,14.94,Pass,0,2,


In [12]:
# Frequency of each rework reason. value_counts() excludes missing values by
# default, so rows without a rework reason are left out of the tally.
df['rework_reason'].value_counts()

rework_reason
Incorrect dimensions    10
Contamination            6
Edge chip                5
Misaligned frame         3
Scratched glass          3
Name: count, dtype: int64

In [13]:
# Check the DataFrame dimensions as (rows, columns)
df.shape

(300, 13)

In [14]:
# Inspect the data type pandas assigned to each column
df.dtypes

production_date             object
shift                       object
shift_start_time    datetime64[ns]
unit_id                     object
product_type                object
product_category            object
cutting_time               float64
tempering_time             float64
framing_time               float64
qc_result                   object
rework_flag                  int64
downtime_minutes             int64
rework_reason               object
dtype: object

In [16]:
# Write the simulated dataset to CSV without the index column.
# NOTE(review): this is an absolute path on one user's machine -- anyone else
# running the notebook needs to point it at their own checkout's data folder.
output_csv_path = '/Users/AndreaLopera/Desktop/Data Science Portfolio/Production-Line-Intelligence-Dashboard-main/data/production_data.csv'
df.to_csv(output_csv_path, index=False)