In [15]:
import pandas as pd
import random
import numpy as np
import random
from datetime import datetime, timedelta

# Customer Non-Conformity Occurrences Data

In [16]:
def random_date(start, end):
    """Pick a random date between ``start`` and ``end``, both ends included.

    The result is ``start`` moved forward by a whole number of days drawn
    with ``random.randint``, so any time-of-day carried by ``start`` is kept.

    Args:
        start: Lower bound of the window.
        end: Upper bound of the window.

    Returns:
        ``start`` plus between 0 and ``(end - start).days`` days.

    Raises:
        ValueError: From ``random.randint`` when ``(end - start).days`` is
            negative, i.e. ``end`` lies before ``start``.
    """
    span_in_days = (end - start).days
    day_offset = random.randint(0, span_in_days)
    return start + timedelta(days=day_offset)

In [17]:
# Scenario parameters: the category pools the simulated records are sampled from.
products = [
    'Resin A',
    'Resin B',
    'Catalyst X',
    'Additive Y',
]

# The "customers" pool holds country names.
customers = [
    'United States', 'Brazil', 'Canada',
    'Germany', 'France', 'United Kingdom',
    'China', 'India', 'Japan',
    'Nigeria', 'South Africa', 'Egypt',
    'Australia', 'New Zealand',
]

# NOTE(review): 'Drum leakeage' and 'Costumer' below are misspelled; they are
# kept verbatim because they are written to the exported dataset as-is.
types_nc = [
    'Out of specification',
    'Cross Contamination',
    'Drum leakeage',
    'Labeling error',
]
origins = [
    'Costumer',
    'Internal Inspection',
    'Line Operator',
    'Internal Audit',
]

severity = ['High', 'Medium', 'Low']

responsible = [
    'Quality Engineer',
    'Production Technician',
    'Shift Supervisor',
    'Process Engineer',
]

status_nc = ['Open', 'In progress', 'Resolved']

In [18]:
# Simulated data generation: one record per non-conformity occurrence.
n_occurrences = 30
start_date = datetime(2024, 1, 1)
end_date = datetime(2025, 5, 31)

# Seed the stdlib RNG in this cell so the occurrences are identical on every
# Restart & Run All; without it this cell draws from an unseeded generator.
random.seed(42)

data_occurrences = []

for i in range(1, n_occurrences + 1):
    # Occurrence date is drawn from the inclusive [start_date, end_date] window.
    date_occurrence = random_date(start_date, end_date)
    data_occurrences.append({
        "id": i,  # sequential identifier starting at 1
        "date": date_occurrence,
        "product": random.choice(products),
        "customer": random.choice(customers),
        "type_nonconformity": random.choice(types_nc),
        "origin": random.choice(origins),
        # Weighted draw: weights 0.4 / 0.4 / 0.2 follow the order of `severity`.
        "severity": random.choices(severity, weights=[0.4, 0.4, 0.2])[0],
        "responsible": random.choice(responsible),
        "status": random.choice(status_nc)
    })

In [19]:
# Turn the list of occurrence records into a DataFrame (one row per record,
# one column per dictionary key) and preview the first five rows.
df_occurrences = pd.DataFrame(data=data_occurrences)
df_occurrences.head(5)

Unnamed: 0,id,date,product,customer,type_nonconformity,origin,severity,responsible,status
0,1,2024-07-05,Resin A,India,Cross Contamination,Internal Inspection,High,Production Technician,Open
1,2,2024-09-22,Additive Y,India,Out of specification,Internal Inspection,High,Quality Engineer,In progress
2,3,2024-05-07,Catalyst X,Brazil,Drum leakeage,Costumer,Medium,Process Engineer,In progress
3,4,2024-04-13,Resin A,United States,Drum leakeage,Costumer,Medium,Quality Engineer,Open
4,5,2024-08-04,Resin A,China,Drum leakeage,Internal Audit,Medium,Shift Supervisor,Resolved


In [20]:
# Export the occurrences without the index column. The file is tab-separated
# even though its name ends in ".csv".
df_occurrences.to_csv(
    "occurences_data.csv",
    sep='\t',
    encoding='utf-8',
    index=False,
    header=True,
)

# Production Process Data

In [21]:
# Fix both global random generators (stdlib and NumPy legacy) to the same seed
# so the simulation cells that follow draw a repeatable sequence.
random.seed(42)
np.random.seed(42)

In [22]:
# Simulation window as ISO date strings. These rebind the names that held
# datetime objects in the occurrences section.
start_date = "2024-01-01"
end_date = "2025-05-31"

# The shifts simulated for every day.
shifts = ['Morning', 'Afternoon', 'Night']

# One timestamp per calendar day in the window.
days = pd.date_range(start_date, end_date, freq='D')

In [23]:
# Empty accumulator for the per-shift production records.
data = list()

In [24]:
# Start from an empty list in this cell: the loop appends to `data`, so
# re-running the cell without this reset would duplicate every record.
data = []

# Simulate one production record per (day, shift) pair.
for day in days:
    for shift in shifts:
        # Planned output is an integer between 900 and 1100 (inclusive).
        planned_qty = random.randint(900, 1100)
        # Actual output is 85%-105% of the plan, truncated to an integer.
        produced_qty = int(planned_qty * random.uniform(0.85, 1.05))

        # The random draws below happen in dictionary order; keep that order
        # so a fixed seed keeps producing the same dataset.
        data.append({
            'Date': day,
            'Shift': shift,
            'Planned_Quantity': planned_qty,
            'Produced_Quantity': produced_qty,
            'Downtime_Minutes': random.randint(20, 80),
            'Available_Time_Minutes': 480,  # constant for every shift
            # Rework is 1%-5% of the produced quantity, truncated to an integer.
            'Rework_Quantity': int(produced_qty * random.uniform(0.01, 0.05)),
            'NC_Occurrences': random.randint(0, 3),
            'Avg_Response_Time_Minutes': random.uniform(30, 180)
        })

# KEY PERFORMANCE INDICATORS

# Overall Equipment Effectiveness

$$
\text{OEE} = \text{Availability} \times \text{Performance} \times \text{Quality}
$$

---

## 1. Availability

$$
\text{Availability} = \frac{\text{Available Time} - \text{Downtime}}{\text{Available Time}}
$$

---

## 2. Performance

$$
\text{Performance} = \frac{\text{Produced}}{\text{Planned}}
$$

---

## 3. Quality

$$
\text{Quality} = \frac{\text{Produced} - \text{Rework}}{\text{Produced}}
$$

In [26]:
# Build the production frame from the simulated shift records in this cell.
# No visible cell defines `df_production`, so a fresh-kernel run would
# otherwise stop here with a NameError.
#
# KPI columns, matching the formulas in the section above:
#   Availability = (available time - downtime) / available time
#   Performance  = produced / planned
#   Quality      = (produced - rework) / produced
#   OEE          = Availability * Performance * Quality
# `assign` evaluates its keyword arguments in order, so OEE can read the three
# columns created just before it.
df_production = pd.DataFrame(data).assign(
    Availability=lambda d: (d['Available_Time_Minutes'] - d['Downtime_Minutes']) / d['Available_Time_Minutes'],
    Performance=lambda d: d['Produced_Quantity'] / d['Planned_Quantity'],
    Quality=lambda d: (d['Produced_Quantity'] - d['Rework_Quantity']) / d['Produced_Quantity'],
    OEE=lambda d: d['Availability'] * d['Performance'] * d['Quality'],
)

In [27]:
# Display the production frame, including the computed KPI columns
# (the rendered output elides the middle rows of the frame).
df_production

Unnamed: 0,Date,Shift,Planned_Quantity,Produced_Quantity,Downtime_Minutes,Available_Time_Minutes,Rework_Quantity,NC_Occurrences,Avg_Response_Time_Minutes,Availability,Performance,Quality,OEE
0,2024-01-01,Morning,1063,927,67,480,19,1,50.930689,0.860417,0.872060,0.979504,0.734956
1,2024-01-01,Afternoon,926,912,77,480,29,3,34.767402,0.839583,0.984881,0.968202,0.800596
2,2024-01-01,Night,923,824,52,480,28,1,137.402942,0.891667,0.892741,0.966019,0.768978
3,2024-01-02,Morning,1079,1034,34,480,28,2,151.414569,0.929167,0.958295,0.972921,0.866304
4,2024-01-02,Afternoon,901,902,30,480,34,2,71.680701,0.937500,1.001110,0.962306,0.903163
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1546,2025-05-30,Afternoon,1087,1013,76,480,47,0,46.391111,0.841667,0.931923,0.953603,0.747976
1547,2025-05-30,Night,1046,1051,42,480,44,2,142.309511,0.912500,1.004780,0.958135,0.878478
1548,2025-05-31,Morning,1091,1014,39,480,10,1,152.458407,0.918750,0.929423,0.990138,0.845486
1549,2025-05-31,Afternoon,900,782,69,480,29,3,128.178179,0.856250,0.868889,0.962916,0.716396


In [28]:
# Export the production data and KPI columns without the index column. The
# file is tab-separated even though its name ends in ".csv".
df_production.to_csv(
    "production_data.csv",
    sep='\t',
    encoding='utf-8',
    index=False,
    header=True,
)