In [3]:
import pandas as pd
import numpy as np

In [5]:
# Fix the global NumPy RNG state so the synthetic data is reproducible
SEED = 42
np.random.seed(SEED)


In [6]:
# Size of the synthetic dataset:
#   num_machines - how many distinct machine IDs can appear
#   num_records  - how many rows are generated in total
num_machines, num_records = 100, 1000

In [7]:
# Draw the raw columns one at a time (the order of the random draws matters
# for reproducibility under a fixed seed), then assemble them into a dict.
machine_labels = [f"Machine_{idx}" for idx in range(1, num_machines + 1)]
machine_ids = np.random.choice(machine_labels, size=num_records)
temperatures = np.random.uniform(20, 100, size=num_records)  # degrees Celsius
run_times = np.random.uniform(0, 24, size=num_records)  # hours

data = {
    "Machine_ID": machine_ids,
    "Temperature": temperatures,
    "Run_Time": run_times,
}

In [8]:
# Add a downtime flag: 1 if Run_Time < 2 or Temperature > 80, otherwise 0.
# Both columns are NumPy arrays, so the rule is evaluated element-wise in a
# single vectorised pass instead of a Python-level loop; the boolean mask is
# then cast to integers to give the 0/1 encoding.
downtime_mask = (data["Run_Time"] < 2) | (data["Temperature"] > 80)
data["Downtime_Flag"] = downtime_mask.astype(int)

In [9]:
# Turn the column dict into a tabular frame (one column per dict key)
df = pd.DataFrame.from_dict(data)

In [10]:
# Persist the frame to disk without the row index column
output_csv = "synthetic_manufacturing_data.csv"
df.to_csv(output_csv, index=False)

In [11]:
# Show the first five rows as a quick sanity check
preview = df.head()
print(preview)

   Machine_ID  Temperature   Run_Time  Downtime_Flag
0  Machine_52    66.947289   6.314702              0
1  Machine_93    79.635158   8.643274              0
2  Machine_15    54.532764   2.103426              0
3  Machine_72    30.206424  22.486988              0
4  Machine_61    42.702072  13.291254              0


Model: train and evaluate a downtime classifier

In [21]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import joblib


In [22]:
# Read the synthetic manufacturing records written by the data-generation step
csv_path = 'synthetic_manufacturing_data.csv'
data = pd.read_csv(csv_path)

In [23]:
# Interaction feature: element-wise product of temperature and run time
data['Temp_Run_Product'] = data['Temperature'].mul(data['Run_Time'])


In [24]:
# numpy is imported here because this section's import cell does not bring it
# in; without it the cell only works when `np` is left over in the kernel
# from the data-generation section.
import numpy as np
from scipy.stats import zscore

# Keep only rows where both Temperature and Run_Time lie within 3 standard
# deviations of their column mean (|z| < 3 on every filtered column).
# NOTE: `data` is rebound to the filtered frame, so re-running this cell
# recomputes the z-scores on the already-filtered rows.
within_three_sigma = (np.abs(zscore(data[['Temperature', 'Run_Time']])) < 3).all(axis=1)
data = data[within_three_sigma]


In [33]:
from sklearn.preprocessing import StandardScaler

# Standardise the two numeric features to zero mean / unit variance.
# The columns are selected from `data` directly: `X` is only assigned in a
# later cell, so referencing it here raises NameError on a fresh kernel
# (Restart & Run All). The selection matches the columns later used for `X`.
# NOTE(review): the scaler is fitted on the full dataset (before the
# train/test split) and `X_scaled` is not consumed by the cells visible
# below - confirm whether scaling is still needed.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(data[['Temperature', 'Run_Time']])


In [26]:
# Inspect the column names now present on `data` (includes the engineered
# Temp_Run_Product column added above).
data.columns


Index(['Machine_ID', 'Temperature', 'Run_Time', 'Downtime_Flag',
       'Temp_Run_Product'],
      dtype='object')

In [27]:
# Model inputs are the two raw sensor columns; the label is the downtime flag
feature_columns = ['Temperature', 'Run_Time']
target_column = 'Downtime_Flag'

X = data[feature_columns]
y = data[target_column]

In [28]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts


In [29]:
from sklearn.ensemble import RandomForestClassifier

# Pin random_state so the forest's bootstrap sampling and feature selection
# are reproducible across runs, matching the fixed seed already used for the
# train/test split. Without it the fitted model differs from run to run.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)


In [30]:
# Score the fitted model on the held-out split and report plain accuracy
y_pred = model.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {test_accuracy:.2f}")

Accuracy: 1.00


In [31]:
from sklearn.model_selection import cross_val_score

# 5-fold cross-validated accuracy over the full feature matrix and labels
scores = cross_val_score(model, X, y, cv=5, scoring='accuracy')
mean_cv_accuracy = scores.mean()
print(f"Cross-Validation Accuracy: {mean_cv_accuracy:.2f}")


Cross-Validation Accuracy: 1.00


In [32]:
# Serialise the fitted estimator to disk for later reuse
model_path = 'model.joblib'
joblib.dump(model, model_path)

['model.joblib']