In [4]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.preprocessing import StandardScaler

# Seed NumPy's legacy global RNG so the synthetic data is identical on every run
np.random.seed(42)

# Size of the synthetic data set (one row per simulated sample)
n_samples = 1000

# Names of the 13 assessment dimensions. The order matters: one randint draw is
# made per name, in this order, from the seeded global RNG.
feature_names = [
    'Automation_Level',
    'Connectivity',
    'Data_Acquisition',
    'IT_OT_Integration',
    'Process_Standardization',
    'Digital_Control',
    'Real_Time_Monitoring',
    'Predictive_Maintenance',
    'Data_Integration',
    'Advanced_Analytics',
    'Digital_Culture',
    'Skill_Levels',
    'Leadership_Commitment',
]

# Every dimension gets an independent integer score drawn from 1..5
# (randint's upper bound of 6 is exclusive).
features = {name: np.random.randint(1, 6, n_samples) for name in feature_names}

# One column per dimension, one row per sample
df = pd.DataFrame(features)

# Derive the target from the features: add up the 13 scores in each row (a plain,
# unweighted sum), floor-divide by 10, and clamp the result to the range 1..6.
score_total = df.sum(axis=1)
df['Maturity_Level'] = (score_total // 10).clip(lower=1, upper=6)

# Preview the first five rows
df.head()

Unnamed: 0,Automation_Level,Connectivity,Data_Acquisition,IT_OT_Integration,Process_Standardization,Digital_Control,Real_Time_Monitoring,Predictive_Maintenance,Data_Integration,Advanced_Analytics,Digital_Culture,Skill_Levels,Leadership_Commitment,Maturity_Level
0,4,4,4,3,3,2,1,5,1,3,2,4,4,4
1,5,4,3,5,5,5,4,2,5,2,4,2,1,4
2,3,4,2,1,2,4,2,5,5,3,5,4,4,4
3,5,4,1,5,5,4,3,1,5,2,4,3,5,4
4,5,5,5,2,2,5,2,2,5,5,4,5,3,5


In [5]:
# Separate the predictor columns from the label column
X = df.drop(columns='Maturity_Level')
y = df['Maturity_Level']

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the scaler on the training rows only, then apply that same fitted scaler
# to both splits. X_train and X_test are rebound to the scaler's output here.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)


In [6]:
# Build a 100-tree random forest with a fixed seed and fit it on the training
# split in one step (fit() returns the estimator itself)
model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)

# Predict a maturity level for every row of the test split
y_pred = model.predict(X_test)


In [7]:
# Overall accuracy on the test split
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')

# Sorted set of every level that occurs in either the true or the predicted
# labels. It is passed explicitly to both metrics below so the matrix
# rows/columns and the report's display names always refer to the same levels.
unique_labels = np.unique(np.concatenate((y_test, y_pred)))

# Confusion matrix, with rows/columns ordered as in unique_labels
conf_matrix = confusion_matrix(y_test, y_pred, labels=unique_labels)
print('Confusion Matrix:')
print(conf_matrix)

# Classification report
target_names = [f'Level {i}' for i in unique_labels]
class_report = classification_report(
    y_test,
    y_pred,
    labels=unique_labels,
    target_names=target_names,
    # Some levels are never predicted, which leaves their precision undefined.
    # Report 0 for those cases explicitly instead of triggering an
    # UndefinedMetricWarning for each affected metric.
    zero_division=0,
)
print('Classification Report:')
print(class_report)


Accuracy: 0.77
Confusion Matrix:
[[ 0  7  0  0]
 [ 0 89 12  0]
 [ 0 21 65  0]
 [ 0  0  6  0]]
Classification Report:
              precision    recall  f1-score   support

     Level 2       0.00      0.00      0.00         7
     Level 3       0.76      0.88      0.82       101
     Level 4       0.78      0.76      0.77        86
     Level 5       0.00      0.00      0.00         6

    accuracy                           0.77       200
   macro avg       0.39      0.41      0.40       200
weighted avg       0.72      0.77      0.74       200



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
