In [26]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import classification_report, confusion_matrix

# --- Configuration: every value a reader might want to tune lives here ---
DATA_PATH = '../DataEngineering/Report_Layer/fact.csv'
FEATURE_COLUMNS = ['designation', 'duration_weeks', 'difficulty_level', 'completion_rate']
TARGET_COLUMN = 'learning_path'
TEST_SIZE = 0.2
RANDOM_STATE = 42

# --- Load ---
# read_csv already returns a DataFrame; work on an explicit copy so `data`
# keeps the raw string categories while `df` receives the encoded columns.
data = pd.read_csv(DATA_PATH)
df = data.copy()

# --- Encode categorical variables ---
# One encoder per column, kept under its own name so the fitted mapping can be
# reused below (encoding a new sample, decoding the prediction).
le_designation = LabelEncoder()
le_difficulty = LabelEncoder()
le_learning_path = LabelEncoder()
le_course_name = LabelEncoder()

# NOTE(review): LabelEncoder assigns integer codes in sorted label order, so an
# ordinal column such as difficulty_level is presumably not coded in order of
# difficulty -- confirm this is acceptable for the model, or switch to an
# encoder with an explicit category order.
# `course_name` is encoded as well although it is not part of FEATURE_COLUMNS.
for column, encoder in (
    ('designation', le_designation),
    ('difficulty_level', le_difficulty),
    ('learning_path', le_learning_path),
    ('course_name', le_course_name),
):
    df[column] = encoder.fit_transform(df[column])

# --- Features and target variable ---
X = df[FEATURE_COLUMNS]
y = df[TARGET_COLUMN]

# --- Split data into training and testing sets ---
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE
)

# --- Create and fit the Gradient Boosting model ---
gb_model = GradientBoostingClassifier(
    n_estimators=100, learning_rate=0.1, max_depth=3, random_state=RANDOM_STATE
)
gb_model.fit(X_train, y_train)

# --- Make predictions ---
y_pred = gb_model.predict(X_test)

# --- Evaluation ---
# Report against the encoder's full class list: passing the labels explicitly
# keeps rows/columns aligned with the class names even if some class happens to
# be absent from the test split, and target_names replaces the opaque integer
# codes with the original learning-path names.
class_labels = list(range(len(le_learning_path.classes_)))
class_names = [str(name) for name in le_learning_path.classes_]

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred, labels=class_labels))
print("\nClassification Report:")
print(classification_report(y_test, y_pred, labels=class_labels, target_names=class_names))

# --- Optional: predict a new sample ---
# Categorical values go through the same fitted encoders used for training;
# transform() raises ValueError for a category that was not seen during fit.
# Indexing by FEATURE_COLUMNS pins the column order to the one the model was
# trained on.
new_employee = pd.DataFrame({
    'designation': [le_designation.transform(['DevOps'])[0]],
    'duration_weeks': [4],
    'difficulty_level': [le_difficulty.transform(['Beginner'])[0]],
    'completion_rate': [8.0]
})[FEATURE_COLUMNS]

# Map the predicted integer code back to the original learning-path name.
predicted_learning_path = le_learning_path.inverse_transform(gb_model.predict(new_employee))
print(f"\nPredicted Learning Path for the new employee: {predicted_learning_path[0]}")


Confusion Matrix:
[[36  0  4  9 14]
 [ 2 29  5 36  8]
 [ 8 24 16 39  6]
 [ 9  8 13 59  7]
 [ 8  1  6 17 36]]

Classification Report:
              precision    recall  f1-score   support

           0       0.57      0.57      0.57        63
           1       0.47      0.36      0.41        80
           2       0.36      0.17      0.23        93
           3       0.37      0.61      0.46        96
           4       0.51      0.53      0.52        68

    accuracy                           0.44       400
   macro avg       0.46      0.45      0.44       400
weighted avg       0.44      0.44      0.42       400


Predicted Learning Path for the new employee: Machine Learning
