#Implement priority prediction model using Random Forest/XGBoost.


In [3]:
#  Step 1: Import Libraries
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report



In [5]:
#  Step 2: Folder Path (change only if your folder is different)
folder_path = os.path.join(os.path.expanduser("~"), "Downloads", "project")


In [25]:
#  Step 3: Auto-Detect Dataset
possible_files = [
    "AI_Powered_Task_Management_System_2000.csv",
    "Cleaned_AI_Task_Data.xls",
    "Cleaned_AI_Task_Data_NLP.xls",
    "AI_Task_BERT_Features.xls",
    "Task_Classification_Results.xls"
]

file_path = None
for fname in possible_files:
    fpath = os.path.join(folder_path, fname)
    if os.path.exists(fpath):
        file_path = fpath
        print(f" Found dataset: {fname}")
        break

if not file_path:
    raise FileNotFoundError(" No dataset found in your 'Downloads/project' folder!")


 Found dataset: AI_Powered_Task_Management_System_2000.csv


In [27]:
#  Step 4: Load Dataset
try:
    if file_path.endswith(".csv"):
        df = pd.read_csv(file_path)
        print("CSV file loaded successfully!")
    elif file_path.endswith(".xlsx"):
        df = pd.read_excel(file_path, engine="openpyxl")
        print(" XLSX Excel file loaded successfully!")
    elif file_path.endswith(".xls"):
        df = pd.read_excel(file_path, engine="xlrd")
        print("XLS Excel file loaded successfully!")
    else:
        raise ValueError(" Unsupported file format! Use CSV or Excel only.")
except Exception as e:
    raise RuntimeError(f" Error while loading dataset: {e}")

CSV file loaded successfully!


In [29]:
#  Step 5: Preview
print("\n Sample Data:")
print(df.head())



 Sample Data:
   Task_ID                      Title  \
0        1              Data Analysis   
1        2                Backend API   
2        3              Data Analysis   
3        4              Data Analysis   
4        5  User Feedback Integration   

                                         Description Assignee Created_Date  \
0     Documentation task for Data Analytics project.   George   19/07/2024   
1       Backend API task for Data Analytics project.    Diana   14/10/2024   
2  User Feedback Integration task for Smart Workf...   George   18/06/2024   
3  User Feedback Integration task for Data Analyt...    Ethan   13/06/2024   
4    Documentation task for AI Task Manager project.    Fiona   06/04/2024   

     Due_Date Completed_Date     Status  Priority  Estimated_Hours  \
0  29/07/2024     29/07/2024  Completed  Critical                3   
1  23/10/2024            NaN  Completed    Medium               20   
2  26/06/2024     26/06/2024  Completed      High          

In [31]:
#  Step 6: Ensure target column exists
if 'priority' not in df.columns:
    print("\n'priority' column not found — creating sample values automatically.")
    np.random.seed(42)
    df['priority'] = np.random.choice(['Low', 'Medium', 'High'], size=len(df))
else:
    print(" 'priority' column found!")


'priority' column not found — creating sample values automatically.


In [33]:
#  Step 7: Encode Categorical Columns
le = LabelEncoder()
for col in df.select_dtypes(include=['object']).columns:
    df[col] = le.fit_transform(df[col].astype(str))


In [35]:
#  Step 8: Split Dataset
X = df.drop('priority', axis=1)
y = df['priority']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print("\n Data split into training and testing sets!")


 Data split into training and testing sets!


In [37]:
#  Step 9: Train Models
rf_model = RandomForestClassifier(random_state=42)
xgb_model = XGBClassifier(random_state=42, eval_metric='mlogloss')

rf_model.fit(X_train, y_train)
xgb_model.fit(X_train, y_train)

In [39]:
#  Step 10: Predictions
rf_pred = rf_model.predict(X_test)
xgb_pred = xgb_model.predict(X_test)

In [41]:
#  Step 11: Evaluation
print("\n Random Forest Results:")
print("Accuracy:", accuracy_score(y_test, rf_pred))
print("Classification Report:\n", classification_report(y_test, rf_pred))

print("\n XGBoost Results:")
print("Accuracy:", accuracy_score(y_test, xgb_pred))
print("Classification Report:\n", classification_report(y_test, xgb_pred))

print("\n Model training completed successfully!")



 Random Forest Results:
Accuracy: 0.3775
Classification Report:
               precision    recall  f1-score   support

           0       0.39      0.41      0.40       133
           1       0.39      0.48      0.43       128
           2       0.34      0.25      0.29       139

    accuracy                           0.38       400
   macro avg       0.37      0.38      0.37       400
weighted avg       0.37      0.38      0.37       400


 XGBoost Results:
Accuracy: 0.37
Classification Report:
               precision    recall  f1-score   support

           0       0.36      0.35      0.35       133
           1       0.36      0.43      0.39       128
           2       0.39      0.34      0.36       139

    accuracy                           0.37       400
   macro avg       0.37      0.37      0.37       400
weighted avg       0.37      0.37      0.37       400


 Model training completed successfully!
