In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Train and evaluate a Random Forest that predicts a log entry's priority
# class ('low' / 'medium' / 'high') from its categorical attributes.

# Single seed shared by the split and the model so the reported metrics are
# reproducible from run to run.
RANDOM_STATE = 42

# load the logs table data into a pandas dataframe
df = pd.read_csv('logs2.csv')

# Missing priorities default to 0. Because 0 falls into the (-inf, 2] bin
# below, rows without a priority end up labelled 'low'.
df['priority'] = df['priority'].fillna(0)

# Bucket the numeric priority into three classes. pd.cut uses right-closed
# intervals by default: (-inf, 2] -> low, (2, 5] -> medium, (5, inf) -> high.
df['priority'] = pd.cut(
    df['priority'],
    bins=[-float('inf'), 2, 5, float('inf')],
    labels=['low', 'medium', 'high'],
)

# select the columns to use for the model (features plus the target)
columns = ['job', 'project', 'type', 'origin', 'message', 'priority']

# create a new dataframe with only the selected columns
data = df[columns]

# One-hot encode the feature columns; the target is dropped first so it is
# not part of the feature matrix.
# NOTE(review): if 'message' is free text, this creates one column per
# distinct message and may leak the label -- confirm before trusting scores.
data_enc = pd.get_dummies(data.drop('priority', axis=1))

# Hold out 20% of the rows for testing. The split is seeded for
# reproducibility and stratified on the target so that every priority class
# keeps the same share in both partitions (stratification requires at least
# two rows per class that is present).
train_data, test_data, train_target, test_target = train_test_split(
    data_enc,
    data['priority'],
    test_size=0.2,
    random_state=RANDOM_STATE,
    stratify=data['priority'],
)

# create a Random Forest classifier
rf = RandomForestClassifier(n_estimators=100, random_state=RANDOM_STATE)

# fit the classifier to the training data
rf.fit(train_data, train_target)

# make predictions on the test data
predictions = rf.predict(test_data)

# evaluate the performance of the model (per-class precision/recall/F1)
print(classification_report(test_target, predictions))

              precision    recall  f1-score   support

        high       0.96      1.00      0.98        24
         low       1.00      1.00      1.00      1140
      medium       1.00      1.00      1.00      1194

    accuracy                           1.00      2358
   macro avg       0.99      1.00      0.99      2358
weighted avg       1.00      1.00      1.00      2358

