In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier
from sklearn.metrics import classification_report, precision_score
from sklearn.utils import shuffle

# 1. Read the labelled bug reports used for training
data = pd.read_csv('bugs-train.csv')

# 2. Reorder the rows with a fixed seed so every run sees the same permutation
data = shuffle(data, random_state=42)

# 3. Hold out 20% of the rows for validation.
#    Features are the free-text summaries; the target is the severity label.
summaries, severities = data['summary'], data['severity']
X_train, X_test, y_train, y_test = train_test_split(
    summaries,
    severities,
    test_size=0.2,
    random_state=42,
)

# 4./5. Build the two encoders: TF-IDF turns the summary text into numeric
#       features, LabelEncoder turns the severity labels into integer codes.
tfidf, le = TfidfVectorizer(), LabelEncoder()

# Fit both encoders on the training split only, so nothing from the
# held-out rows influences the learned vocabulary or label set.
X_train_tfidf = tfidf.fit_transform(X_train)
y_train_encoded = le.fit_transform(y_train)

# Apply the already-fitted encoders to the held-out split.
X_test_tfidf = tfidf.transform(X_test)
y_test_encoded = le.transform(y_test)

# 6. Train a Bagging Random Forest model with bootstrap sampling:
#    10 bagged members, each of which is a 100-tree random forest.
random_forest = RandomForestClassifier(n_estimators=100, random_state=42)

# The wrapped estimator is passed positionally on purpose. Its keyword was
# renamed from `base_estimator` to `estimator` in scikit-learn 1.2 and the old
# name was removed in 1.4, so `base_estimator=` raises TypeError on current
# releases. The first positional slot is the same parameter in every version.
bagging_rf = BaggingClassifier(
    random_forest,
    n_estimators=10,
    random_state=42,
    bootstrap=True,
)
bagging_rf.fit(X_train_tfidf, y_train_encoded)

# 7. Predict integer-encoded severities for the held-out split
y_pred_bagging_rf = bagging_rf.predict(X_test_tfidf)

# 8. Map the integer codes back to the original severity labels
y_pred_labels_bagging_rf = le.inverse_transform(y_pred_bagging_rf)

# 9. Score the predictions against the true held-out labels:
#    a per-class report first, then the macro-averaged precision.
report_bagging_rf = classification_report(y_test, y_pred_labels_bagging_rf)
print("Bagging Random Forest Classification Report:\n", report_bagging_rf)

macro_precision_bagging_rf = precision_score(
    y_test,
    y_pred_labels_bagging_rf,
    average='macro',
)
print("Macro Precision Bagging Random Forest:", macro_precision_bagging_rf)

# 10. Read the unlabelled bug reports that need a severity prediction
test_data = pd.read_csv('bugs-test.csv')

# 11. (No extra preprocessing is applied to the test data.)

# 12. Vectorise the summaries with the TF-IDF model fitted on the training split
X_test_final = tfidf.transform(test_data['summary'])

# 13. Predict encoded severities, then decode them to the original labels
y_pred_test_bagging_rf = bagging_rf.predict(X_test_final)
y_pred_test_labels_bagging_rf = le.inverse_transform(y_pred_test_bagging_rf)

# 14. Pair every bug id with its predicted severity
bug_ids = test_data['bug_id']
predicted_data_bagging_rf = pd.DataFrame({'bug_id': bug_ids}).assign(
    predicted_severity=y_pred_test_labels_bagging_rf,
)

# Write the two-column result to disk without the DataFrame index
output_file_bagging_rf = 'predictions_bagging_rf.csv'
predicted_data_bagging_rf.to_csv(output_file_bagging_rf, index=False)
print(f"Predictions saved to {output_file_bagging_rf}!")


FileNotFoundError: [Errno 2] No such file or directory: 'bugs-train.csv'