In [9]:
# Export XGBoost model and artifacts for smart triage UI

# 1) Imports and setup
import os
import json
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from imblearn.pipeline import Pipeline
from imblearn.over_sampling import SMOTE
from sklearn.metrics import classification_report
import xgboost as xgb

from sklearn.feature_extraction.text import TfidfVectorizer
from model_utils import save_model_artifacts
from feature_engineering import TextFeatureExtractor

RANDOM_STATE = 42
OUTPUT_DIR = 'model_artifacts'


In [10]:
# 2) Load processed data and define targets consistent with training

df = pd.read_csv('github_issues_processed.csv')

cat_targets = [
    'is_bug_cat','is_feature_cat','is_doc_cat',
    'is_help_cat','is_priority_cat','is_status_cat'
]
df['category'] = df[cat_targets].idxmax(axis=1)

label_targets = [col for col in df.columns if col.startswith('has_')]

exclude = cat_targets + label_targets + ['n_labels', 'category']
X = df.drop(columns=exclude)
y_cat = df['category']


In [11]:
# 3) Train/test split (same split strategy)
X_train, X_test, y_cat_train, y_cat_test = train_test_split(
    X, y_cat, test_size=0.2, random_state=RANDOM_STATE, stratify=y_cat
)


In [13]:
# 4) Fit vectorizers/encoders exactly as used downstream
# Fit LabelEncoder on training categories (matching training notebook flow)
label_encoder = LabelEncoder()
y_cat_train_encoded = label_encoder.fit_transform(y_cat_train)
y_cat_test_encoded = label_encoder.transform(y_cat_test)

# Fit repository encoder from raw data (processed data only has repo_encoded)
repo_encoder = LabelEncoder()
raw_df = pd.read_csv('github_issues.csv', usecols=['title', 'body', 'repo_name'])
repo_series = raw_df['repo_name'].astype(str).fillna('unknown_repo')
repo_encoder.fit(repo_series)

# Fit TF-IDF on combined text from raw title+body to mirror inference
# For export, we only need the fitted vectorizer to match inference; training here uses processed X.
tfidf = TfidfVectorizer(max_features=250, stop_words='english', ngram_range=(1,2))
combined_text = raw_df[['title', 'body']].fillna('').apply(lambda x: ' '.join(x), axis=1)
tfidf.fit(combined_text)

# Feature extractor (used only for UI/inference, not training here)
feature_extractor = TextFeatureExtractor(tfidf)


In [14]:
# 5) Configure XGBoost exactly as training notebook
xgb_config = {
    'n_estimators': 200,
    'learning_rate': 0.1,
    'max_depth': 5,
    'min_child_weight': 2,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'random_state': RANDOM_STATE,
    'n_jobs': -1,
    'objective': 'multi:softprob'
}

# Set num_class from training labels
xgb_config['num_class'] = len(np.unique(y_cat_train_encoded))


In [15]:
# 6) Train XGBoost with SMOTE on the processed feature matrix (as in training)
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline

xgb_pipeline = Pipeline([
    ('smote', SMOTE(random_state=RANDOM_STATE, k_neighbors=2)),
    ('xgb', xgb.XGBClassifier(**xgb_config))
])

xgb_pipeline.fit(X_train, y_cat_train_encoded)

y_pred_encoded = xgb_pipeline.predict(X_test)
y_proba = xgb_pipeline.predict_proba(X_test)

# For sanity: decode for a quick classification report
y_pred = label_encoder.inverse_transform(y_pred_encoded)
print("XGBoost with SMOTE — quick eval on holdout")
print(classification_report(y_cat_test, y_pred, zero_division=0))


XGBoost with SMOTE — quick eval on holdout
                 precision    recall  f1-score   support

     is_bug_cat       0.96      0.99      0.97      2961
     is_doc_cat       0.52      0.35      0.42        79
 is_feature_cat       0.64      0.35      0.45       119
    is_help_cat       0.00      0.00      0.00         3
is_priority_cat       0.00      0.00      0.00         1
  is_status_cat       0.90      1.00      0.95        28

       accuracy                           0.94      3191
      macro avg       0.50      0.45      0.47      3191
   weighted avg       0.93      0.94      0.94      3191



In [16]:
# 7) Persist model and artifacts for UI inference
# We save the trained pipeline's XGB classifier, the TF-IDF vectorizer, the label encoder, and the repo encoder.

# Extract the trained XGB model from pipeline for saving
trained_model = xgb_pipeline.named_steps['xgb']

save_model_artifacts(
    model=trained_model,
    tfidf_vectorizer=tfidf,
    label_encoder=label_encoder,
    repo_encoder=repo_encoder,
    output_dir=OUTPUT_DIR
)

print(f"Saved artifacts to: {OUTPUT_DIR}")


Saved artifacts to: model_artifacts


In [None]:
# 8) Reload modules to pick up updated feature schema
import importlib, feature_engineering, smart_triage
importlib.reload(feature_engineering)
importlib.reload(smart_triage)
from smart_triage import SmartIssueTriage


In [None]:
# 9) Quick verification snippet for UI-like prediction with reloaded modules
triage = SmartIssueTriage(model_dir=OUTPUT_DIR)

sample_title = "Error in login flow: users cannot reset password"
sample_body = "Users report password reset links error out with 500. Happens since v2.3."
sample_repo = repo_encoder.classes_[0]

features = triage.feature_extractor.extract_all_features(
    text=f"{sample_title}\n{sample_body}",
    repo=sample_repo,
    repo_encoder=repo_encoder
)

# Debug: print feature names vs model booster names if mismatch persists
model_feature_names = triage.model.get_booster().feature_names
first_10 = features.columns.tolist()[:10]
print("Model features (count):", len(model_feature_names))
print("Input features (count):", features.shape[1])
print("First 10 input cols:", first_10)

result = triage.predict(
    title=sample_title,
    body=sample_body,
    repo=sample_repo,
    threshold=0.35
)

print(json.dumps(result, indent=2)[:2000])


In [19]:
# 8) Quick verification snippet for UI-like prediction
from smart_triage import SmartIssueTriage

triage = SmartIssueTriage(model_dir=OUTPUT_DIR)

sample_title = "Error in login flow: users cannot reset password"
sample_body = "Users report password reset links error out with 500. Happens since v2.3."
# pick a repo from the fitted encoder classes for a valid mapping
sample_repo = repo_encoder.classes_[0]

result = triage.predict(
    title=sample_title,
    body=sample_body,
    repo=sample_repo,
    threshold=0.35
)

print(json.dumps(result, indent=2)[:2000])


ValueError: feature_names mismatch: ['created_hour', 'created_day_of_week', 'created_month', 'n_days_to_resolution', 'title_length', 'body_length', 'title_word_count', 'body_word_count', 'code_block_count', 'url_count', 'title_question_word_count', 'title_has_question_mark', 'body_question_word_count', 'body_has_question_mark', 'total_question_word_count', 'total_has_question_mark', 'includes_questions', 'title_n_urgent_words', 'title_has_exclamation', 'body_n_urgent_words', 'body_has_exclamation', 'total_n_urgent_words', 'total_has_exclamation', 'urgency_score', 'repo_encoded', 'tfidf_0', 'tfidf_1', 'tfidf_2', 'tfidf_3', 'tfidf_4', 'tfidf_5', 'tfidf_6', 'tfidf_7', 'tfidf_8', 'tfidf_9', 'tfidf_10', 'tfidf_11', 'tfidf_12', 'tfidf_13', 'tfidf_14', 'tfidf_15', 'tfidf_16', 'tfidf_17', 'tfidf_18', 'tfidf_19', 'tfidf_20', 'tfidf_21', 'tfidf_22', 'tfidf_23', 'tfidf_24', 'tfidf_25', 'tfidf_26', 'tfidf_27', 'tfidf_28', 'tfidf_29', 'tfidf_30', 'tfidf_31', 'tfidf_32', 'tfidf_33', 'tfidf_34', 'tfidf_35', 'tfidf_36', 'tfidf_37', 'tfidf_38', 'tfidf_39', 'tfidf_40', 'tfidf_41', 'tfidf_42', 'tfidf_43', 'tfidf_44', 'tfidf_45', 'tfidf_46', 'tfidf_47', 'tfidf_48', 'tfidf_49', 'tfidf_50', 'tfidf_51', 'tfidf_52', 'tfidf_53', 'tfidf_54', 'tfidf_55', 'tfidf_56', 'tfidf_57', 'tfidf_58', 'tfidf_59', 'tfidf_60', 'tfidf_61', 'tfidf_62', 'tfidf_63', 'tfidf_64', 'tfidf_65', 'tfidf_66', 'tfidf_67', 'tfidf_68', 'tfidf_69', 'tfidf_70', 'tfidf_71', 'tfidf_72', 'tfidf_73', 'tfidf_74', 'tfidf_75', 'tfidf_76', 'tfidf_77', 'tfidf_78', 'tfidf_79', 'tfidf_80', 'tfidf_81', 'tfidf_82', 'tfidf_83', 'tfidf_84', 'tfidf_85', 'tfidf_86', 'tfidf_87', 'tfidf_88', 'tfidf_89', 'tfidf_90', 'tfidf_91', 'tfidf_92', 'tfidf_93', 'tfidf_94', 'tfidf_95', 'tfidf_96', 'tfidf_97', 'tfidf_98', 'tfidf_99', 'tfidf_100', 'tfidf_101', 'tfidf_102', 'tfidf_103', 'tfidf_104', 'tfidf_105', 'tfidf_106', 'tfidf_107', 'tfidf_108', 'tfidf_109', 'tfidf_110', 'tfidf_111', 'tfidf_112', 'tfidf_113', 'tfidf_114', 'tfidf_115', 'tfidf_116', 'tfidf_117', 'tfidf_118', 'tfidf_119', 'tfidf_120', 'tfidf_121', 'tfidf_122', 'tfidf_123', 'tfidf_124', 'tfidf_125', 'tfidf_126', 'tfidf_127', 'tfidf_128', 'tfidf_129', 'tfidf_130', 'tfidf_131', 'tfidf_132', 'tfidf_133', 'tfidf_134', 'tfidf_135', 'tfidf_136', 'tfidf_137', 'tfidf_138', 'tfidf_139', 'tfidf_140', 'tfidf_141', 'tfidf_142', 'tfidf_143', 'tfidf_144', 'tfidf_145', 'tfidf_146', 'tfidf_147', 'tfidf_148', 'tfidf_149', 'tfidf_150', 'tfidf_151', 'tfidf_152', 'tfidf_153', 'tfidf_154', 'tfidf_155', 'tfidf_156', 'tfidf_157', 'tfidf_158', 'tfidf_159', 'tfidf_160', 'tfidf_161', 'tfidf_162', 'tfidf_163', 'tfidf_164', 'tfidf_165', 'tfidf_166', 'tfidf_167', 'tfidf_168', 'tfidf_169', 'tfidf_170', 'tfidf_171', 'tfidf_172', 'tfidf_173', 'tfidf_174', 'tfidf_175', 'tfidf_176', 'tfidf_177', 'tfidf_178', 'tfidf_179', 'tfidf_180', 'tfidf_181', 'tfidf_182', 'tfidf_183', 'tfidf_184', 'tfidf_185', 'tfidf_186', 'tfidf_187', 'tfidf_188', 'tfidf_189', 'tfidf_190', 'tfidf_191', 'tfidf_192', 'tfidf_193', 'tfidf_194', 'tfidf_195', 'tfidf_196', 'tfidf_197', 'tfidf_198', 'tfidf_199', 'tfidf_200', 'tfidf_201', 'tfidf_202', 'tfidf_203', 'tfidf_204', 'tfidf_205', 'tfidf_206', 'tfidf_207', 'tfidf_208', 'tfidf_209', 'tfidf_210', 'tfidf_211', 'tfidf_212', 'tfidf_213', 'tfidf_214', 'tfidf_215', 'tfidf_216', 'tfidf_217', 'tfidf_218', 'tfidf_219', 'tfidf_220', 'tfidf_221', 'tfidf_222', 'tfidf_223', 'tfidf_224', 'tfidf_225', 'tfidf_226', 'tfidf_227', 'tfidf_228', 'tfidf_229', 'tfidf_230', 'tfidf_231', 'tfidf_232', 'tfidf_233', 'tfidf_234', 'tfidf_235', 'tfidf_236', 'tfidf_237', 'tfidf_238', 'tfidf_239', 'tfidf_240', 'tfidf_241', 'tfidf_242', 'tfidf_243', 'tfidf_244', 'tfidf_245', 'tfidf_246', 'tfidf_247', 'tfidf_248', 'tfidf_249', 'bert_0', 'bert_1', 'bert_2', 'bert_3', 'bert_4', 'bert_5', 'bert_6', 'bert_7', 'bert_8', 'bert_9', 'bert_10', 'bert_11', 'bert_12', 'bert_13', 'bert_14', 'bert_15', 'bert_16', 'bert_17', 'bert_18', 'bert_19', 'bert_20', 'bert_21', 'bert_22', 'bert_23', 'bert_24', 'bert_25', 'bert_26', 'bert_27', 'bert_28', 'bert_29', 'bert_30', 'bert_31', 'bert_32', 'bert_33', 'bert_34', 'bert_35', 'bert_36', 'bert_37', 'bert_38', 'bert_39', 'bert_40', 'bert_41', 'bert_42', 'bert_43', 'bert_44', 'bert_45', 'bert_46', 'bert_47', 'bert_48', 'bert_49', 'bert_50', 'bert_51', 'bert_52', 'bert_53', 'bert_54', 'bert_55', 'bert_56', 'bert_57', 'bert_58', 'bert_59', 'bert_60', 'bert_61', 'bert_62', 'bert_63', 'bert_64', 'bert_65', 'bert_66', 'bert_67', 'bert_68', 'bert_69', 'bert_70', 'bert_71', 'bert_72', 'bert_73', 'bert_74', 'bert_75', 'bert_76', 'bert_77', 'bert_78', 'bert_79', 'bert_80', 'bert_81', 'bert_82', 'bert_83', 'bert_84', 'bert_85', 'bert_86', 'bert_87', 'bert_88', 'bert_89', 'bert_90', 'bert_91', 'bert_92', 'bert_93', 'bert_94', 'bert_95', 'bert_96', 'bert_97', 'bert_98', 'bert_99', 'bert_100', 'bert_101', 'bert_102', 'bert_103', 'bert_104', 'bert_105', 'bert_106', 'bert_107', 'bert_108', 'bert_109', 'bert_110', 'bert_111', 'bert_112', 'bert_113', 'bert_114', 'bert_115', 'bert_116', 'bert_117', 'bert_118', 'bert_119', 'bert_120', 'bert_121', 'bert_122', 'bert_123', 'bert_124', 'bert_125', 'bert_126', 'bert_127', 'bert_128', 'bert_129', 'bert_130', 'bert_131', 'bert_132', 'bert_133', 'bert_134', 'bert_135', 'bert_136', 'bert_137', 'bert_138', 'bert_139', 'bert_140', 'bert_141', 'bert_142', 'bert_143', 'bert_144', 'bert_145', 'bert_146', 'bert_147', 'bert_148', 'bert_149', 'bert_150', 'bert_151', 'bert_152', 'bert_153', 'bert_154', 'bert_155', 'bert_156', 'bert_157', 'bert_158', 'bert_159', 'bert_160', 'bert_161', 'bert_162', 'bert_163', 'bert_164', 'bert_165', 'bert_166', 'bert_167', 'bert_168', 'bert_169', 'bert_170', 'bert_171', 'bert_172', 'bert_173', 'bert_174', 'bert_175', 'bert_176', 'bert_177', 'bert_178', 'bert_179', 'bert_180', 'bert_181', 'bert_182', 'bert_183', 'bert_184', 'bert_185', 'bert_186', 'bert_187', 'bert_188', 'bert_189', 'bert_190', 'bert_191', 'bert_192', 'bert_193', 'bert_194', 'bert_195', 'bert_196', 'bert_197', 'bert_198', 'bert_199', 'bert_200', 'bert_201', 'bert_202', 'bert_203', 'bert_204', 'bert_205', 'bert_206', 'bert_207', 'bert_208', 'bert_209', 'bert_210', 'bert_211', 'bert_212', 'bert_213', 'bert_214', 'bert_215', 'bert_216', 'bert_217', 'bert_218', 'bert_219', 'bert_220', 'bert_221', 'bert_222', 'bert_223', 'bert_224', 'bert_225', 'bert_226', 'bert_227', 'bert_228', 'bert_229', 'bert_230', 'bert_231', 'bert_232', 'bert_233', 'bert_234', 'bert_235', 'bert_236', 'bert_237', 'bert_238', 'bert_239', 'bert_240', 'bert_241', 'bert_242', 'bert_243', 'bert_244', 'bert_245', 'bert_246', 'bert_247', 'bert_248', 'bert_249', 'bert_250', 'bert_251', 'bert_252', 'bert_253', 'bert_254', 'bert_255', 'bert_256', 'bert_257', 'bert_258', 'bert_259', 'bert_260', 'bert_261', 'bert_262', 'bert_263', 'bert_264', 'bert_265', 'bert_266', 'bert_267', 'bert_268', 'bert_269', 'bert_270', 'bert_271', 'bert_272', 'bert_273', 'bert_274', 'bert_275', 'bert_276', 'bert_277', 'bert_278', 'bert_279', 'bert_280', 'bert_281', 'bert_282', 'bert_283', 'bert_284', 'bert_285', 'bert_286', 'bert_287', 'bert_288', 'bert_289', 'bert_290', 'bert_291', 'bert_292', 'bert_293', 'bert_294', 'bert_295', 'bert_296', 'bert_297', 'bert_298', 'bert_299', 'bert_300', 'bert_301', 'bert_302', 'bert_303', 'bert_304', 'bert_305', 'bert_306', 'bert_307', 'bert_308', 'bert_309', 'bert_310', 'bert_311', 'bert_312', 'bert_313', 'bert_314', 'bert_315', 'bert_316', 'bert_317', 'bert_318', 'bert_319', 'bert_320', 'bert_321', 'bert_322', 'bert_323', 'bert_324', 'bert_325', 'bert_326', 'bert_327', 'bert_328', 'bert_329', 'bert_330', 'bert_331', 'bert_332', 'bert_333', 'bert_334', 'bert_335', 'bert_336', 'bert_337', 'bert_338', 'bert_339', 'bert_340', 'bert_341', 'bert_342', 'bert_343', 'bert_344', 'bert_345', 'bert_346', 'bert_347', 'bert_348', 'bert_349', 'bert_350', 'bert_351', 'bert_352', 'bert_353', 'bert_354', 'bert_355', 'bert_356', 'bert_357', 'bert_358', 'bert_359', 'bert_360', 'bert_361', 'bert_362', 'bert_363', 'bert_364', 'bert_365', 'bert_366', 'bert_367', 'bert_368', 'bert_369', 'bert_370', 'bert_371', 'bert_372', 'bert_373', 'bert_374', 'bert_375', 'bert_376', 'bert_377', 'bert_378', 'bert_379', 'bert_380', 'bert_381', 'bert_382', 'bert_383'] ['n_labels', 'has_bug_label', 'has_good_first_issue_label', 'has_help_wanted_label', 'has_enhancement_label', 'has_documentation_label', 'has_stale_label', 'has_waiting_for_customer_response_label', 'has_regression_label', 'has_question_label', 'has_needs_triage_label', 'has_issue__confirmed_label', 'has_type__bug_label', 'has_fixed_label', 'has_topic_editor_label', 'has_docs_label', 'has_confirmed_label', 'has_in_triage_label', 'has_customer_reported_label', 'has_client_label', 'has_inactive_label', 'has_type_bug_label', 'has_p2_label', 'has_archived_label', 'has_backlog_label', 'created_hour', 'created_day_of_week', 'created_month', 'n_days_to_resolution', 'title_length', 'body_length', 'title_word_count', 'body_word_count', 'has_code_blocks', 'code_block_count', 'url_count', 'has_urls', 'title_question_word_count', 'title_has_question_mark', 'body_question_word_count', 'body_has_question_mark', 'total_question_word_count', 'total_has_question_mark', 'includes_questions', 'title_n_urgent_words', 'title_has_exclamation', 'body_n_urgent_words', 'body_has_exclamation', 'total_n_urgent_words', 'total_has_exclamation', 'urgency_score', 'repo_encoded', 'tfidf_0', 'tfidf_1', 'tfidf_2', 'tfidf_3', 'tfidf_4', 'tfidf_5', 'tfidf_6', 'tfidf_7', 'tfidf_8', 'tfidf_9', 'tfidf_10', 'tfidf_11', 'tfidf_12', 'tfidf_13', 'tfidf_14', 'tfidf_15', 'tfidf_16', 'tfidf_17', 'tfidf_18', 'tfidf_19', 'tfidf_20', 'tfidf_21', 'tfidf_22', 'tfidf_23', 'tfidf_24', 'tfidf_25', 'tfidf_26', 'tfidf_27', 'tfidf_28', 'tfidf_29', 'tfidf_30', 'tfidf_31', 'tfidf_32', 'tfidf_33', 'tfidf_34', 'tfidf_35', 'tfidf_36', 'tfidf_37', 'tfidf_38', 'tfidf_39', 'tfidf_40', 'tfidf_41', 'tfidf_42', 'tfidf_43', 'tfidf_44', 'tfidf_45', 'tfidf_46', 'tfidf_47', 'tfidf_48', 'tfidf_49', 'tfidf_50', 'tfidf_51', 'tfidf_52', 'tfidf_53', 'tfidf_54', 'tfidf_55', 'tfidf_56', 'tfidf_57', 'tfidf_58', 'tfidf_59', 'tfidf_60', 'tfidf_61', 'tfidf_62', 'tfidf_63', 'tfidf_64', 'tfidf_65', 'tfidf_66', 'tfidf_67', 'tfidf_68', 'tfidf_69', 'tfidf_70', 'tfidf_71', 'tfidf_72', 'tfidf_73', 'tfidf_74', 'tfidf_75', 'tfidf_76', 'tfidf_77', 'tfidf_78', 'tfidf_79', 'tfidf_80', 'tfidf_81', 'tfidf_82', 'tfidf_83', 'tfidf_84', 'tfidf_85', 'tfidf_86', 'tfidf_87', 'tfidf_88', 'tfidf_89', 'tfidf_90', 'tfidf_91', 'tfidf_92', 'tfidf_93', 'tfidf_94', 'tfidf_95', 'tfidf_96', 'tfidf_97', 'tfidf_98', 'tfidf_99', 'tfidf_100', 'tfidf_101', 'tfidf_102', 'tfidf_103', 'tfidf_104', 'tfidf_105', 'tfidf_106', 'tfidf_107', 'tfidf_108', 'tfidf_109', 'tfidf_110', 'tfidf_111', 'tfidf_112', 'tfidf_113', 'tfidf_114', 'tfidf_115', 'tfidf_116', 'tfidf_117', 'tfidf_118', 'tfidf_119', 'tfidf_120', 'tfidf_121', 'tfidf_122', 'tfidf_123', 'tfidf_124', 'tfidf_125', 'tfidf_126', 'tfidf_127', 'tfidf_128', 'tfidf_129', 'tfidf_130', 'tfidf_131', 'tfidf_132', 'tfidf_133', 'tfidf_134', 'tfidf_135', 'tfidf_136', 'tfidf_137', 'tfidf_138', 'tfidf_139', 'tfidf_140', 'tfidf_141', 'tfidf_142', 'tfidf_143', 'tfidf_144', 'tfidf_145', 'tfidf_146', 'tfidf_147', 'tfidf_148', 'tfidf_149', 'tfidf_150', 'tfidf_151', 'tfidf_152', 'tfidf_153', 'tfidf_154', 'tfidf_155', 'tfidf_156', 'tfidf_157', 'tfidf_158', 'tfidf_159', 'tfidf_160', 'tfidf_161', 'tfidf_162', 'tfidf_163', 'tfidf_164', 'tfidf_165', 'tfidf_166', 'tfidf_167', 'tfidf_168', 'tfidf_169', 'tfidf_170', 'tfidf_171', 'tfidf_172', 'tfidf_173', 'tfidf_174', 'tfidf_175', 'tfidf_176', 'tfidf_177', 'tfidf_178', 'tfidf_179', 'tfidf_180', 'tfidf_181', 'tfidf_182', 'tfidf_183', 'tfidf_184', 'tfidf_185', 'tfidf_186', 'tfidf_187', 'tfidf_188', 'tfidf_189', 'tfidf_190', 'tfidf_191', 'tfidf_192', 'tfidf_193', 'tfidf_194', 'tfidf_195', 'tfidf_196', 'tfidf_197', 'tfidf_198', 'tfidf_199', 'tfidf_200', 'tfidf_201', 'tfidf_202', 'tfidf_203', 'tfidf_204', 'tfidf_205', 'tfidf_206', 'tfidf_207', 'tfidf_208', 'tfidf_209', 'tfidf_210', 'tfidf_211', 'tfidf_212', 'tfidf_213', 'tfidf_214', 'tfidf_215', 'tfidf_216', 'tfidf_217', 'tfidf_218', 'tfidf_219', 'tfidf_220', 'tfidf_221', 'tfidf_222', 'tfidf_223', 'tfidf_224', 'tfidf_225', 'tfidf_226', 'tfidf_227', 'tfidf_228', 'tfidf_229', 'tfidf_230', 'tfidf_231', 'tfidf_232', 'tfidf_233', 'tfidf_234', 'tfidf_235', 'tfidf_236', 'tfidf_237', 'tfidf_238', 'tfidf_239', 'tfidf_240', 'tfidf_241', 'tfidf_242', 'tfidf_243', 'tfidf_244', 'tfidf_245', 'tfidf_246', 'tfidf_247', 'tfidf_248', 'tfidf_249', 'bert_0', 'bert_1', 'bert_2', 'bert_3', 'bert_4', 'bert_5', 'bert_6', 'bert_7', 'bert_8', 'bert_9', 'bert_10', 'bert_11', 'bert_12', 'bert_13', 'bert_14', 'bert_15', 'bert_16', 'bert_17', 'bert_18', 'bert_19', 'bert_20', 'bert_21', 'bert_22', 'bert_23', 'bert_24', 'bert_25', 'bert_26', 'bert_27', 'bert_28', 'bert_29', 'bert_30', 'bert_31', 'bert_32', 'bert_33', 'bert_34', 'bert_35', 'bert_36', 'bert_37', 'bert_38', 'bert_39', 'bert_40', 'bert_41', 'bert_42', 'bert_43', 'bert_44', 'bert_45', 'bert_46', 'bert_47', 'bert_48', 'bert_49', 'bert_50', 'bert_51', 'bert_52', 'bert_53', 'bert_54', 'bert_55', 'bert_56', 'bert_57', 'bert_58', 'bert_59', 'bert_60', 'bert_61', 'bert_62', 'bert_63', 'bert_64', 'bert_65', 'bert_66', 'bert_67', 'bert_68', 'bert_69', 'bert_70', 'bert_71', 'bert_72', 'bert_73', 'bert_74', 'bert_75', 'bert_76', 'bert_77', 'bert_78', 'bert_79', 'bert_80', 'bert_81', 'bert_82', 'bert_83', 'bert_84', 'bert_85', 'bert_86', 'bert_87', 'bert_88', 'bert_89', 'bert_90', 'bert_91', 'bert_92', 'bert_93', 'bert_94', 'bert_95', 'bert_96', 'bert_97', 'bert_98', 'bert_99', 'bert_100', 'bert_101', 'bert_102', 'bert_103', 'bert_104', 'bert_105', 'bert_106', 'bert_107', 'bert_108', 'bert_109', 'bert_110', 'bert_111', 'bert_112', 'bert_113', 'bert_114', 'bert_115', 'bert_116', 'bert_117', 'bert_118', 'bert_119', 'bert_120', 'bert_121', 'bert_122', 'bert_123', 'bert_124', 'bert_125', 'bert_126', 'bert_127', 'bert_128', 'bert_129', 'bert_130', 'bert_131', 'bert_132', 'bert_133', 'bert_134', 'bert_135', 'bert_136', 'bert_137', 'bert_138', 'bert_139', 'bert_140', 'bert_141', 'bert_142', 'bert_143', 'bert_144', 'bert_145', 'bert_146', 'bert_147', 'bert_148', 'bert_149', 'bert_150', 'bert_151', 'bert_152', 'bert_153', 'bert_154', 'bert_155', 'bert_156', 'bert_157', 'bert_158', 'bert_159', 'bert_160', 'bert_161', 'bert_162', 'bert_163', 'bert_164', 'bert_165', 'bert_166', 'bert_167', 'bert_168', 'bert_169', 'bert_170', 'bert_171', 'bert_172', 'bert_173', 'bert_174', 'bert_175', 'bert_176', 'bert_177', 'bert_178', 'bert_179', 'bert_180', 'bert_181', 'bert_182', 'bert_183', 'bert_184', 'bert_185', 'bert_186', 'bert_187', 'bert_188', 'bert_189', 'bert_190', 'bert_191', 'bert_192', 'bert_193', 'bert_194', 'bert_195', 'bert_196', 'bert_197', 'bert_198', 'bert_199', 'bert_200', 'bert_201', 'bert_202', 'bert_203', 'bert_204', 'bert_205', 'bert_206', 'bert_207', 'bert_208', 'bert_209', 'bert_210', 'bert_211', 'bert_212', 'bert_213', 'bert_214', 'bert_215', 'bert_216', 'bert_217', 'bert_218', 'bert_219', 'bert_220', 'bert_221', 'bert_222', 'bert_223', 'bert_224', 'bert_225', 'bert_226', 'bert_227', 'bert_228', 'bert_229', 'bert_230', 'bert_231', 'bert_232', 'bert_233', 'bert_234', 'bert_235', 'bert_236', 'bert_237', 'bert_238', 'bert_239', 'bert_240', 'bert_241', 'bert_242', 'bert_243', 'bert_244', 'bert_245', 'bert_246', 'bert_247', 'bert_248', 'bert_249', 'bert_250', 'bert_251', 'bert_252', 'bert_253', 'bert_254', 'bert_255', 'bert_256', 'bert_257', 'bert_258', 'bert_259', 'bert_260', 'bert_261', 'bert_262', 'bert_263', 'bert_264', 'bert_265', 'bert_266', 'bert_267', 'bert_268', 'bert_269', 'bert_270', 'bert_271', 'bert_272', 'bert_273', 'bert_274', 'bert_275', 'bert_276', 'bert_277', 'bert_278', 'bert_279', 'bert_280', 'bert_281', 'bert_282', 'bert_283', 'bert_284', 'bert_285', 'bert_286', 'bert_287', 'bert_288', 'bert_289', 'bert_290', 'bert_291', 'bert_292', 'bert_293', 'bert_294', 'bert_295', 'bert_296', 'bert_297', 'bert_298', 'bert_299', 'bert_300', 'bert_301', 'bert_302', 'bert_303', 'bert_304', 'bert_305', 'bert_306', 'bert_307', 'bert_308', 'bert_309', 'bert_310', 'bert_311', 'bert_312', 'bert_313', 'bert_314', 'bert_315', 'bert_316', 'bert_317', 'bert_318', 'bert_319', 'bert_320', 'bert_321', 'bert_322', 'bert_323', 'bert_324', 'bert_325', 'bert_326', 'bert_327', 'bert_328', 'bert_329', 'bert_330', 'bert_331', 'bert_332', 'bert_333', 'bert_334', 'bert_335', 'bert_336', 'bert_337', 'bert_338', 'bert_339', 'bert_340', 'bert_341', 'bert_342', 'bert_343', 'bert_344', 'bert_345', 'bert_346', 'bert_347', 'bert_348', 'bert_349', 'bert_350', 'bert_351', 'bert_352', 'bert_353', 'bert_354', 'bert_355', 'bert_356', 'bert_357', 'bert_358', 'bert_359', 'bert_360', 'bert_361', 'bert_362', 'bert_363', 'bert_364', 'bert_365', 'bert_366', 'bert_367', 'bert_368', 'bert_369', 'bert_370', 'bert_371', 'bert_372', 'bert_373', 'bert_374', 'bert_375', 'bert_376', 'bert_377', 'bert_378', 'bert_379', 'bert_380', 'bert_381', 'bert_382', 'bert_383']
training data did not have the following fields: has_client_label, has_bug_label, has_regression_label, has_p2_label, has_enhancement_label, has_backlog_label, has_help_wanted_label, has_issue__confirmed_label, has_needs_triage_label, has_type__bug_label, has_stale_label, has_waiting_for_customer_response_label, has_good_first_issue_label, has_urls, has_inactive_label, has_question_label, has_code_blocks, has_confirmed_label, has_archived_label, has_type_bug_label, has_customer_reported_label, n_labels, has_topic_editor_label, has_docs_label, has_documentation_label, has_in_triage_label, has_fixed_label