### Random-Forest-Implementierung mit ClassifierChain

In [26]:
import os
import pickle
import re
import pandas as pd
import json
from sklearn.feature_extraction.text import TfidfVectorizer

In [9]:
# Directory that holds the pickled model artifacts (relative to the notebook's working directory).
model_directory = './model_files'


def _load_pickle(artifact_name):
    """Unpickle one model artifact and return the stored object.

    The file is read from ``{model_directory}/{artifact_name}-v5g.pkl``.

    SECURITY: ``pickle.load`` can execute arbitrary code contained in the file.
    Only load artifacts that come from a trusted source.

    Raises:
        OSError: if the file cannot be opened (e.g. it does not exist).
    """
    with open(f'{model_directory}/{artifact_name}-v5g.pkl', 'rb') as artifact_file:
        return pickle.load(artifact_file)


# Classifier chain used for the prediction.
classifier_chain = _load_pickle('classifier_chain')

# Encoders used further down to decode the prediction back into readable labels.
select_encoder = _load_pickle('select_encoder')
required_encoder = _load_pickle('required_encoder')
expand_encoder = _load_pickle('expand_encoder')
expand_select_encoder = _load_pickle('expand_select_encoder')
endpoint_encoder = _load_pickle('endpoint_encoder')
method_encoder = _load_pickle('method_encoder')
presentation_encoder = _load_pickle('presentation_encoder')
alfred_encoder = _load_pickle('alfred_encoder')
filter_encoder = _load_pickle('filter_encoder')

# Vectorizer that turns the cleaned prompt text into the feature vector for the chain.
vectorizer = _load_pickle('tfidf_vectorizer')

### Vorverarbeitung des Prompts

In [11]:
def clean_text(text):
    """Normalise a prompt string before it is vectorised.

    Missing values (anything for which ``pd.isna`` is true, e.g. ``None`` or
    NaN) are returned unchanged. Otherwise every character that is not an
    ASCII letter, a German umlaut / ß, or whitespace is removed (this drops
    digits and punctuation), the text is lower-cased, and leading/trailing
    whitespace is stripped. Whitespace inside the text is left as it is.
    """
    # Guard clause: nothing to clean for missing values.
    if pd.isna(text):
        return text

    letters_only = re.sub(r'[^A-Za-zäöüßÄÖÜ\s]', '', text)
    return letters_only.lower().strip()

Mit der Variable `USER_PROMPT` kann die Klassifikation getestet werden.

In [12]:
# Example prompt; change this string to try the classification on other inputs.
USER_PROMPT = "Welche Komponenten werden im Auftrag 4711 benötigt?"

# Cleaned version of the prompt (special characters removed by clean_text).
PRE_USER_PROMPT = clean_text(USER_PROMPT)

### Durchführung der Vorhersage mit der ClassifierChain

Der vorverarbeitete Prompt wird zunächst mit dem geladenen TF-IDF-Vektorisierer in einen Merkmalsvektor umgewandelt; anschließend berechnet die ClassifierChain auf Basis dieses Vektors die Vorhersage.

In [13]:
# Vektorisierung des Texts
prompt_vector = vectorizer.transform([PRE_USER_PROMPT])

# Vorhersage mit der Classifier Chain
user_prompt_prediction = classifier_chain.predict(prompt_vector)

Im nachfolgenden Abschnitt wird die Vorhersage in Klartext dekodiert. Die Reihenfolge der Von-bis-Werte (Spaltenbereiche) ist hier sehr wichtig: Sie muss der Reihenfolge der Zielspalten entsprechen, mit der die Klassifikationskette trainiert wurde.

In [23]:
# Umwandeln der Vorhersagen in Integer-Werte für die Dekodierung
predicted_endpoint_indices = user_prompt_prediction[:, 0].astype(int).ravel()
predicted_method_indices = user_prompt_prediction[:, 1].astype(int).ravel()
predicted_presentation_indices = user_prompt_prediction[:, 2].astype(int).ravel()

# Dekodieren der Single-Label-Vorhersagen
decoded_prediction_endpoint = endpoint_encoder.inverse_transform(predicted_endpoint_indices)
decoded_prediction_method = method_encoder.inverse_transform(predicted_method_indices)
decoded_prediction_presentation = presentation_encoder.inverse_transform(predicted_presentation_indices)

# Multi-Label-Vorhersagen dekodieren
decoded_prediction_select = select_encoder.inverse_transform(user_prompt_prediction[:, :len(select_encoder.classes_)])
decoded_prediction_required = required_encoder.inverse_transform(user_prompt_prediction[:, len(select_encoder.classes_):len(select_encoder.classes_)+len(required_encoder.classes_)])
decoded_prediction_expand = expand_encoder.inverse_transform(user_prompt_prediction[:, len(select_encoder.classes_)+len(required_encoder.classes_):len(select_encoder.classes_)+len(required_encoder.classes_)+len(expand_encoder.classes_)])
decoded_prediction_expand_select = expand_select_encoder.inverse_transform(user_prompt_prediction[:, len(select_encoder.classes_)+len(required_encoder.classes_)+len(expand_encoder.classes_):len(select_encoder.classes_)+len(required_encoder.classes_)+len(expand_encoder.classes_)+len(expand_select_encoder.classes_)])
decoded_prediction_filter = filter_encoder.inverse_transform(user_prompt_prediction[:, len(select_encoder.classes_)+len(required_encoder.classes_)+len(expand_encoder.classes_)+len(expand_select_encoder.classes_):len(select_encoder.classes_)+len(required_encoder.classes_)+len(expand_encoder.classes_)+len(expand_select_encoder.classes_)+len(filter_encoder.classes_)])
decoded_prediction_alfred = alfred_encoder.inverse_transform(user_prompt_prediction[:, len(select_encoder.classes_)+len(required_encoder.classes_)+len(expand_encoder.classes_)+len(expand_select_encoder.classes_)+len(filter_encoder.classes_):])


# Ausgabe der dekodierten Vorhersagen
print("Endpoint Prediction:", decoded_prediction_endpoint[0])
print("Method Prediction:", decoded_prediction_method[0])
print("Presentation Prediction:", decoded_prediction_presentation[0])
print("Select Prediction:", decoded_prediction_select)
print("Required Prediction:", decoded_prediction_required)
print("Expand Prediction:", decoded_prediction_expand)
print("Expand Select Prediction:", decoded_prediction_expand_select)
print("Filter Prediction:", decoded_prediction_filter)
print("Alfred Prediction:", decoded_prediction_alfred)

Endpoint Prediction: API_PRODUCTION_ORDER_2_SRV/A_ProductionOrderComponent
Method Prediction: GET
Presentation Prediction: blank
Select Prediction: [('ManufacturingOrder',)]
Required Prediction: [('ManufacturingOrder',)]
Expand Prediction: [('to_ProductionOrderComponent',)]
Expand Select Prediction: [('BaseUnit', 'ManufacturingOrderOperation', 'Material', 'ProductionPlant', 'RequiredQuantity')]
Filter Prediction: [()]
Alfred Prediction: [()]


Darstellung der Vorhersage als JSON

In [35]:
# Output key -> decoded prediction, listed in the order the keys appear in the JSON.
# "presentation" matters for rendering in the chatbot: blank = text, table = table.
decoded_by_key = (
    ("endpoint", decoded_prediction_endpoint),
    ("method", decoded_prediction_method),
    ("select", decoded_prediction_select),
    ("required", decoded_prediction_required),
    ("expand", decoded_prediction_expand),
    ("expand_select", decoded_prediction_expand_select),
    ("filter", decoded_prediction_filter),
    ("presentation", decoded_prediction_presentation),
)

# Entry 0 of each decoded prediction becomes the value for its key.
prediction_results = {key: decoded[0] for key, decoded in decoded_by_key}

# Serialise with 4-space indentation and print the JSON string.
json_results = json.dumps(prediction_results, indent=4)
print(json_results)

{
    "endpoint": "API_PRODUCTION_ORDER_2_SRV/A_ProductionOrderComponent",
    "method": "GET",
    "select": [
        "ManufacturingOrder"
    ],
    "required": [
        "ManufacturingOrder"
    ],
    "expand": [
        "to_ProductionOrderComponent"
    ],
    "expand_select": [
        "BaseUnit",
        "ManufacturingOrderOperation",
        "Material",
        "ProductionPlant",
        "RequiredQuantity"
    ],
    "filter": [],
    "presentation": "blank"
}
