# Intro til språkmodeller

### Spørsmål

- Hvordan gjør vi det med nøkler? Per nå leser denne fortsatt fra .env-filen, da jeg ikke ville pushe dem til GitHub.


### Tanker

- Gi mer info i prompt om situasjonen rundt spørreundersøkelsen og hva det har blitt spurt om. Spesielt i situasjonen hvor vi ønsker å oppsummere den generelle viben av feedbacken i hver kategori. Var folk fornøyde? Ønsker de tiltak for forbedring?

# AzureChatOpenAI
### Språkmodellen

In [None]:
from langchain_openai import AzureChatOpenAI
from dotenv import find_dotenv, load_dotenv
import os


# Get environment variables
load_dotenv(find_dotenv(), override=True)


class Llm:
    """
    Holder for the shared GPT-4o-mini chat client.

    The client is built once, when this class body is executed, and is
    reached through the class attribute ``Llm.llm``. Every connection setting
    is read from an environment variable; only the model name has a fallback
    ("gpt-4o-mini"), so any other missing variable raises ``KeyError``.
    """

    # Keyword order is kept as-is so that a missing variable is reported in
    # the same order as before.
    llm = AzureChatOpenAI(
        azure_deployment=os.environ["AZURE_OPENAI_DEPLOYMENT_GPT_4O_MINI"],
        model=os.getenv("OPENAI_MODEL_GPT_4O_MINI", "gpt-4o-mini"),
        openai_api_key=os.environ["OPENAI_API_KEY_4O"],
        openai_api_version=os.environ["OPENAI_API_VERSION_4O"],
        azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT_4O"],
        temperature=0,
    )


class LlmRes:
    """
    Holder for the shared reasoning-model chat client (default "o3-mini").

    The client is built once, when this class body is executed, and is
    reached through the class attribute ``LlmRes.llm``. Every connection
    setting is read from an environment variable; only the model name has a
    fallback, so any other missing variable raises ``KeyError``.
    """

    llm = AzureChatOpenAI(
        azure_deployment=os.environ["AZURE_OPENAI_DEPLOYMENT_GPT_3O_MINI"],
        model=os.getenv("OPENAI_MODEL_GPT_3O_MINI", "o3-mini"),
        openai_api_key=os.environ["OPENAI_API_KEY_3O"],
        openai_api_version=os.environ["OPENAI_API_VERSION_3O"],
        azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT_3O"],
        # reasoning_effort (str) applies to reasoning models only, such as
        # OpenAI o1 and o3-mini. Supported values are "low", "medium" and
        # "high"; a lower value can give faster responses and spend fewer
        # tokens on reasoning.
        reasoning_effort="high",
    )


# LLM = Llm()

# print(Llm.llm.invoke("whats up?").content)


# Datasettet

In [None]:
import pandas as pd


class FeedbackData:
    """
    Container for the (fake, self-made) feedback datasets.

    Both CSV files are read when this class body is executed, so the frames
    are available as ``FeedbackData.df_o`` and ``FeedbackData.df`` without
    creating an instance. Paths are relative to the working directory.
    """

    # Locations of the two CSV files.
    file_path = "../files/shuffled_df_LLM101_filtered.csv"
    enr_path = "../files/random_out.csv"

    # Frame loaded from ``file_path``.
    df_o = pd.read_csv(file_path)

    # Frame loaded from ``enr_path``; this is the one the cells below iterate
    # over. Append ``.head(20)`` here to work on a small sample.
    df = pd.read_csv(enr_path)


# print(FeedbackData.df["Feedback"][0])


# Structured output
### Pydantic-objekter

In [None]:
from pydantic import BaseModel, Field
from typing import Literal, Optional


class Categorize_search(BaseModel):
    """Categorization of IT-questionaire feedback."""

    # NOTE: the class docstring and the Field descriptions are part of the
    # pydantic schema, and are presumably forwarded to the model when this
    # class is used for structured output, so treat them as prompt text
    # rather than as ordinary documentation.

    # The only output field: exactly one of the labels in the Literal, with
    # 'Other' as the explicit fallback when no label fits.
    categories: Literal[
        "Network",
        "IT Training",
        "IT Support",
        'Other'
    ] = Field(
        description="Categorize the feedback into the most fitting category. If the categories provided are not a perfect fit, default to 'Other'."
    )
    # Disabled field: would ask the model to justify an 'Other' answer and to
    # suggest a better category.
    #if_other: str = Field(
        #description="If you chose to categorize the feedback as 'Other', return an explanation as to why and what category you think would be the best fit for the feedback."
    #)


class Categorize_bound(BaseModel):
    # Restricts the categories the model can choose between: it is only
    # allowed to put the feedback into one of the predefined categories.
    """Categorization of IT-questionaire feedback."""

    # NOTE: the class docstring and the Field descriptions are part of the
    # pydantic schema, so treat them as prompt text rather than as ordinary
    # documentation.

    # The chosen category; must be one of the labels in the Literal.
    categories: Literal[
        "Cyber security",
        "IT training",
        "IT support",
        "Quality of technology",
        "Data quality",
        "Network",
    ] = Field(description="Categorize the feedback into the most fitting category.")
    # Self-reported certainty, nullable.
    # NOTE(review): under pydantic v2 an Optional field without a default is
    # still required (it only additionally accepts None). If the rating is
    # meant to be omittable, add default=None. Confirm the intent.
    rating: Optional[int] = Field(
        description="Your certainty of the corectness of the best category on a scale from 1-10"
    )
    # Short free-text justification for the chosen category.
    reason: str = Field(
        description="Give a short scentence as to why you think this category is the best fit."
    )



class Categorize(BaseModel):
    "Categorization of feedback on IT-services."

    # NOTE: the class docstring and the Field descriptions are part of the
    # pydantic schema, so treat them as prompt text rather than as ordinary
    # documentation.

    # Thoughts: explain your reasoning.
    # (Looks like a placeholder for a possible extra field; confirm.)

    # Free-text category: unlike a Literal-typed field, any string is accepted.
    cat_1: str = Field(description="The best fitting general category. Only one.")
    # Self-reported certainty, nullable.
    # NOTE(review): under pydantic v2 an Optional field without a default is
    # still required (it only additionally accepts None). If the rating is
    # meant to be omittable, add default=None. Confirm the intent.
    rating: Optional[int] = Field(
        description="Your certainty of the corectness of the best category on a scale from 1-10"
    )




# Kategorisering

In [None]:
def categorize_feedback(feedback_txt: str, struktur, llm_model) -> dict:
    """
    Ask a chat model to categorize one piece of survey feedback.

    Args:
        feedback_txt: The feedback text; it is interpolated into the prompt.
        struktur: Output schema handed to ``with_structured_output``.
        llm_model: Any object exposing ``with_structured_output(schema,
            method=...)`` whose result has an ``invoke(prompt)`` method.

    Returns:
        The attribute dictionary (``__dict__``) of the object returned by
        ``invoke``.
    """
    # The prompt text is sent to the model verbatim, indentation included.
    prompt = f"""
    The following feedback is from an internal survey at 'IT and Things Company' where they asked their employees for feedback on their IT-services in general.
    Catgorize the feedback: {feedback_txt}.
    """

    # Bind the output schema to the model, then run the single request.
    schema_bound_model = llm_model.with_structured_output(
        struktur,
        method="function_calling",
    )
    reply = schema_bound_model.invoke(prompt)

    return vars(reply)

# Din tur

In [None]:
## Categorize every feedback entry with both models so that their answers can
## be compared side by side in one DataFrame.

responses = []

for f in FeedbackData.df["Feedback"]:
    feedback_4O = categorize_feedback(feedback_txt=f, struktur=Categorize_search, llm_model=Llm.llm)
    feedback_3O = categorize_feedback(feedback_txt=f, struktur=Categorize_search, llm_model=LlmRes.llm)
    responses.append({'Feedback': f, 'Category_4O': feedback_4O['categories'], 'Category_3O': feedback_3O['categories']})
    # Progress output: show only the row that was just added. Printing the
    # whole list on every iteration made the output grow quadratically.
    print(responses[-1])


# One row per feedback entry, with columns Feedback / Category_4O / Category_3O.
cat_df = pd.DataFrame(responses)
print(cat_df)

In [None]:
# Keep the rows where at least one of the two models answered 'Other'.
# cat_df is built with the columns 'Category_4O' and 'Category_3O'; the
# previous names ('Category', 'Category2') do not exist and raised KeyError.
others_df = cat_df[
    (cat_df['Category_4O'] == 'Other') | (cat_df['Category_3O'] == 'Other')
]
print(others_df)

In [None]:
## Re-categorize the 'Other' rows with the free-text Categorize schema, so the
## model proposes a category of its own for each of them.

responses_new_cat = [
    {
        'Feedback': f,
        'Category': categorize_feedback(
            feedback_txt=f, struktur=Categorize, llm_model=Llm.llm
        )["cat_1"],
    }
    for f in others_df["Feedback"]
]

# One row per re-categorized feedback entry.
new_cat_df = pd.DataFrame(responses_new_cat)
print(new_cat_df)