In [2]:
import os
from dotenv import load_dotenv
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_openai import AzureChatOpenAI

# Step 1: Pull settings from the local .env file into the process environment
load_dotenv(".env")

# Step 2: Read the Azure OpenAI connection settings (each one is None when unset)
AZURE_OPENAI_ENDPOINT = os.environ.get("AZURE_OPENAI_ENDPOINT")
AZURE_OPENAI_API_KEY = os.environ.get("AZURE_OPENAI_API_KEY")
OPENAI_API_VERSION = os.environ.get("OPENAI_API_VERSION")

# Step 3: Build the chat model client shared by every chain in this notebook
model = AzureChatOpenAI(
    azure_endpoint=AZURE_OPENAI_ENDPOINT,
    api_key=AZURE_OPENAI_API_KEY,
    api_version=OPENAI_API_VERSION,
    azure_deployment="gpt-4o-mini",  # name of the Azure deployment to call
    temperature=0,  # 0 keeps sampling as deterministic as the service allows
    max_tokens=None,  # no explicit completion-length cap is passed
    timeout=None,  # no explicit request timeout is passed
)

In [3]:
import pandas as pd 
file_path = 'datasets/6-dimensions-for-website-2015-08-16.csv'

# Raw column names of the six Hofstede scores as they appear in the CSV.
hofstede_score_columns = ["pdi", "idv", "mas", "uai", "ltowvs", "ivr"]

# The export marks missing scores with the literal string '#NULL!'. Parsing that marker
# as NaN while reading avoids DataFrame.replace('#NULL!', None): depending on the pandas
# version, an explicit value=None is either honoured or silently treated as
# "pad-fill from the previous row", which would copy another country's score.
# Rows lacking any of the six scores are dropped, the remaining scores are cast to int,
# and 'ltowvs' is renamed to the conventional short name 'lto'.
# Built as one chain (no inplace=True) so re-running the cell always yields the same frame.
df_hofstede_dimension = (
    pd.read_csv(file_path, sep=';', na_values=['#NULL!'])
    .dropna(subset=hofstede_score_columns)
    .astype({column_name: int for column_name in hofstede_score_columns})
    .rename(columns={"ltowvs": "lto"})
)

# Create a function to convert numeric values into categories: low, medium, high
def categorize(value, low_max=35, medium_max=75):
    """Bucket a numeric score into 'low', 'medium' or 'high'.

    Args:
        value: The score, as an int, a float, or an integer-like string.
            It is converted with ``int()``, so floats are truncated toward zero.
        low_max: Largest score that is still labelled 'low' (default 35).
        medium_max: Largest score that is still labelled 'medium' (default 75).

    Returns:
        'low' if the score is <= low_max, 'medium' if it is <= medium_max,
        otherwise 'high'.

    Raises:
        ValueError, TypeError: If ``value`` cannot be converted by ``int()``
            (for example NaN, None, or a non-numeric string).
    """
    score = int(value)  # convert once instead of once per comparison
    if score <= low_max:
        return 'low'
    if score <= medium_max:
        return 'medium'
    return 'high'

# Apply the categorization function to each column
# Add a '<dimension>_value' column holding the low/medium/high label of each score
for dimension in ('pdi', 'idv', 'mas', 'uai', 'lto', 'ivr'):
    label_column = f'{dimension}_value'
    df_hofstede_dimension[label_column] = df_hofstede_dimension[dimension].apply(categorize)

In [4]:
# Select the rows for Germany and keep the first one as a plain {column: value} dict
germany_rows = df_hofstede_dimension[df_hofstede_dimension['country'] == 'Germany']
germany_dimension = germany_rows.to_dict(orient='records')[0]

In [5]:
import pandas as pd
# Path to the NormBank CSV, relative to the notebook's working directory.
# NOTE: this rebinds `file_path`, which earlier pointed at the Hofstede CSV.
file_path = 'datasets/NormBank.csv'
# Read the CSV file into a pandas DataFrame
df = pd.read_csv(file_path)
# Display the first few rows of the DataFrame
df.head()

Unnamed: 0,setting,behavior,setting-behavior,constraints,constraints_given,constraint_predict,norm,label,split
0,construction site,make sexual innuendos,construction site [BEHAVIOR] make sexual innue...,[OTHER]'s characteristic is 'uncomfortable' [A...,[OTHER]'s gender is 'woman' [AND] [OTHER]'s ch...,[PERSON]'s role is 'construction worker',taboo,0,train
1,cafe,talk about sex,cafe [BEHAVIOR] talk about sex,[PERSON]'s role is 'barista',[AND],[PERSON]'s role is 'barista',taboo,0,train
2,cemetery,pick flowers,cemetery [BEHAVIOR] pick flowers,[PERSON]'s role is 'cemetery salesperson',[AND],[PERSON]'s role is 'cemetery salesperson',taboo,0,test
3,banquet hall,have long hair,banquet hall [BEHAVIOR] have long hair,[PERSON]'s gender is 'woman',[AND],[PERSON]'s gender is 'woman',expected,2,train
4,arena,drink beer,arena [BEHAVIOR] drink beer,[PERSON]'s role is 'player',[AND],[PERSON]'s role is 'player',taboo,0,test


In [5]:
# Number of rows per norm label
df['norm'].value_counts()

taboo       68057
normal      59507
expected    27859
Name: norm, dtype: int64

In [6]:
# Keep only the rows whose norm label is "expected"
df_expected = df.loc[df['norm'] == 'expected']

In [7]:
# Use the third "expected" row (positional index 2) as the worked example, as a dict
sample = df_expected.iloc[2].to_dict()

In [8]:
sample

{'setting': 'bedroom',
 'behavior': 'sleep on your side',
 'setting-behavior': 'bedroom [BEHAVIOR] sleep on your side',
 'constraints': "[PERSON]'s medical condition is not 'gastro-oesophageal reflux disease (gord)' [AND] [PERSON]'s medical condition is 'obstructive sleep apnea' [AND] [PERSON]'s medical condition is not 'post-op caesarean-section' [AND] [PERSON]'s medical condition is not 'pregnant'",
 'constraints_given': "[PERSON]'s medical condition is 'obstructive sleep apnea' [AND] [PERSON]'s medical condition is not 'gastro-oesophageal reflux disease (gord)' [AND] [PERSON]'s medical condition is not 'pregnant' [AND] ",
 'constraint_predict': "[PERSON]'s medical condition is not 'post-op caesarean-section'",
 'norm': 'expected',
 'label': 2,
 'split': 'train'}

In [151]:
# Prompt text for turning one NormBank record into a dialogue.
normbank_template = """
    Generate a dialogue based on the following instructions:

    Situation: {situation}
    Constraints: {constraints}
    Country: {country}

    Please ensure the dialogue reflects the context described above.
    Speaker's name should reflect the country provided.
    """

normbank_prompt = PromptTemplate(
    input_variables=["situation", "constraints", "country"],
    template=normbank_template,
)

# The chain returns the model reply as a plain string.
output_parser = StrOutputParser()
chain = normbank_prompt | model | output_parser

# Values substituted into the prompt for the chosen NormBank sample.
# NOTE(review): only sample["setting"] is passed as the situation; sample["behavior"]
# never reaches the model — confirm that this is intended.
input_values = dict(
    situation=sample["setting"],
    constraints=sample["constraints"],
    country="Germany",
)

# Run the chain; `dialogues` holds the generated dialogue text.
dialogues = chain.invoke(input_values)

In [152]:
print(dialogues)

**Setting: A cozy bedroom in Germany, dimly lit with soft music playing in the background.**

**Characters:**
- **Anna**: A woman in her early thirties, recently diagnosed with obstructive sleep apnea.
- **Lukas**: Anna's supportive partner, concerned about her health.

---

**Anna**: (sitting on the edge of the bed, looking a bit tired) Ich habe wieder nicht gut geschlafen, Lukas. Es ist wirklich frustrierend.

**Lukas**: (sitting beside her, gently rubbing her back) Ich weiß, Anna. Hast du die Maske heute Nacht wieder getragen?

**Anna**: Ja, aber ich habe das Gefühl, dass sie nicht richtig sitzt. Manchmal fühle ich mich, als würde ich ersticken.

**Lukas**: Das klingt beängstigend. Vielleicht sollten wir mit deinem Arzt sprechen, um sicherzustellen, dass die Einstellung der Maschine stimmt.

**Anna**: (sighs) Ja, das denke ich auch. Ich möchte einfach wieder gut schlafen können. Es ist so anstrengend, den ganzen Tag müde zu sein.

**Lukas**: (nodding) Ich kann mir vorstellen, wie sc

## Norm-Verification

In [12]:
# Metadata of the NormBank sample the dialogue was generated from.
# Stored under its own name: `context` is the dialogue text passed to the
# norm-extraction prompt, so binding this dict to `context` was a dead store that
# also reused one name for two different kinds of value.
norm_sample_context = {
    'setting': 'bedroom',
    'behavior': 'sleep on your side',
    'setting-behavior': 'bedroom [BEHAVIOR] sleep on your side',
    'constraints': "[PERSON]'s medical condition is not 'gastro-oesophageal reflux disease (gord)' [AND] [PERSON]'s medical condition is 'obstructive sleep apnea' [AND] [PERSON]'s medical condition is not 'post-op caesarean-section' [AND] [PERSON]'s medical condition is not 'pregnant'",
    'country' : 'germany'
}

# Dialogue text that is fed to the norm-extraction prompt as {context}.
# NOTE(review): this looks like a pasted copy of a previously generated dialogue rather
# than the live `dialogues` variable — confirm that freezing the text here is intended.
context = """
**Setting: A cozy bedroom in Germany, dimly lit with soft music playing in the background.**

**Characters:**
- **Anna**: A woman in her early thirties, recently diagnosed with obstructive sleep apnea.
- **Lukas**: Anna's supportive partner, concerned about her health.

---

**Anna**: (sitting on the edge of the bed, looking a bit tired) Ich habe wieder nicht gut geschlafen, Lukas. Es ist wirklich frustrierend.

**Lukas**: (sitting beside her, gently rubbing her back) Ich weiß, Anna. Hast du die Maske heute Nacht wieder getragen?

**Anna**: Ja, aber ich habe das Gefühl, dass sie nicht richtig sitzt. Manchmal fühle ich mich, als würde ich ersticken.

**Lukas**: Das klingt beängstigend. Vielleicht sollten wir mit deinem Arzt sprechen, um sicherzustellen, dass die Einstellung der Maschine stimmt.

**Anna**: (sighs) Ja, das denke ich auch. Ich möchte einfach wieder gut schlafen können. Es ist so anstrengend, den ganzen Tag müde zu sein.

**Lukas**: (nodding) Ich kann mir vorstellen, wie schwierig das ist. Hast du schon darüber nachgedacht, ob es etwas gibt, das wir zu Hause ändern können? Vielleicht die Schlafposition oder das Kissen?

**Anna**: (thoughtfully) Ich habe gehört, dass das Schlafen auf der Seite helfen kann. Aber ich bin so daran gewöhnt, auf dem Rücken zu schlafen.

**Lukas**: (encouragingly) Lass es uns versuchen! Wir könnten ein Kissen kaufen, das dir hilft, auf der Seite zu bleiben. Und ich kann dir helfen, dich daran zu gewöhnen.

**Anna**: (smiling slightly) Das wäre toll. Ich schätze deine Unterstützung so sehr. Es macht es ein bisschen einfacher, wenn ich nicht alleine bin.

**Lukas**: (smiling back) Natürlich, wir sind ein Team. Und ich werde alles tun, um sicherzustellen, dass du die Ruhe bekommst, die du brauchst.

**Anna**: (taking a deep breath) Danke, Lukas. Ich fühle mich schon besser, nur weil ich mit dir darüber gesprochen habe.

**Lukas**: (leaning closer) Das ist es, was Partner tun. Lass uns morgen einen Termin beim Arzt machen und dann sehen, was wir tun können, um deine Nächte besser zu machen.

**Anna**: (nodding) Ja, das klingt nach einem Plan. Ich bin froh, dass ich dich an meiner Seite habe.

**Lukas**: (kissing her forehead) Immer, Anna. Wir schaffen das zusammen.

--- 

**(The scene fades as they continue to talk softly, planning for a better night's sleep.)**
"""
# Prompt asking the model to list the social norms implied by the text in {context}.
norm_template = """
Given an input Text, respond with Social Norms that are assumed by the speaker of the text, in English. Social Norms
are rules and standards that are understood by members of a group, and that guide or constrain social behaviors
without the force of law. The Social Norms must be complete, correct, different, not contradicting with each other,
inferable from text.

Text: {context}

Schema:
Norms typically follow one of the following templates, and elaborate on the applicable conditions or why:
It's [good/helpful/important/expected] to do action [Y], under situation [Z]
It's [wrong/evil/not normal/unlawful] to do action [Y], for reason [Z]
In context [C], if one wants to do task or goal [X], one should / not force [P] to do behavior [Y]
List some social norms related to the situation, , in detail over separate lines:
"""

norm_extract_prompt = PromptTemplate(
    input_variables=["context"],
    template=norm_template,
)

# Prompt -> model -> plain-string reply.
output_parser = StrOutputParser()
norm_chain = norm_extract_prompt | model | output_parser

# Extract the norms from the dialogue text held in `context`.
norm_result = norm_chain.invoke(dict(context=context))

In [13]:
print(norm_result)

1. It's important to communicate openly about health issues, especially in a supportive relationship, to foster understanding and collaboration in finding solutions.

2. It's expected that partners will provide emotional support and encouragement during challenging times, such as dealing with health concerns like sleep apnea.

3. It's helpful to seek professional advice when facing health-related challenges, as consulting a doctor can provide guidance and reassurance.

4. It's considered supportive to actively participate in finding solutions to a partner's health issues, such as suggesting changes in sleep habits or purchasing helpful items like special pillows.

5. In the context of a romantic relationship, expressing gratitude for support and assistance is important for maintaining a positive emotional connection.

6. It's normal to feel frustrated or anxious about health problems, and sharing these feelings with a partner is a healthy way to cope.

7. It's expected that partners wi

## Hofstede-Verification

In [153]:
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field

# Schema for the model's Hofstede assessment of a dialogue: one string field per
# dimension (pdi, idv, mas, uai, lto, ivr), each declared with enum low/medium/high.
# NOTE(review): documented with comments rather than a class docstring on purpose —
# pydantic appears to copy a model docstring into the generated JSON schema, which would
# change the format instructions sent to the model. Confirm before adding a docstring.
class HofstedeDimension(BaseModel):
    pdi : str = Field(..., description="Power Distance Index (PDI): Examine the balance of authority and roles (e.g., leadership vs. community).", 
                      enum=["low", "medium", "high"])
    idv : str = Field(..., description="Individualism vs. Collectivism (IDV): Determine whether the dialogue emphasizes individual goals or collective well-being.", 
                      enum=["low", "medium", "high"])
    mas : str = Field(..., description="Masculinity vs. Femininity (MAS): Evaluate whether the focus is on competition and achievement or care, cooperation, and harmony.", 
                      enum=["low", "medium", "high"])
    uai : str = Field(..., description="Uncertainty Avoidance Index (UAI): Assess whether the dialogue reflects comfort with change and adaptability or a preference for tradition and stability.", 
                      enum=["low", "medium", "high"])
    lto : str = Field(..., description="Long-Term vs. Short-Term Orientation (LTO): Consider how traditions are preserved or balanced with short-term flexibility.", 
                      enum=["low", "medium", "high"])
    ivr : str = Field(..., description="Indulgence vs. Restraint (IVR): Identify expressions of celebration, emotional freedom, or restraint in behavior.", 
                      enum=["low", "medium", "high"])

# Prompt asking the model to rate the dialogue on the six Hofstede dimensions.
verify_hofstede_template = """
Dialogues:
{dialogues}

Analyze the provided dialogues using Hofstede’s Cultural Dimensions Theory and predict how the score reflects the six dimensions. 

Return a JSON object.
{format_instructions}
"""

# JSON parser driven by the HofstedeDimension schema; it also supplies the
# format instructions that are baked into the prompt.
parser = JsonOutputParser(pydantic_object=HofstedeDimension)

verify_hofstede_prompt = PromptTemplate(
    partial_variables=dict(format_instructions=parser.get_format_instructions()),
    input_variables=["dialogues"],
    template=verify_hofstede_template,
)

# Prompt -> model -> parsed JSON.
verify_hofstede_chain = verify_hofstede_prompt | model | parser

# Rate the generated dialogue held in `dialogues`.
verify_hofstede_answer = verify_hofstede_chain.invoke(dict(dialogues=dialogues))

In [154]:
verify_hofstede_answer

{'pdi': 'low',
 'idv': 'medium',
 'mas': 'low',
 'uai': 'medium',
 'lto': 'medium',
 'ivr': 'medium'}

In [155]:
# Function to compare the values
def compare_dimensions(germany_dimension, verify_hofstede_answer):
    """Count how many predicted dimension labels equal the reference labels.

    Args:
        germany_dimension: Reference record; the label for dimension ``key`` is read
            from the entry named ``f"{key}_value"``.
        verify_hofstede_answer: Mapping of dimension key to predicted label.

    Returns:
        The number of keys in ``verify_hofstede_answer`` whose predicted label equals
        the reference label. Keys that have no reference entry never count.
    """
    match_count = 0
    for key, predicted in verify_hofstede_answer.items():
        reference_key = f"{key}_value"
        # Without this guard a predicted None for an unknown key would compare equal
        # to the None returned by a missing lookup and be counted as a match.
        if reference_key not in germany_dimension:
            continue
        if germany_dimension[reference_key] == predicted:
            match_count += 1
    return match_count

# Count how many predicted dimension labels agree with Germany's reference labels,
# then display the count as the cell output.
match_count = compare_dimensions(germany_dimension, verify_hofstede_answer)
match_count

4

In [157]:
import json

# Function to save dialogues to a JSONL file when match_count >= 4
def save_dialogues_to_jsonl(country, verify_hofstede_answer, dialogues, match_count,
                            file_path='dialogues.jsonl', min_matches=4):
    """Append one record to a JSONL file when enough dimensions matched.

    Args:
        country: Country name stored under the 'country' key.
        verify_hofstede_answer: Predicted dimension labels, stored under
            'hofstede_dimension'.
        dialogues: Dialogue content stored under 'dialogues'; must be JSON-serialisable.
        match_count: Number of dimensions that matched the reference.
        file_path: JSONL file to append to (created if it does not exist).
        min_matches: Smallest ``match_count`` that still gets saved (default 4).

    Returns:
        True if a record was appended, False if ``match_count`` was below
        ``min_matches`` and nothing was written.

    Raises:
        TypeError: If the record cannot be serialised to JSON.
        OSError: If the file cannot be opened or written.
    """
    if match_count < min_matches:
        return False

    record = {
        'country': country,
        'hofstede_dimension': verify_hofstede_answer,
        'dialogues': dialogues
    }
    # Serialise before opening the file so a serialisation error leaves the file
    # untouched; the explicit encoding keeps the output independent of the platform.
    line = json.dumps(record) + '\n'
    with open(file_path, 'a', encoding='utf-8') as file:
        file.write(line)
    return True

# Save the dialogue that was generated and verified earlier in the notebook.
# `dialogues` is deliberately NOT rebound here: it has to remain the text that
# `verify_hofstede_answer` and `match_count` were computed from, otherwise the saved
# record pairs the Hofstede assessment with unrelated content and re-running the
# verification cell would analyse the wrong input.
save_dialogues_to_jsonl('Germany', verify_hofstede_answer, dialogues, match_count)

# The call appends one JSON line to 'dialogues.jsonl' if match_count >= 4,
# and writes nothing otherwise.