## Generate criteria for each sentiment and CoT examples

In [8]:
# Ensure your OpenAI API key is set
import pandas as pd
import json
import os

from langchain import LLMChain
from langchain.prompts import PromptTemplate
from langchain.chat_models import ChatOpenAI


# Publish the OpenAI key through the process environment, then read it back
# into a module-level variable for the model constructors to use.
# NOTE: this assignment replaces any OPENAI_API_KEY already exported in the
# shell; swap the placeholder for a real key before running the notebook.
os.environ.update({"OPENAI_API_KEY": "your_key_here"})
openai_api_key = os.environ.get("OPENAI_API_KEY")

In [9]:

# Generation settings shared by the prompt helper functions in this notebook.
model_name = "gpt-4o"  # alternative: gpt-3.5-turbo
max_tokens = 500       # cap on tokens per completion
temperature = 0.7      # default sampling temperature

In [10]:
# Load the example table: one row per news category, with a Negative, Neutral
# and Positive sample article for each (see the displayed output).
df = pd.read_csv(filepath_or_buffer="ctg_1example_pol.csv")
df

Unnamed: 0,Category,Negative,Neutral,Positive
0,morenews,Adopt a Pet: Marcelo from Houndhaven. Linda Co...,Advocacy groups push Senate to include electio...,Separated pit bulls 'hit doggie lottery' as Ma...
1,scienceandtechnology,Sea level will rise for centuries. We can cont...,Scientists Have Calculated a New Estimate for ...,8BitDo turns the NES gamepad into a mouse. Man...
2,foodanddrink,Dairy farmers are in crisis and it could chang...,How To Open a Can Without a Can Opener. All it...,"Annual pancake breakfast, parade honor those w..."
3,autos,"U-Haul truck, truck carrying oversize load cra...",X019 won't have any Xbox Project Scarlett news...,2020 Audi R8 Performance Review: What Makes Th...
4,us,Chicago Teachers Union To March Through City T...,"The cheapest apartments for rent in Alhambra, ...",Middletown's irrepressible cool: On what accou...
5,music,Former KNCI DJ Dan Cheatham Found Dead In His ...,The 25 best soundtracks of the '90s. It was th...,Women rule the night at the CMA Awards. Women ...
6,politics,Robert Blair: Who is the White House aide refu...,Deval Patrick in race to make Democrat debate ...,Three women nominated to fill vacant Democrati...
7,tv,'RHOC' Star Meghan Edmonds' Ex-Nanny Making Mo...,Stars who've been on one show for a long time....,Ohio's pink tax repeal signed into law. Ohio's...
8,finance,Amazon's new weapon to crush competition: $1 i...,Historic Plant City Coronet Bungalow Up For Sa...,Stocks close higher after Fed signals pause on...
9,video,Coalition of conservative groups files ethics ...,The celestial spectacle humans will never see....,World Series champion Nationals parade through...


In [17]:
# Ask the chat model for sentiment criteria tailored to a single news category
def generate_criteria(category):
    """Return model-written sentiment criteria for ``category`` news articles.

    The prompt asks for a description of positive, negative and neutral
    sentiment within the given category, without formatting symbols, and ends
    with empty "Positive:", "Negative:" and "Neutral:" headings for the model
    to fill in.

    Reads the module-level ``model_name``, ``openai_api_key``, ``temperature``
    and ``max_tokens`` settings.

    Args:
        category: News category name substituted into the prompt.

    Returns:
        The model's reply with leading/trailing whitespace removed.
    """
    prompt_template = """
   Generate detailed criteria for determining the sentiment of {category} news articles. 
   Provide comprehensive and specific criteria for identifying positive, negative, and neutral sentiments within the context of {category} news category. 
   Include content types that typically correspond to each sentiment classification.
   
   Your response only has the description of each sentiment.
   Please provide the response without using any formatting symbols like asterisks or hashtags.    

   
    
    Positive:
    

    Negative:
    

    Neutral:
    
    """

    # Chat model configured from the notebook-wide settings
    chat_model = ChatOpenAI(
        model_name=model_name,
        temperature=temperature,
        max_tokens=max_tokens,
        openai_api_key=openai_api_key,
    )

    # The template has a single placeholder: the category name
    criteria_prompt = PromptTemplate(template=prompt_template, input_variables=["category"])

    # Run prompt + model as one chain and tidy up the reply
    criteria_chain = LLMChain(llm=chat_model, prompt=criteria_prompt)
    reply = criteria_chain.run(category=category)
    return reply.strip()



In [18]:

def generate_CoTExamples(category, criteria_string, negative, neutral, positive):
    """Return step-by-step sentiment analyses for three example articles.

    The prompt embeds the category, the previously generated criteria and the
    three articles (in the order negative, neutral, positive), and asks the
    model to list key points, sentiment indicators and a final sentiment for
    each one.

    Reads the module-level ``model_name``, ``openai_api_key`` and
    ``max_tokens`` settings; the temperature is fixed at 0.2 for this call.

    Args:
        category: News category name.
        criteria_string: Sentiment criteria text inserted into step 2.
        negative: Article text shown as "Article 1".
        neutral: Article text shown as "Article 2".
        positive: Article text shown as "Article 3".

    Returns:
        The model's reply with leading/trailing whitespace removed.
    """
    prompt_template ="""
    You will be provided with three news articles about {category}. 
    Your role is to determine the sentiment of the articles. 
    Use the following this step-by-step approach:
    
    1. Identify key points in the article.
    2. Determine sentiment indicators in the article based on the criteria: {criteria_string}
    3. Conclude the overall sentiment.
    
    Your response should have the following structure. 
    Please provide the response without using any formatting symbols like asterisks or hashtags.    
    
    Article 1: {negative}
    1. Key points: 
    2. Sentiment indicators:
    3. Sentiment:

    Article 2: {neutral}
    1. Key points: 
    2. Sentiment indicators:
    3. Sentiment:
    
    
    Article 3: {positive}
    1. Key points: 
    2. Sentiment indicators:
    3. Sentiment:
    
    
    """
    # Values substituted into the template placeholders
    prompt_inputs = {
        "category": category,
        "criteria_string": criteria_string,
        "negative": negative,
        "neutral": neutral,
        "positive": positive,
    }

    # Chat model with a fixed 0.2 temperature for this call
    chat_model = ChatOpenAI(
        model_name=model_name,
        temperature=0.2,
        max_tokens=max_tokens,
        openai_api_key=openai_api_key,
    )
    cot_prompt = PromptTemplate(
        template=prompt_template,
        input_variables=["category", "criteria_string", "negative", "neutral", "positive"],
    )

    # Run prompt + model as one chain and tidy up the reply
    cot_chain = LLMChain(llm=chat_model, prompt=cot_prompt)
    reply = cot_chain.run(**prompt_inputs)
    return reply.strip()



In [19]:
def _is_article_header(line):
    """Return True for lines shaped like 'Article <number>...' (e.g. 'Article 1: ...')."""
    keyword, _, remainder = line.partition(" ")
    return keyword == "Article" and remainder.lstrip()[:1].isdigit()


def parse_criteria(combined_string):
    """Split combined model output into sentiment criteria and article examples.

    The text is scanned line by line. A line starting with "Positive:",
    "Negative:" or "Neutral:" opens the matching criteria section; any text
    following the header on the same line is kept as the first entry of that
    section. A line of the form "Article <number>..." opens the examples
    section. Every other non-blank line is added to whichever section is
    currently open; lines seen before any header are ignored.

    Args:
        combined_string: Criteria text followed by the chain-of-thought
            examples text, separated by newlines.

    Returns:
        A tuple ``(criteria, examples)`` where ``criteria`` maps "positive",
        "negative" and "neutral" to lists of stripped lines, and ``examples``
        is the example lines joined with newlines.
    """
    section_headers = {
        "Positive:": "positive",
        "Negative:": "negative",
        "Neutral:": "neutral",
    }
    criteria = {"positive": [], "negative": [], "neutral": []}
    examples = []
    current_section = None

    for line in combined_string.split('\n'):
        line = line.strip()
        header = next((h for h in section_headers if line.startswith(h)), None)

        if header is not None:
            current_section = section_headers[header]
            # Keep criteria written on the same line as the header
            # ("Positive: Articles that ...") instead of discarding them.
            inline_text = line[len(header):].strip()
            if inline_text:
                criteria[current_section].append(inline_text)
        elif _is_article_header(line):
            # Only "Article <number>" starts an example; a criteria sentence
            # such as "Articles that report ..." must stay in its section.
            current_section = "articles"
            examples.append(line)  # Start a new article
        elif current_section and line:
            if current_section == "articles":
                examples.append(line)
            else:
                criteria[current_section].append(line)

    return criteria, "\n".join(examples)




# Generate criteria and chain-of-thought examples for each category and save
# them as one JSON configuration file per category


categories = df['Category'].tolist()
negative = df['Negative'].tolist()
neutral = df['Neutral'].tolist()
positive = df['Positive'].tolist()

# Output directory for the JSON files; create it up front so open() below does
# not fail with FileNotFoundError on a fresh checkout.
path= "snt_criteria_examples/"
os.makedirs(path, exist_ok=True)

for i in range(len(categories)):
    # Two model calls per category: first the criteria, then the worked
    # examples that are built on top of those criteria.
    criteria_string = generate_criteria(categories[i])
    examples_string= generate_CoTExamples(categories[i], criteria_string, negative[i], neutral[i], positive[i])
    combined_string = criteria_string + "\n" + examples_string

    # Split the combined reply back into per-sentiment criteria and examples
    criteria, examples = parse_criteria(combined_string)
    config = {
        categories[i]: {
            "positive": " ".join(criteria["positive"]),
            "negative": " ".join(criteria["negative"]),
            "neutral": " ".join(criteria["neutral"]),
            "chain_of_thought_examples": examples.strip()
        }
    }

    # Save the configuration to a JSON file named after the category
    with open(path+categories[i]+'.json', 'w', encoding='utf-8') as file:
        json.dump(config, file, indent=4)

    print("Configuration file created with generated criteria and chain of thought examples.")



Configuration file created with generated criteria and chain of thought examples.
Configuration file created with generated criteria and chain of thought examples.
Configuration file created with generated criteria and chain of thought examples.
Configuration file created with generated criteria and chain of thought examples.
Configuration file created with generated criteria and chain of thought examples.
Configuration file created with generated criteria and chain of thought examples.
Configuration file created with generated criteria and chain of thought examples.
Configuration file created with generated criteria and chain of thought examples.
Configuration file created with generated criteria and chain of thought examples.
Configuration file created with generated criteria and chain of thought examples.
Configuration file created with generated criteria and chain of thought examples.
Configuration file created with generated criteria and chain of thought examples.
Configuration fi

## Convert JSON files to txt

In [22]:
import json
import os
import glob

def convert_json_to_text(json_data):
    """Render a ``{category: {sentiment: text}}`` mapping as plain text.

    Each category becomes a "<Category>:" heading followed by one
    "<Sentiment>:" heading and its text per entry. Keys are passed through
    ``str.capitalize``. Right single quotation marks (U+2019) in the text are
    replaced with a plain apostrophe.

    Args:
        json_data: Mapping of category name to a mapping of section name to
            text, as loaded from one of the JSON configuration files.

    Returns:
        The rendered sections joined with newlines; an empty string for an
        empty mapping.
    """
    output = []

    for category, sentiments in json_data.items():
        output.append(f"{category.capitalize()}:\n")
        for sentiment, text in sentiments.items():
            # json.load has already decoded "\u2019" escapes into the actual
            # character, so replace the character itself; the literal
            # backslash form is still handled in case it appears verbatim.
            text = text.replace('\\u2019', "'").replace('\u2019', "'")
            output.append(f"{sentiment.capitalize()}:\n{text}\n")

    return "\n".join(output)



txt_path="snt_criteria_examples/txtFiles/"
json_path= "snt_criteria_examples/"

# Make sure the destination directory exists before any file is written
os.makedirs(txt_path, exist_ok=True)

# Use glob to get all the JSON files in the directory
json_files = glob.glob(os.path.join(json_path, '*.json'))

# Loop through the JSON files
for json_file in json_files:
    with open(json_file, 'r', encoding='utf-8') as file1:
        json_data = json.load(file1)

    text_output = convert_json_to_text(json_data)

    # Name the text file after the first top-level key of the JSON document
    file_name = os.path.join(txt_path, list(json_data.keys())[0] + ".txt")
    # Decoded JSON text can contain non-ASCII characters, so write UTF-8
    # explicitly instead of relying on the platform default encoding.
    with open(file_name, 'w', encoding='utf-8') as file2:
        file2.write(text_output)

    print(f"Conversion complete! Check the '{file_name}' file.")


Conversion complete! Check the '/home/alshma0a/PycharmProjects/Agent4Rec_v2/my notebook/snt_criteria_examples/txtFiles/video.txt' file.
Conversion complete! Check the '/home/alshma0a/PycharmProjects/Agent4Rec_v2/my notebook/snt_criteria_examples/txtFiles/entertainment.txt' file.
Conversion complete! Check the '/home/alshma0a/PycharmProjects/Agent4Rec_v2/my notebook/snt_criteria_examples/txtFiles/travel.txt' file.
Conversion complete! Check the '/home/alshma0a/PycharmProjects/Agent4Rec_v2/my notebook/snt_criteria_examples/txtFiles/finance.txt' file.
Conversion complete! Check the '/home/alshma0a/PycharmProjects/Agent4Rec_v2/my notebook/snt_criteria_examples/txtFiles/politics.txt' file.
Conversion complete! Check the '/home/alshma0a/PycharmProjects/Agent4Rec_v2/my notebook/snt_criteria_examples/txtFiles/health.txt' file.
Conversion complete! Check the '/home/alshma0a/PycharmProjects/Agent4Rec_v2/my notebook/snt_criteria_examples/txtFiles/baseball.txt' file.
Conversion complete! Check the