In [None]:
pip install --upgrade openai

In [37]:
import pandas as pd

# Location of the competition dataset (the sheet provides 'id', 'problem' and
# 'solution' columns). Kept in one place so it is easy to point elsewhere.
DATA_PATH = "/Users/johri/Projects/AIEarthHack/AI EarthHack Dataset.xlsx"
# A solution must contain MORE than this many whitespace-separated words to be kept.
MIN_SOLUTION_WORDS = 20

# read xlsx file
df = pd.read_excel(DATA_PATH)
print(df.columns)
print("Before pre-processing: ", len(df))

# keep only entries whose solution has more than MIN_SOLUTION_WORDS words
# (a missing solution yields a NaN word count, which compares False and is dropped too)
df = df[df['solution'].str.split().str.len().gt(MIN_SOLUTION_WORDS)]

# drop entries whose solution text is an exact duplicate of an earlier entry
df = df.drop_duplicates(subset=['solution'])
print("After pre-processing: ", len(df))

Index(['id', 'problem', 'solution'], dtype='object')
Before pre-processing:  1300
After pre-processing:  1208


In [76]:
import json
from openai import OpenAI

# Create the API client. No key is passed explicitly: the OpenAI SDK then reads
# it from the OPENAI_API_KEY environment variable, so the secret never has to be
# written into the notebook (where it would end up in saved copies and commits).
client = OpenAI()

def chat_gpt(prompt, model="gpt-3.5-turbo-1106"):
    """Send `prompt` to a chat model in JSON mode and return the raw reply text.

    Args:
        prompt: Text sent as the user message.
        model: Name of the chat-completions model to call. Defaults to the
            model this notebook was written against.

    Returns:
        The content of the first returned choice with surrounding whitespace
        stripped. The string is NOT parsed here; callers run `json.loads` on it.

    Raises:
        ValueError: If the first choice carries no text content, which would
            otherwise surface as an opaque AttributeError on `.strip()`.
    """
    response = client.chat.completions.create(
        model=model,
        response_format={"type": "json_object"},
        messages=[
            # OpenAI's JSON mode expects the conversation itself to ask for JSON.
            {"role": "system", "content": "Provide output in valid JSON"},
            {"role": "user", "content": prompt},
        ],
    )

    first_choice = response.choices[0]
    content = first_choice.message.content
    if content is None:
        raise ValueError(
            f"Model returned no text content (finish_reason={first_choice.finish_reason!r})"
        )

    return content.strip()


## Alternative for the legacy openai SDK (versions before 1.0), kept for reference:
# import openai
# def chat_gpt(prompt):
#     response = openai.ChatCompletion.create(
#         model="gpt-3.5-turbo",
#         messages=[{"role": "user", "content": prompt}]
#     )
#     ret_json = json.dumps(response.choices[0].message.content.strip())
    
#     return ret_json


In [86]:
from ast import literal_eval

# ---- Prompt / batching configuration ----
# NOTE: despite its name, min_word_count is used in the prompt as the MAXIMUM
# number of words allowed in a comment ("must not exceed ... words").
min_word_count = 10
min_bullet = 2
max_bullet = 5
key_features = "objective, relevance_to_circular_economy, feasibility, impact, technical_complexity, innovation, sustainability, market_potential, competitive_landscape, cost-effectiveness"
dict_keys = "'rating_objective', 'rating_relevance_to_circular_economy', 'rating_feasibility', 'rating_impact', 'rating_technical_complexity', 'rating_innovation', 'rating_sustainability', 'rating_market_potential', 'rating_competitive_landscape', 'rating_cost-effectiveness', 'explanation_objective', 'explanation_relevance_to_circular_economy', 'explanation_feasibility', 'explanation_impact', 'explanation_technical_complexity', 'explanation_innovation', 'explanation_sustainability', 'explanation_market_potential', 'explanation_competitive_landscape', 'explanation_cost-effectiveness'"
n = 10              # number of proposals sent to the model per request
max_proposals = 50  # only the first max_proposals rows of df are evaluated

# Per-batch result frames are collected here and concatenated once after the
# loop (cheaper than growing a DataFrame on every iteration). If a request
# fails midway, the batches finished so far are still available in this list.
batch_frames = []

for start in range(0, min(max_proposals, len(df)), n):
    # Positional slice of the current batch; the last batch may be shorter than n.
    selected_df = df.iloc[start:min(start + n, max_proposals)]

    proposals = ""
    # The row label is not needed, so it is discarded rather than reusing the
    # outer loop variable.
    for _, row in selected_df.iterrows():
        # 'problem' is not filtered during pre-processing, so it may be missing.
        problem_statement = row['problem'].strip() if isinstance(row['problem'], str) else ""
        solution_statement = row['solution'].strip()
        proposals += f"id: {row['id']}\nproblem: {problem_statement}\nsolution: {solution_statement}\n"

    # The proposal count is taken from the actual batch so the prompt stays
    # truthful when the final batch holds fewer than n rows.
    prompt = f"""Given are {len(selected_df)} proposals for a competition titled "Unlocking the Potential of Circular Economy".
                The solutions were sourced all over the world in various ways with circular economy applications to a wide array of industries, ranging from textiles to food waste management.
                Participants were asked about the problem their solution is meant to solve and describe the solution in their own words.
                These solutions must be presented before a human judge. To help the judge, extract the main idea from each proposal.
                Secondly, for each proposal provide comments and ratings using the following rubrics: {key_features}
                The comments for any proposal must not exceed {min_word_count} words. Limit to {min_bullet}-{max_bullet} bullet points. Rate each rubric from 0 to 100, with 0 being the worst and 100 being the best.
                Thirdly, generate a summary of the description of problem and solution for each propsal. Limit the summary to around 100 words.
                The output should be a single json as mentioned below: 
                {{
                    id1: {{
                        explanation_rubric1: ...,
                        rating_rubric1: ...,
                        explanation_rubric2: ...,
                        rating_rubric2: ...,
                        ...
                        summary: ...
                    }},
                    id2: {{
                        explanation_rubric1: ...,
                        rating_rubric1: ...,
                        explanation_rubric2: ...,
                        rating_rubric2: ...,
                        ...
                        summary: ... 
                        }},
                    ...
                }}

                Here, id1, id2, ... will be the respective proposal id.
                The rubric1, rubric2, ... will be: {dict_keys}.
                Ensure that the summary describes  the problem and solution clearly. 
                
                Use the following pairs of problem and solution enclosed in triple backticks:
                ```
                {proposals}
                ```
                """

    response_json = chat_gpt(prompt)

    print(response_json)
    json_object = json.loads(response_json)

    # Top-level JSON keys are the proposal ids; after transposing, each row is
    # one proposal and the row label is its id.
    json_df = pd.DataFrame(json_object).T
    # Keep the id as a real column: the labels are discarded by ignore_index
    # below, and the model is not guaranteed to answer in input order.
    if "id" not in json_df.columns:
        json_df.insert(0, "id", json_df.index)
    batch_frames.append(json_df)

df_final = pd.concat(batch_frames, ignore_index=True) if batch_frames else pd.DataFrame()

df_final

{
  "1": {
    "rating_objective": 90,
    "explanation_objective": "Addresses significant waste in traditional construction methods.",
    "rating_relevance_to_circular_economy": 95,
    "explanation_relevance_to_circular_economy": "Embraces recycling and reuse, advancing circular economy principles.",
    "rating_feasibility": 85,
    "explanation_feasibility": "Proven feasibility in global markets, indicating potential scalability.",
    "rating_impact": 92,
    "explanation_impact": "Reduces construction waste by 90% and time by 30-50%, optimizing environmental and financial efficiency.",
    "rating_technical_complexity": 80,
    "explanation_technical_complexity": "Involves engineered components and manufacturing facility integration.",
    "rating_innovation": 88,
    "explanation_innovation": "Transitioning from 'take, make, and dispose' to 'reduce, reuse, and recycle' model is innovative.",
    "rating_sustainability": 90,
    "explanation_sustainability": "Drives industry tow

In [None]:
# Attach proposal ids to the model's evaluations and export them to CSV.
#
# df keeps its original row labels after filtering/de-duplication, while
# df_final is numbered from 0. A label-aligned concat(axis=1) would therefore
# pair ids with the wrong evaluations and pad every unmatched row with NaN,
# so the two frames are aligned by position instead.
if 'id' in df_final.columns:
    # The evaluations already carry their own ids; nothing to attach.
    result_df = df_final
else:
    # assumes the model returned the proposals complete and in input order — TODO confirm
    ids = df[['id']].iloc[:len(df_final)].reset_index(drop=True)
    result_df = pd.concat([ids, df_final.reset_index(drop=True)], axis=1)
result_df.to_csv("result.csv")