In [12]:
# !pip install openai -q

In [18]:
import openai
import numpy as np
import pandas as pd

from os import getenv, environ

# Fail fast when the key is missing, empty, or whitespace-only: a blank value would
# pass a plain membership check and only surface later as an authentication error.
if not environ.get("OPENAI_API_KEY", "").strip():
    raise EnvironmentError("Please include a valid OpenAI API Key as the environment variable 'OPENAI_API_KEY'.")

# Strip stray whitespace/newlines that often sneak in when a key is copy-pasted.
openai.api_key = getenv("OPENAI_API_KEY").strip()

In [14]:
def get_completion(prompt, engine = 'text-davinci-003', max_tokens = 2500):
    """Request a single text completion for `prompt` and return its text.

    Parameters
    ----------
    prompt : str
        Text sent to the completion endpoint.
    engine : str, optional
        Engine identifier forwarded to `openai.Completion.create`.
    max_tokens : int, optional
        Upper bound on generated tokens, forwarded unchanged. Defaults to 2500,
        the value that was previously hard-coded.

    Returns
    -------
    str
        The `text` attribute of the first returned choice.

    NOTE(review): this uses the `openai.Completion` interface; presumably the
    notebook is pinned to a pre-1.0 `openai` release -- confirm before upgrading.
    """
    response = openai.Completion.create(
        engine = engine,
        prompt = prompt,
        max_tokens = max_tokens,
        n = 1  # exactly one choice is requested, so index 0 below is the only one
    )
    return response.choices[0].text

In [23]:
# Sample problem/solution pair (modular construction) that is passed to
# `generate_ans` further down to exercise the prompt.
input_text = """

Problem : The construction industry is indubitably one of the significant contributors to global waste, contributing approximately 1.3 billion tons of waste annually, exerting significant pressure on our landfills and natural resources. Traditional construction methods entail single-use designs that require frequent demolitions, leading to resource depletion and wastage.   
Solution : Herein, we propose an innovative approach to mitigate this problem: Modular Construction. This method embraces recycling and reuse, taking a significant stride towards a circular economy.   Modular construction involves utilizing engineered components in a manufacturing facility that are later assembled on-site. These components are designed for easy disassembling, enabling them to be reused in diverse projects, thus significantly reducing waste and conserving resources.  Not only does this method decrease construction waste by up to 90%, but it also decreases construction time by 30-50%, optimizing both environmental and financial efficiency. This reduction in time corresponds to substantial financial savings for businesses. Moreover, the modular approach allows greater flexibility, adapting to changing needs over time.  We believe, by adopting modular construction, the industry can transit from a 'take, make and dispose' model to a more sustainable 'reduce, reuse, and recycle' model, driving the industry towards a more circular and sustainable future. The feasibility of this concept is already being proven in markets around the globe, indicating its potential for scalability and real-world application.

"""
     

In [24]:
def generate_ans(text):
    """Build the evaluation prompt around `text` and return the model's raw reply.

    `text` (a problem statement plus a proposed solution) is embedded between
    triple backticks at the end of a fixed questionnaire (questions 0-8), and
    the whole prompt is handed to `get_completion`. The reply is returned
    unparsed.

    NOTE(review): the prompt wording is reproduced verbatim, misspellings
    included, because changing it changes what is sent to the model.
    """
    prompt=f"""
        I have provided you with one problem statement and one potential solution. 
        Please answer the following 8 questions and return an answer to each on a new line.

            0. If the Solution is relevant to the problem described ? answer yes or no. 

            if the answer is yes, proceed to answwering the following qqustions.  

            1. Which Industry does the solution apply to? choose from Manufacturing , Apparel, Construction,  Other.  
            2. Which 10R principle or principles from Cramer 2017 does the solution utilitize? Answer as a single word or "Not Known"
            3. Which environmental area does the solution focus on? answer in one or two words
            4. Does the solution quantify its environmental impact? answer as yes or no or "Not Known"
            5. Does the solution require heavy initital or operating investment? answer as yes or no or "Not Known"
            6. Does the solution provide monetary benefits including but not limited to additonal reveneue generation or reduced costs? answer as yes or no or "Not Known"
            7. Is the solution scalable? answer as yes or no or "Not Known"
            8. What is the aproximate payback period of the investment? answer as a single number or "Not Known"

        ```{text}```
        """
    # Hand the assembled prompt straight to the completion helper.
    return get_completion(prompt)

In [25]:
# Send the sample problem/solution pair through the prompt; `ans` holds the raw reply text.
ans = generate_ans(input_text)

In [26]:
# Show the raw reply exactly as returned (one numbered answer per line).
print(ans)

 0. Yes 
         1. Construction 
         2. Reuse 
         3. Waste 
         4. Yes 
         5. Not Known 
         6. Yes 
         7. Yes 
         8. Not Known


In [3]:
# Hard-coded copy of the reply printed above, so the parsing step below can be
# developed and re-run without another API call.
ans = """
0. Yes 
        1. Construction 
        2. Reuse 
        3. Waste 
        4. Yes 
        5. Not Known 
        6. Yes 
        7. Yes 
        8. Not Known
"""
# to format
lines = ans.splitlines()
# Drop the leading "N." marker by splitting on the first period instead of slicing a
# fixed three characters, so multi-digit question numbers and uneven spacing still
# parse; a line without any period is kept whole.
ans_list = [line.split(".", 1)[-1].strip() for line in lines if line.strip() != '']
# Display the parsed answers (the cell previously echoed the raw string instead).
ans_list

In [31]:
# Load the response table used to prototype the scoring step
# (presumably synthetic, going by the file name -- confirm).
df = pd.read_csv("../csv/random_response_data_frame.csv")
# Placeholder relevance flag: roughly 70% of rows are marked relevant. A seeded
# generator keeps the flags -- and every downstream count, score and ranking --
# identical across "Restart & Run All" runs.
df["relevance"] = np.random.default_rng(42).random(len(df)) < 0.7  # temporary
df

Unnamed: 0,industry,ten_R,area_focus,applicable,heavy_investment,monetary_benefits,scalable,payback_period,relevance
0,Other,Recover,Climate Change,No,Yes,Yes,Not Known,Not Known,True
1,Other,Recover,Plastic Pollution,No,No,Not Known,Not Known,Yes,True
2,Other,Rethink,Plastic Pollution,Yes,No,Yes,Yes,No,True
3,Other,Refurbish,Air Pollution,Yes,No,Not Known,Yes,Yes,False
4,Other,Remanufacture,Climate Change,No,Yes,Yes,Yes,No,True
...,...,...,...,...,...,...,...,...,...
995,Apparel,Refurbish,Ocean Acidification,Yes,No,No,Not Known,Yes,True
996,Manufacturing,Repair,Deforestation,No,No,Yes,Yes,Yes,False
997,Other,Refuse,Land Degradation,Yes,Not Known,No,No,Not Known,True
998,Manufacturing,Repair,Ocean Acidification,No,Not Known,Not Known,Not Known,No,True


In [57]:
# Sanity check on the placeholder flag: how many rows are marked relevant vs. not.
df.value_counts("relevance")

relevance
True     718
False    282
Name: count, dtype: int64

In [63]:
def calculate_scores(llm_response: pd.Series, feature_weights: dict) -> float:
    """Return the weighted feature score for one response row.

    Parameters
    ----------
    llm_response
        One row of responses. Must expose a "relevance" entry and, for every
        key in `feature_weights`, a numeric value under that key.
    feature_weights
        Mapping of feature name -> weight. Only the listed features contribute;
        for now industry, ten_R and area_focus are left out by the caller.

    Returns
    -------
    float
        Sum of value * weight over the weighted features, or 0.0 when the row
        is not flagged as relevant.

    Raises
    ------
    KeyError
        If "relevance" or a weighted feature is missing from `llm_response`.
    """
    # Explicit comparison on purpose: only a real True counts as relevant, so NaN
    # or stray strings in the column do not slip through a truthiness check.
    if not (llm_response["relevance"] == True):  # noqa: E712
        return 0.0
    return sum(
        (llm_response[feature] * weight for feature, weight in feature_weights.items()),
        0.0,
    )

In [64]:
# Encode the categorical answers numerically: "Yes" adds to a score, "No" subtracts
# from it, and "Not Known" is neutral.
value_mapping = {"Yes": 1, "No": -1, "Not Known": 0}
df_numeric = df.replace(value_mapping)
# Equal weighting across the five scored features.
# NOTE(review): with this encoding a "Yes" for heavy_investment *raises* the score --
# confirm that is the intended direction.
feature_weights = {"applicable": 0.2, 
                   "heavy_investment": 0.2,
                   "monetary_benefits": 0.2,
                   "scalable": 0.2,
                   "payback_period": 0.2}
# The weights are floats, so compare with a tolerance: an exact `== 1` can fail from
# rounding alone once the weights are changed.
assert np.isclose(sum(feature_weights.values()), 1.0), "feature_weights must sum to 1"

# Score every row; rows not flagged relevant come back as 0 from calculate_scores.
df_numeric["scores"] = df_numeric.apply(lambda row: calculate_scores(row, feature_weights), axis = 1)

In [66]:
# Preview the encoded frame together with the new "scores" column.
df_numeric.head()

Unnamed: 0,industry,ten_R,area_focus,applicable,heavy_investment,monetary_benefits,scalable,payback_period,relevance,scores
0,Other,Recover,Climate Change,-1,1,1,0,0,True,0.2
1,Other,Recover,Plastic Pollution,-1,-1,0,0,1,True,-0.2
2,Other,Rethink,Plastic Pollution,1,-1,1,1,-1,True,0.2
3,Other,Refurbish,Air Pollution,1,-1,0,1,1,False,0.0
4,Other,Remanufacture,Climate Change,-1,1,1,1,-1,True,0.2


## Rank the pairs by their calculated score

In [71]:
x = 0.25 # fraction (0-1) of top-ranked pairs to keep; 0.25 keeps the top 25%

In [74]:
# Row count corresponding to the top `x` fraction of all scored pairs.
num_top_pairs = int(len(df_numeric) * x)
# Order by score (highest first), keep that top slice, and preview its first rows.
(
    df_numeric
    .sort_values("scores", ascending=False)
    .iloc[:num_top_pairs]
    .head()
)

Unnamed: 0,industry,ten_R,area_focus,applicable,heavy_investment,monetary_benefits,scalable,payback_period,relevance,scores
928,Other,Refuse,Waste Management,1,1,1,1,1,True,1.0
34,Manufacturing,Refuse,Loss of Freshwater Resources,1,1,1,1,1,True,1.0
446,Manufacturing,Repurpose,Land Degradation,1,1,1,1,1,True,1.0
369,Other,Rethink,Plastic Pollution,1,1,1,1,1,True,1.0
886,Apparel,Rethink,Resource Depletion,1,1,1,1,1,True,1.0
