In [110]:
def clean_and_load_json(json_string):
    """
    Parse a JSON document out of a raw model reply.

    The reply may be wrapped in a Markdown code fence (with or without a
    "json" language tag) and may be surrounded by whitespace. The text is
    parsed as-is first, so valid JSON whose string values contain escaped
    quotes or escaped newlines is kept intact. Only when that fails are
    literal backslash-n and backslash-quote sequences unescaped and the
    parse retried, which covers double-escaped payloads.

    Args:
        json_string: Raw text expected to contain a JSON document.

    Returns:
        The decoded Python object, or None when the input is not a string
        or cannot be decoded.
    """
    if not isinstance(json_string, str):
        print(f"Error decoding JSON: expected a string, got {type(json_string).__name__}")
        return None

    text = json_string.strip()

    # Drop a surrounding Markdown code fence, tolerating a missing language
    # tag and trailing text after the closing fence.
    if text.startswith('```'):
        text = text[3:]
        if text[:4].lower() == 'json':
            text = text[4:]
        closing_fence = text.rfind('```')
        if closing_fence != -1:
            text = text[:closing_fence]
        text = text.strip()

    print("Cleaned String:", repr(text))

    # Try the text unchanged first: blindly unescaping would break valid JSON
    # (an escaped quote would end its string early, and a raw newline is not
    # allowed inside a JSON string).
    candidates = [text]
    unescaped = text.replace('\\n', '\n').replace('\\"', '"').strip()
    if unescaped != text:
        candidates.append(unescaped)

    last_error = None
    for candidate in candidates:
        try:
            return json.loads(candidate)
        except json.JSONDecodeError as e:
            last_error = e

    print(f"Error decoding JSON: {last_error}")
    return None




In [111]:
import pandas as pd

In [112]:
from remedier.prompt import prompt_template

## Loading Data

In [113]:
# Read the lot table and drop the first CSV column by position.
data = (
    pd.read_csv("final_data.csv")
    .iloc[:, 1:]
)
# Rescale school_distance by a fixed factor.
# NOTE(review): the unit conversion behind 1500 is not visible here — confirm and name the constant.
data["school_distance"] = data["school_distance"] * 1500
data.head()

Unnamed: 0,lot_id,length,area,longittude,latitude,geometry,BoroName,CDTANAME,heat_vulne,FSHRI,school_distance,income,air_quality
0,19500000025,808.578354,20904.920539,-73.943688,40.670929,POLYGON ((-73.94383271287597 40.67114717239303...,Brooklyn,BK08 Crown Heights (North) (CD 8 Approximation),5.0,4.0,4.670131,54211.0,6.6
1,19500000568,670.877526,11100.514525,-73.935874,40.694925,POLYGON ((-73.93594017532916 40.69472976887952...,Brooklyn,BK03 Bedford-Stuyvesant (CD 3 Approximation),4.0,3.0,2.789412,51507.0,6.7
2,19500000569,615.005206,15974.755269,-73.943133,40.671834,POLYGON ((-73.94318848936774 40.67205993731008...,Brooklyn,BK08 Crown Heights (North) (CD 8 Approximation),5.0,4.0,3.777578,54211.0,6.6
3,19500000882,348.336185,6259.906231,-73.938028,40.694939,"POLYGON ((-73.93801437144917 40.6951108401132,...",Brooklyn,BK03 Bedford-Stuyvesant (CD 3 Approximation),4.0,3.0,0.611709,51507.0,6.7
4,19500001059,420.281057,6735.829655,-73.943037,40.660892,POLYGON ((-73.94285430749058 40.66097458745293...,Brooklyn,BK09 Crown Heights (South) (CD 9 Approximation),5.0,2.0,3.542392,82716.0,6.3


In [114]:
# Keep the 500 largest lots by area. reset_index() (without drop=True) keeps
# the original row labels as an "index" column.
top_500 = (
    data
    .sort_values("area", ascending=False)
    .reset_index()
    .head(500)
)

## Setting up LangChain and OpenAI

In [115]:
import json
import os

from dotenv import load_dotenv, find_dotenv
from langchain.prompts import PromptTemplate
from openai import OpenAI
# Load variables from the .env file located by find_dotenv(). Assigning to `_`
# keeps the return value from being echoed as the cell output.
# NOTE(review): presumably this supplies the API key for the OpenAI client — confirm.
_ = load_dotenv(find_dotenv())


In [116]:
# Wrap the imported prompt_template in a PromptTemplate; the response loop
# fills it with per-lot features via prompt.format(...).
prompt = PromptTemplate.from_template(prompt_template)


In [117]:
# Fix the misspelled "longittude" column name so later cells can read row.longitude.
top_500 = top_500.rename(columns={"longittude": "longitude"})


In [118]:
# Preview the renamed frame. The name `top_120` is not defined in this
# notebook; the recorded output is the `top_500` frame.
top_500

Unnamed: 0,index,lot_id,length,area,longitude,latitude,geometry,BoroName,CDTANAME,heat_vulne,FSHRI,school_distance,income,air_quality
0,4269,2500000026,8579.064912,1.400269e+06,-73.882343,40.595337,POLYGON ((-73.88059973022631 40.59264026121464...,Brooklyn,BK56 Jamaica Bay (West) (JIA 56 Approximation),,,49.729903,,
1,75,20500000119,7239.913626,6.616666e+05,-73.870831,40.651426,POLYGON ((-73.8675240922531 40.653422690445154...,Brooklyn,BK05 East New York-Cypress Hills (CD 5 Approxi...,3.0,3.0,10.013246,38670.0,6.3
2,2973,19500001055,8327.208282,6.490558e+05,-74.017224,40.667794,POLYGON ((-74.01787483412605 40.67111760977025...,Brooklyn,BK06 Park Slope-Carroll Gardens (CD 6 Approxim...,2.0,,15.853380,130396.0,6.9
3,4231,2500000006,3695.028623,4.919382e+05,-73.881739,40.580793,POLYGON ((-73.88072908167355 40.58172961446374...,Brooklyn,BK56 Jamaica Bay (West) (JIA 56 Approximation),2.0,,41.349493,91379.0,
4,4250,2500000342,11459.024553,4.353857e+05,-73.897425,40.588361,POLYGON ((-73.89819812090605 40.59037319881936...,Brooklyn,BK56 Jamaica Bay (West) (JIA 56 Approximation),2.0,,43.892493,91379.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,3598,19500000244,1106.904555,2.813842e+04,-73.948912,40.681585,POLYGON ((-73.94863839430582 40.68180547374878...,Brooklyn,BK03 Bedford-Stuyvesant (CD 3 Approximation),4.0,1.0,4.859506,92684.0,6.7
496,920,20500001579,774.704195,2.810518e+04,-73.879087,40.676704,POLYGON ((-73.87941592679898 40.67663427610019...,Brooklyn,BK05 East New York-Cypress Hills (CD 5 Approxi...,4.0,3.0,1.411116,56298.0,6.3
497,2182,18500000435,1518.871212,2.809181e+04,-73.955770,40.630595,POLYGON ((-73.95610665169164 40.63053528366709...,Brooklyn,BK14 Flatbush-Midwood (CD 14 Approximation),5.0,3.0,3.779507,82904.0,6.1
498,1913,18500000750,720.832188,2.807456e+04,-73.962088,40.580525,POLYGON ((-73.96193582210708 40.58077064841048...,Brooklyn,BK13 Coney Island-Brighton Beach (CD 13 Approx...,3.0,5.0,0.471947,61320.0,5.8


In [119]:
# Client used for the chat-completion requests in the response loop.
# No credentials are passed explicitly; presumably they come from the environment — TODO confirm.
client = OpenAI()

In [120]:
# CSV path that the response loop appends one row per lot to.
output_file = 'gpt_responses.csv'

In [122]:
# Ask the model for a proposal for every lot in top_500 and append each answer
# to output_file immediately, so finished rows survive a failure part-way through.

# Define write_header before its first use in the loop. The header is written
# only when the output file is missing or empty, so re-running this cell in
# append mode does not put a second header row in the middle of the file.
write_header = not os.path.exists(output_file) or os.path.getsize(output_file) == 0

for _, row in top_500.iterrows():
    # Values substituted into the prompt template for this lot.
    features = {
        'length': row.length,
        'area': row.area,
        'latitude': row.latitude,
        'longitude': row.longitude,
        'borough':  row.BoroName,
        'community_name': row.CDTANAME,
        'heat_vulnerability': row.heat_vulne,
        'flood_risk': row.FSHRI,
        'school_proximity': row.school_distance,
        'median_income': row.income,
        'air_quality': row.air_quality
    }

    formatted_prompt = prompt.format(**features)

    response = client.chat.completions.create(
        model="gpt-4o",  # or "gpt-3.5-turbo"
        messages=[
            {"role": "system", "content": "You are a community-focused problem solver specializing in sustainable urban planning for New York, with a deep understanding of local needs"},
            {"role": "user", "content": formatted_prompt}
        ])

    gpt_response = response.choices[0].message.content
    # clean_and_load_json returns None when the reply cannot be decoded.
    gpt_response_json = clean_and_load_json(gpt_response)

    # Create a DataFrame with the current response (single row)
    current_response_df = pd.DataFrame([{
        'lot_id': row.lot_id,
        'gpt_response': gpt_response_json
    }])

    # Append the current response to the CSV file
    current_response_df.to_csv(output_file, mode='a', header=write_header, index=False, encoding='utf-8')

    # After writing the first row, make sure header is False for subsequent rows
    write_header = False


Cleaned String: '{\n    "chosen_response": "Urban Wetlands",\n    "reasoning": "Given the large area and the importance of climate resilience in Brooklyn, an urban wetlands project offers both ecological and cultural benefits. Urban wetlands can help manage stormwater, reduce flooding risks, and improve air quality, which aligns well with the site\'s features. While the Heat Vulnerability and Flood Risk Index values are not provided, wetlands generally excel in mitigating such risks. Given the site\'s distance from schools and a potentially diverse income bracket, urban wetlands could serve as an educational and recreational space. Financially, urban wetlands are a cost-effective long-term investment in community health, resilience, and biodiversity.",\n    "suggested_price": 1500000,\n    "top_3_positive_impacts": [\n        {"First important positive impact": "Flood mitigation and stormwater management"},\n        {"Second important positive impact": "Improvement in air and water qua

In [123]:
# Save the selected lots. The target path has no file extension, and
# index=False is not passed here.
top_500.to_csv("Top 500 Brooklyn")