<a href="https://colab.research.google.com/github/alexander-gavrilov/JSON-java/blob/master/ai_lab_01.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import json
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

# Read an Excel workbook into a DataFrame
def load_excel_file(file_path):
    """Load the spreadsheet at *file_path* with the openpyxl engine.

    Args:
        file_path: Location of the Excel file; handed unchanged to
            ``pandas.read_excel``.

    Returns:
        The object produced by ``pandas.read_excel`` for that file
        (pandas' default sheet selection applies).
    """
    frame = pd.read_excel(file_path, engine='openpyxl')
    return frame

# --- Prompt texts -----------------------------------------------------------
# Plain strings whose {placeholders} are filled in by PromptTemplate.

# Asks the model to derive the area from a free-text description
area_prompt_template = """
Given the following property description, calculate the total property area in square meters.

Property Description: {description}

Total Property Area (in square meters):
"""

# Asks the model for a yes/no verdict on a previously calculated area
validation_prompt_template = """
Given the following property description and calculated area, validate if the calculated area is correct.

Property Description: {description}
Calculated Area: {calculated_area} square meters

Is the calculated area correct? (yes/no):
"""

# --- Model ------------------------------------------------------------------
# A single model instance is shared by both chains below.
# NOTE(review): "gpt-4o-mini" is a chat model while `OpenAI` is LangChain's
# completion-style wrapper -- confirm the installed LangChain version accepts
# this pairing (a chat wrapper may be required).
llm = OpenAI(temperature=0.7, model_name="gpt-4o-mini")

# --- Prompts and chains -----------------------------------------------------
area_prompt = PromptTemplate(
    input_variables=["description"],
    template=area_prompt_template,
)
validation_prompt = PromptTemplate(
    input_variables=["description", "calculated_area"],
    template=validation_prompt_template,
)

area_chain = LLMChain(prompt=area_prompt, llm=llm)
validation_chain = LLMChain(prompt=validation_prompt, llm=llm)

# Function to calculate property area using OpenAI model
def calculate_property_area(description):
    """Ask the area chain for the property's area and parse it as a float.

    The prompt requests a number, but the model's reply is free text and may
    carry units or a short sentence ("120 square meters", "The total area is
    1,250.5 m2"). A bare number is parsed directly; otherwise the first
    numeric token in the reply is used.

    Args:
        description: Free-text property description inserted into the prompt.

    Returns:
        The area reported by the model, as a float.

    Raises:
        ValueError: If the reply contains no number at all.
    """
    import re  # local import keeps this block self-contained

    response = area_chain.run(description)
    text = response.strip()

    # Fast path: the reply is exactly a number, as the prompt asks for.
    try:
        return float(text)
    except ValueError:
        pass

    # Fallback: take the first number in the reply. The first alternative
    # accepts comma-grouped thousands ("1,250.5"); the others accept plain
    # integers/decimals and leading-dot decimals.
    match = re.search(
        r"\d{1,3}(?:,\d{3})+(?!\d)(?:\.\d+)?|\d+(?:\.\d+)?|\.\d+",
        text,
    )
    if match is None:
        raise ValueError(f"No numeric area found in model response: {response!r}")

    area_in_square_meters = float(match.group().replace(",", ""))
    return area_in_square_meters

# Function to validate property area using OpenAI model
def validate_property_area(description, calculated_area):
    """Ask the validation chain whether *calculated_area* matches *description*.

    The model's reply is free text, so an exact comparison with ``'yes'``
    would reject answers such as "Yes." or "yes, the area is correct".
    The reply is treated as positive when its first word is "yes"
    (case-insensitive, ignoring leading punctuation/whitespace).

    Args:
        description: Free-text property description inserted into the prompt.
        calculated_area: Area value to be checked, inserted into the prompt.

    Returns:
        True if the model's reply starts with the word "yes", else False.
    """
    import re  # local import keeps this block self-contained

    response = validation_chain.run(description=description, calculated_area=calculated_area)

    # \W* skips leading quotes/markdown/whitespace; \b stops "yesterday"
    # and similar words from counting as a yes.
    is_yes = re.match(r"\W*yes\b", response, flags=re.IGNORECASE)
    return is_yes is not None

# Main function to process the Excel file and calculate property areas
def process_properties(file_path, output_path='property_areas.json'):
    """Compute, validate and save the area of every property in a workbook.

    Each row's ``property_description`` is sent to the area chain, the
    result is checked by the validation chain, and the accepted areas are
    written to *output_path* as a JSON object keyed by ``property_id``.
    Rows that cannot be processed are reported on stdout and skipped so that
    one bad row does not discard the results of the others.

    Args:
        file_path: Path of the Excel file; it must contain the columns
            ``property_id`` and ``property_description``.
        output_path: Destination of the JSON result file. Defaults to
            ``'property_areas.json'`` in the current working directory.

    Returns:
        None. Results are written to *output_path*.
    """
    df = load_excel_file(file_path)

    # Accepted results: property_id -> area in square meters
    property_areas = {}

    for _, row in df.iterrows():
        property_id = row['property_id']
        description = row['property_description']

        # pandas represents empty cells as NaN; sending that to the model
        # would spend a call on the literal text "nan".
        if pd.isna(description) or not str(description).strip():
            print(f"No description for property ID {property_id}. Skipping this property.")
            continue

        # The model's reply may not contain a usable number; skip the row
        # instead of aborting the whole batch.
        try:
            area_in_square_meters = calculate_property_area(description)
        except ValueError as exc:
            print(f"Could not extract an area for property ID {property_id} ({exc}). Skipping this property.")
            continue

        # Validate the calculated area
        if validate_property_area(description, area_in_square_meters):
            property_areas[property_id] = area_in_square_meters
        else:
            print(f"Validation failed for property ID {property_id}. Skipping this property.")

    # Serialize before opening the file so a serialization error cannot
    # leave a truncated output file behind.
    result_json = json.dumps(property_areas, indent=4)

    with open(output_path, 'w', encoding='utf-8') as json_file:
        json_file.write(result_json)

    print(f"Property areas have been calculated and saved to '{output_path}'.")

# Run the main function with the provided Excel file path.
# Guarded so that importing this module does not start the batch run
# (model calls and the JSON file write); running it as a script or
# notebook cell still does.
if __name__ == "__main__":
    process_properties('Copy of AI Lab 1 - Data - Topic 1- Information Extraction.xlsx')


ModuleNotFoundError: No module named 'langchain'