# The imports

In [1]:
import json
import os

import google.generativeai as genai
import pandas as pd 

# Setting up the API key for Gemini

In [None]:
# Read the key from the environment so the secret is never stored in the notebook.
# Export GOOGLE_API_KEY before starting the kernel; a KeyError here means it is unset.
genai.configure(api_key=os.environ['GOOGLE_API_KEY'])
model = genai.GenerativeModel('models/gemini-2.0-flash-exp')

# Reading the dataset

In [3]:
# Load the property listings from the CSV file in the working directory.
df_read = pd.read_csv(filepath_or_buffer='real_estate_data.csv')

# Inspecting the dataset

In [4]:
# Print a summary of the frame: row count, column names, non-null counts and dtypes.
df_read.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 6 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   bedrooms            200 non-null    int64 
 1   price               200 non-null    int64 
 2   state               200 non-null    object
 3   quality             200 non-null    int64 
 4   size_in_meters      200 non-null    int64 
 5   real_estate_number  200 non-null    object
dtypes: int64(4), object(2)
memory usage: 9.5+ KB


In [5]:
# Show the first two rows to get a feel for the values in each column.
df_read.head(2)

Unnamed: 0,bedrooms,price,state,quality,size_in_meters,real_estate_number
0,2,333249,Komaina,7,200,RE-1000
1,4,447481,PikPoun,1,287,RE-1001


In [6]:
# NOTE(review): this repeats the earlier df_read.info() call; df_read is not modified
# in between, so the summary is identical and this cell is redundant.
df_read.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 6 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   bedrooms            200 non-null    int64 
 1   price               200 non-null    int64 
 2   state               200 non-null    object
 3   quality             200 non-null    int64 
 4   size_in_meters      200 non-null    int64 
 5   real_estate_number  200 non-null    object
dtypes: int64(4), object(2)
memory usage: 9.5+ KB


# Prompt engineering<br> and retrieving the filter <br>and the prompt type

In [7]:
# The natural-language question to answer from the dataset.
user_query = "What are the real_estate numbers of the high quality houses in Komaina ?"

# First-stage prompt: ask the model to turn the question into a JSON filter plus a
# prompt_type label. Doubled braces ({{ }}) are literal braces inside the f-string.
# The example response must itself be valid JSON, because the reply is later parsed
# with json.loads and the model tends to mirror the example's syntax.
prompt1=f"""
Task: Analyze the real estate query and extract filtering conditions.
    
    Available columns and their types:
    - bedrooms (int): Number of bedrooms
    - price (int): Price in dollars
    - state (str): State name
    - quality (int): Quality rating (1-10)
    - size_in_meters (int): Size in square meters
    - real_estate_number (str): Unique identifier

User query: "{user_query}"
Extract the available columns data and return them as a JSON file 
and a "prompt_type": One of ["quality", "price", "general"] 
Example response:
    {{
            "bedrooms": 2,
            "price": [300000],
            "state": "Toulkana",
            "prompt_type": "price"
    }}

    Provide only the JSON response, no additional text.
"""

# Send the prompt to the model; the raw reply is parsed in a later cell.
response = model.generate_content(prompt1)

# Cleaning the filter to use

In [8]:
def extract_json(response_text):
    """Return the JSON text contained in a model reply.

    The reply may wrap the JSON in a Markdown code fence (```json ... ``` or a
    bare ``` ... ```), or it may be the JSON on its own with no fence at all,
    which is what the prompt asks for. In the fenced case the text between the
    opening fence and the next closing fence is returned unchanged; in the
    unfenced case the whole reply is returned with surrounding whitespace removed.

    Args:
        response_text: The raw text of the model reply.

    Returns:
        The JSON payload as a string (not parsed).

    Raises:
        ValueError: If the reply is empty/whitespace only, or if an opening
            fence is found without a matching closing fence.
    """
    end_marker = "\n```"

    # Prefer the explicitly tagged fence; fall back to an untagged one.
    for start_marker in ("```json\n", "```\n"):
        start = response_text.find(start_marker)
        if start != -1:
            break
    else:
        # No fence at all: the model followed "provide only the JSON response".
        bare_text = response_text.strip()
        if not bare_text:
            raise ValueError("Start marker not found in the response.")
        return bare_text

    start += len(start_marker)
    end = response_text.find(end_marker, start)
    if end == -1:
        raise ValueError("End marker not found in the response.")

    return response_text[start:end]

# Pull the JSON payload out of the model's reply and parse it into a dict.
# Named filter_json (not filter) so the built-in filter() is not shadowed.
filter_json = extract_json(response.text)
filtered_docs = json.loads(filter_json)

# The filter

In [9]:
# Display the filter dictionary parsed from the model's JSON reply.
filtered_docs

{'state': 'Komaina', 'quality': [7, 8, 9, 10], 'prompt_type': 'quality'}

# Filtering the dataset

In [10]:
def apply_filter(df, filter_dict):
    """Return the rows of ``df`` that satisfy every condition in ``filter_dict``.

    Each key of ``filter_dict`` that names a column of ``df`` becomes a
    condition on that column; keys that are not columns (for example
    ``prompt_type``) are ignored. A list-like value keeps rows whose column
    value is one of the listed values; any other value keeps rows whose column
    value equals it. The same rule applies to every column, so both
    ``"quality": [7, 8]`` and ``"quality": 7`` (or ``"price": [300000]``) work.

    Args:
        df: The DataFrame to filter. It is not modified.
        filter_dict: Mapping of column name to a required value or a list of
            accepted values.

    Returns:
        A new DataFrame containing only the matching rows.
    """
    result = df.copy()
    for column, wanted in filter_dict.items():
        if column not in result.columns:
            continue
        # is_list_like is False for strings, so "Komaina" is compared as a scalar.
        if pd.api.types.is_list_like(wanted):
            result = result[result[column].isin(wanted)]
        else:
            result = result[result[column] == wanted]
    return result

# Narrow the full dataset down to the rows that satisfy the extracted filter.
filtered_df = apply_filter(df_read, filtered_docs)


# Filtering by prompt type, assigning<br>the prompt and asking the LLM

In [None]:
# Second-stage prompt: pick the report wording that matches the prompt_type
# extracted earlier, embed the filtered rows, and ask the model for the report.
if not filtered_df.empty:
    # Use the general report when the model omitted prompt_type or returned a value
    # outside the three expected ones; otherwise `prompt` would be left undefined.
    prompt_type = filtered_docs.get('prompt_type', 'general')
    # to_string() renders every row; the default DataFrame repr truncates long frames,
    # which would hide matching rows from the model.
    filtered_table = filtered_df.to_string()
    if prompt_type == 'quality':
        prompt = f"""
        Real Estate Quality Analysis Task
            Original user query: "{user_query}"
            Filtered data: "{filtered_table}"
            Give the user a nice and short report of the quality from the retrieved 
            document in a very profesional way .
        """
    elif prompt_type == 'price':
        prompt = f"""
        Real Estate price Analysis Task
            Original user query: "{user_query}"
            Filtered data: "{filtered_table}"
            Give the user a nice and short report of the price from the retrieved 
            document in a very profesional way .
        """
    else:
        prompt = f"""
        Real Estate general Analysis Task
            Original user query: "{user_query}"
            Filtered data: "{filtered_table}"
            Give the user a nice and short report of the requirements of the user from 
            the retrieved document in a very profesional way .
        """
    response = model.generate_content(prompt)
    print(response.text)
else :
    print('The retrieved documents are empty')


Okay, here's a professional and concise report on the real estate quality in Komaina, based on the provided data:

**Real Estate Quality Analysis - Komaina**

This report summarizes the quality of select properties in Komaina. Based on a quality scale where 10 is the highest, the following real estate numbers represent high-quality homes:

*   **Quality 7:** RE-1000 and RE-1089
*   **Quality 8:** RE-1106
*   **Quality 9:** RE-1040, RE-1138, RE-1194
*   **Quality 10:** RE-1109, RE-1140

This indicates that several properties in Komaina achieve high quality scores, with the majority reaching a rating of 9 and 10.

