# Import Necessary Dependencies

In [72]:
import pandas as pd
import re
import time
from langchain.chat_models import ChatOpenAI
from langchain.schema import SystemMessage, HumanMessage
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
import os

# Load dataset

In [74]:
# Read the address records from the CSV export into a DataFrame
df = pd.read_csv(
    filepath_or_buffer="C:\\Users\\akhil\\Downloads\\addresses_company_missing1.csv",
)

# Set the OpenAI API Key

In [None]:
# OpenAI API key: read it from the OPENAI_API_KEY environment variable so the
# real secret never has to be pasted into (and saved with) the notebook.
# The original placeholder is kept as the fallback, so replacing it by hand
# still works when the variable is unset or empty.
openai_api_key = os.environ.get("OPENAI_API_KEY") or "your api key"

# Initialize OpenAI Model

In [78]:
# GPT-4 chat client with temperature 0, used for the pincode lookups
llm = ChatOpenAI(
    model_name="gpt-4",
    temperature=0,
    openai_api_key=openai_api_key,
)


# Define the system message 

In [80]:
# Instructions sent with every pincode request: the task, three worked
# examples, and the rule to answer "000000" when the model is unsure.
pincode_system_prompt = """
You are an expert in Indian geography. Your task is to find the **correct pincode** for a given address.
The address contains a **street name, location, and place**.

**Example Inputs & Outputs:**
    Address: "MG Road, Bangalore, Karnataka" → **Pincode: 560001**
    Address: "Sector 15, Gurgaon, Haryana" → **Pincode: 122001**
    Address: "Connaught Place, Delhi, Delhi" → **Pincode: 110001**

 **Rules:**
- Extract only the **6-digit** pincode.
- Do **not** make up a random pincode.
- If unsure, return `"000000"`.

Here is the given address:
"""

system_message = SystemMessage(content=pincode_system_prompt)


# Function to extract only the pincode from the response

In [82]:
def extract_pincode(response):
    """Return the first standalone 6-digit number found in ``response``.

    A candidate must be exactly six digits delimited by word boundaries, so
    longer digit runs are ignored. When the text contains no such number the
    sentinel ``'000000'`` is returned instead.
    """
    six_digit_runs = re.findall(r'\b\d{6}\b', response)
    if not six_digit_runs:
        return '000000'  # Sentinel: nothing that looks like a pincode
    return six_digit_runs[0]

# Function to fill missing pincodes

In [84]:
def fill_missing_pincodes(row):
    """Return the row's pincode, asking the LLM for it when the value is missing.

    Parameters
    ----------
    row : pandas.Series or mapping
        One address record. Reads the 'Pincode', 'Street Name', 'Location'
        and 'Place' entries.

    Returns
    -------
    The existing 'Pincode' value, unchanged, when one is present. Otherwise
    the 6-digit string extracted from the model's reply, or the sentinel
    '000000' when there is no address to look up, the reply contains no
    pincode, or the request fails.
    """
    current = row['Pincode']
    # NaN/None, '' and whitespace-only strings all count as "no pincode".
    if not (pd.isna(current) or str(current).strip() == ''):
        return current

    # Leave out blank/NaN components so the prompt never contains a literal "nan".
    components = [row[column] for column in ('Street Name', 'Location', 'Place')]
    address = ", ".join(
        str(part).strip()
        for part in components
        if not pd.isna(part) and str(part).strip()
    )
    if not address:
        # Nothing to look up; skip the API call and report "unknown".
        return '000000'

    # Human message for the prompt
    human_message = HumanMessage(content=f"Find the correct pincode based on this address: {address}")

    try:
        response = llm([system_message, human_message]).content  # Get the response from OpenAI
        return extract_pincode(response)
    except Exception as e:
        # Best effort: one failed request must not abort the whole run.
        print(f"Error fetching pincode for {address}: {e}")
        return '000000'  # Default for failures

In [85]:
# Fill in missing pincodes, pausing after each API call to avoid rate limits.
# Rows that already hold a pincode are skipped entirely, so they no longer
# cost a one-second sleep each.
pincode_values = df['Pincode']
needs_lookup = pincode_values.isna() | (pincode_values.astype(str).str.strip() == '')
if needs_lookup.any():
    # Lookup results are strings; switch to object dtype up front so writing
    # them into a numeric column is not an incompatible-dtype assignment.
    df['Pincode'] = pincode_values.astype(object)
pincode_col = df.columns.get_loc('Pincode')
for pos in needs_lookup.to_numpy().nonzero()[0]:
    # Read and write by position so the loop does not depend on index labels
    # (mixing .iloc reads with .loc writes breaks on a non-default index).
    df.iloc[pos, pincode_col] = fill_missing_pincodes(df.iloc[pos])
    time.sleep(1)  # Prevent hitting API limits

In [86]:
# Ensure Pincode is clean before conversion
def clean_pincode(value):
    """Convert a raw pincode cell to an ``int``, or ``None`` if it is not numeric.

    The value is stringified and stripped, parsed as a float first (so text
    such as ``"560001.0"`` is accepted) and then truncated to an integer.

    Returns ``None`` for anything that cannot be converted: non-numeric text,
    NaN/None, and infinite or overflowing values.
    """
    try:
        return int(float(str(value).strip()))  # Convert to float first, then integer
    except (ValueError, OverflowError):
        # ValueError: non-numeric text or NaN. OverflowError: 'inf' or a value
        # that overflows to infinity when parsed as a float.
        return None  # Keep it as None if conversion fails

# Apply cleaning function to Pincode column. The nullable Int64 dtype keeps the
# values integers even when some rows could not be converted (None -> <NA>);
# without it a column containing missing values can end up as float and be
# written to the CSV as e.g. 560001.0.
df['Pincode'] = df['Pincode'].apply(clean_pincode).astype('Int64')

# Save the cleaned dataset (this overwrites the input file in place)
df.to_csv('C:\\Users\\akhil\\Downloads\\addresses_company_missing1.csv', index=False)

print("Updated dataset saved successfully with integer pincodes.")


Updated dataset saved successfully with integer pincodes.


# Define the prompt template

In [88]:
# Template asking for an address's state; filled from three named fields
prompt_template = PromptTemplate(
    template=(
        "Given the following details:\n"
        "Street Number: {street}\n"
        "Place: {place}\n"
        "Location: {location}\n"
        "What is the correct state for this address?"
    ),
    input_variables=["street", "place", "location"],
)

In [None]:
# Read the key from the OPENAI_API_KEY environment variable rather than
# embedding it in the notebook; the placeholder remains as the fallback.
openai_api_key = os.environ.get("OPENAI_API_KEY") or "your api key"

In [90]:
# Re-create the GPT-4 chat client (temperature 0) with the key set above
llm = ChatOpenAI(
    model_name="gpt-4",
    temperature=0,
    openai_api_key=openai_api_key,
)

In [91]:
# Wire the prompt template and the chat model together into one LLMChain
chain = LLMChain(
    prompt=prompt_template,
    llm=llm,
)

# Finding states of all locations 

In [93]:
# GPT-4 chat client (temperature 0) used by the state and country lookups
chat = ChatOpenAI(
    model_name="gpt-4",
    openai_api_key=openai_api_key,
    temperature=0,
)

def get_state_from_location(street_name, place, location, pincode):
    """Ask the chat model which Indian state an address belongs to.

    The four address fields are interpolated into a single prompt and sent
    together with a system instruction to answer with the state name only.

    Returns the text of the model's reply with surrounding whitespace removed.
    """
    instructions = SystemMessage(content="You are an AI assistant that extracts only the state name from location details. Do not return any extra text.")
    question = HumanMessage(content=f"""
        Given the following address details:
        - Street Name: {street_name}
        - Place: {place}
        - Location: {location}
        - Pincode: {pincode}
        
        Identify and return **only** the correct state name in India. Do not include any additional words, phrases, or explanations.
        """)

    reply = chat([instructions, question])
    return reply.content.strip()

In [94]:
# Look up the state for every row and store the answers in the State column
df['State'] = df.apply(
    lambda record: get_state_from_location(
        street_name=record['Street Name'],
        place=record['Place'],
        location=record['Location'],
        pincode=record['Pincode'],
    ),
    axis=1,
)

In [120]:
# Write the DataFrame, now including the State column, back to the CSV
df.to_csv(
    path_or_buf="C:\\Users\\akhil\\Downloads\\addresses_company_missing1.csv",
    index=False,
)

print("State column added successfully!")

State column added successfully!


# Finding countries of all locations 

In [128]:
def get_country_from_location(street_name, place, location, pincode, state):
    """Ask the chat model which country an address belongs to.

    The five address fields are interpolated into a single prompt and sent
    together with a system instruction to answer with the country name only.

    Returns the text of the model's reply with surrounding whitespace removed.
    """
    messages = [
        SystemMessage(content="You are an AI assistant that extracts only the Country name from location details. Do not return any extra text."),
        # The request asks for the country of the address; the earlier wording
        # ("Country name in India") was carried over from the state prompt.
        HumanMessage(content=f"""
        Given the following address details:
        - Street Name: {street_name}
        - Place: {place}
        - Location: {location}
        - Pincode: {pincode}
        - State: {state}
        
        Identify and return **only** the correct country name for this address. Do not include any additional words, phrases, or explanations.
        """)
    ]
    
    response = chat(messages)
    return response.content.strip()

In [130]:
# Apply function to update the Country column.
# This must call get_country_from_location: get_state_from_location takes only
# four arguments, so passing it the State value as a fifth raises a TypeError.
df['Country'] = df.apply(lambda row: get_country_from_location(row['Street Name'], row['Place'], row['Location'], row['Pincode'], row['State']), axis=1)

In [131]:
# Write the DataFrame, now including the Country column, back to the CSV
df.to_csv(
    path_or_buf="C:\\Users\\akhil\\Downloads\\addresses_company_missing1.csv",
    index=False,
)

print("Country column added successfully!")

Country column added successfully!
