In [None]:
import pandas as pd
import requests

def extract_geo_info(text, *, tika_url='http://localhost:9998/rmeta', timeout=60):
    """
    Extract geographic information using Apache Tika's GeoTopicParser.

    The text is sent with an HTTP PUT to a Tika server's ``/rmeta`` endpoint and
    the geographic fields are read from the first record of the JSON response.
    Every failure mode (empty input, network error, non-200 status, malformed
    response) is handled the same way: a message is printed where applicable
    and 'Unknown' placeholders are returned, so one bad row cannot abort a
    long batch run.

    Parameters:
        text (str): The text content to parse for geographic information.
        tika_url (str): URL of the Tika endpoint. Defaults to a server running
            on localhost at port 9998.
        timeout (float): Seconds to wait for the server before giving up.

    Returns:
        tuple: A tuple containing the location name, latitude, and longitude.
            Each element is 'Unknown' when the value could not be obtained.
    """
    unknown = ('Unknown', 'Unknown', 'Unknown')

    # Nothing to parse: skip the network round trip entirely.
    if not text or not text.strip():
        return unknown

    headers = {'Content-Disposition': 'inline; filename=file.txt', 'Accept': 'application/json'}

    try:
        # Without a timeout a stalled server would block this call forever.
        response = requests.put(
            tika_url,
            headers=headers,
            data=text.encode('utf-8'),
            timeout=timeout,
        )
    except requests.RequestException as exc:
        # Connection refused, timeout, etc. Treated like a non-200 response.
        print(f"Error: {exc}")
        return unknown

    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        return unknown

    try:
        records = response.json()
    except ValueError as exc:
        # The body was not valid JSON despite the 200 status.
        print(f"Error: invalid JSON in response ({exc})")
        return unknown

    # The code below reads the first element as a mapping; guard against an
    # empty list or any other unexpected payload shape.
    if not isinstance(records, list) or not records or not isinstance(records[0], dict):
        print("Error: unexpected response structure")
        return unknown

    parsed = records[0]
    geo_info = parsed.get('Geographic_NAME', 'Unknown')
    lat = parsed.get('Geographic_LATITUDE', 'Unknown')
    lng = parsed.get('Geographic_LONGITUDE', 'Unknown')

    return geo_info, lat, lng

# Read the report dataset from disk (adjust the path to match your local environment)
data = pd.read_csv('path/to/your/final_report.csv')  # Change the file path

# Seed the three output columns with a placeholder value before any lookup runs.
for new_column in ('Location_Name', 'Latitude', 'Longitude'):
    data[new_column] = 'Unknown'

# Columns whose contents are concatenated and handed to the geo extractor.
relevant_fields = [
    'State',
    'County',
    'Location Details',
    'Nearest Town',
    'Nearest Road',
    'Observed',
    'Also Noticed',
    'Other Witnesses',
]

for index, row in data.iterrows():
    # Keep only the non-null fields and merge them into one space-separated string.
    present_values = [str(row[field]) for field in relevant_fields if pd.notnull(row[field])]
    report_text = ' '.join(present_values)

    place, lat_value, lng_value = extract_geo_info(report_text)

    # Write the lookup result back into this row of the DataFrame.
    data.at[index, 'Location_Name'] = place
    data.at[index, 'Latitude'] = lat_value
    data.at[index, 'Longitude'] = lng_value

# Persist the enriched dataset as a new CSV file, without the index column
data.to_csv('path/to/your/updated_final_report.csv', index=False)  # Change the file path

print("Geographic information extracted and added to the dataset.")

