In [None]:
import pandas as pd
import requests

# Function to extract geographic information from a given text using Tika's GeoTopicParser
def extract_geo_info(text):
    """
    Extract geographic information using Apache Tika's GeoTopicParser.

    The text is sent with an HTTP PUT to the ``/rmeta`` endpoint of a Tika
    server on ``localhost:9998``. The first metadata record of the JSON reply
    is then read for the ``Geographic_NAME``, ``Geographic_LATITUDE`` and
    ``Geographic_LONGITUDE`` keys.

    Every failure mode (blank input, unreachable or slow server, non-200
    status, malformed or empty reply) is reported with ``print`` where
    applicable and yields ``'Unknown'`` placeholders instead of raising, so a
    batch run is not aborted by a single bad record.

    Parameters:
        text (str): The text content to parse for geographic information.

    Returns:
        tuple: A tuple containing the location name, latitude, and longitude.
            Any element that could not be determined is ``'Unknown'``.
    """
    unknown = ('Unknown', 'Unknown', 'Unknown')

    # Nothing to parse: skip the network round trip entirely.
    if not text or not text.strip():
        return unknown

    # Define the URL of the Tika server's metadata endpoint
    tika_url = 'http://localhost:9998/rmeta'
    # NOTE(review): GeoTopicParser is normally selected through the
    # application/geotopic media type (e.g. a *.geot filename). A .txt
    # filename may be routed to the plain-text parser instead -- confirm
    # against the Tika server's mime-type configuration.
    headers = {'Content-Disposition': 'inline; filename=file.txt', 'Accept': 'application/json'}

    # Send the text to the Tika server. The timeout keeps a stalled server
    # from hanging the caller forever; transport errors are downgraded to
    # the 'Unknown' placeholders.
    try:
        response = requests.put(
            tika_url,
            headers=headers,
            data=text.encode('utf-8'),
            timeout=120,
        )
    except requests.RequestException as exc:
        print(f"Error: {exc}")
        return unknown

    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        return unknown

    # A 200 reply is not guaranteed to carry valid JSON.
    try:
        records = response.json()
    except ValueError as exc:
        print(f"Error: {exc}")
        return unknown

    # /rmeta is expected to return a list of metadata dicts; guard against
    # an empty list or an unexpected shape before indexing into it.
    if not isinstance(records, list) or not records or not isinstance(records[0], dict):
        return unknown

    parsed = records[0]
    geo_info = parsed.get('Geographic_NAME', 'Unknown')
    lat = parsed.get('Geographic_LATITUDE', 'Unknown')
    lng = parsed.get('Geographic_LONGITUDE', 'Unknown')

    return geo_info, lat, lng

# Read the BFRO sightings report that is to be enriched with geo data
data = pd.read_csv('/content/final_report.csv')

# Seed the three output columns with a placeholder value
for new_column in ('Location_Name', 'Latitude', 'Longitude'):
    data[new_column] = 'Unknown'

# Columns whose non-null values are joined into the text sent to the parser
relevant_fields = ['State', 'County', 'Location Details', 'Nearest Town', 'Nearest Road', 'Observed', 'Also Noticed', 'Other Witnesses']

# Walk the sightings one by one, querying the geo parser for each report
for index, row in data.iterrows():
    # Keep only the populated fields, stringified, in the declared order
    populated_values = [
        str(row[field])
        for field in relevant_fields
        if pd.notnull(row[field])
    ]
    report_text = ' '.join(populated_values)

    # Ask the parser for the place name and coordinates of this report
    location_name, latitude, longitude = extract_geo_info(report_text)

    # Write the three results back into this row's output columns
    geo_values = (
        ('Location_Name', location_name),
        ('Latitude', latitude),
        ('Longitude', longitude),
    )
    for column_name, column_value in geo_values:
        data.at[index, column_name] = column_value

# Persist the enriched dataset as a new CSV file (row index omitted)
data.to_csv('/content/updated_final_report.csv', index=False)

# Tell the user the run has finished
print("Geographic information extracted and added to the dataset.")

