In [None]:
import pandas as pd
from tika import parser

# Location of the CSV export that holds the BFRO sighting reports
file_path = 'bfro_reports.csv'

# Read every sighting report from that file into a DataFrame
data = pd.read_csv(filepath_or_buffer=file_path)

# Function to extract geographic information using Tika's GeoTopicParser
def extract_geo_info(text):
    """Extract a location name and coordinates from free text via Tika.

    The text is sent to the Tika server through ``parser.from_buffer`` and
    the ``Geographic_NAME``, ``Geographic_LATITUDE`` and
    ``Geographic_LONGITUDE`` metadata fields of the response are read back.

    Args:
        text: The report text to analyse. Missing values (``None`` or the
            float NaN that pandas uses for empty cells) and blank strings
            are accepted and are not sent to the server.

    Returns:
        A ``(location_name, latitude, longitude)`` tuple. Each element is
        ``'Unknown'`` when the input is missing/blank, when the response
        carries no metadata, or when that particular field is absent.
    """
    unknown = 'Unknown'

    # Empty CSV cells arrive as None or float NaN (NaN is the only float
    # that is not equal to itself); there is nothing to parse in that case.
    if text is None or (isinstance(text, float) and text != text):
        return unknown, unknown, unknown

    # A blank string cannot mention a place, so skip the server round-trip.
    if isinstance(text, str) and not text.strip():
        return unknown, unknown, unknown

    # Parse the text using Tika's GeoTopicParser
    # NOTE(review): confirm that tika-python honours a 'handlers' entry in
    # requestOptions and that the server has the GeoTopicParser configured.
    parsed = parser.from_buffer(text, requestOptions={'handlers': ['geotopic']})

    # The response may lack a usable 'metadata' mapping (missing or None);
    # treat that the same as "no geographic fields found".
    metadata = (parsed or {}).get('metadata') or {}

    # Extract location name, latitude, and longitude from the parsed data
    geo_info = metadata.get('Geographic_NAME', unknown)
    lat = metadata.get('Geographic_LATITUDE', unknown)
    lng = metadata.get('Geographic_LONGITUDE', unknown)

    return geo_info, lat, lng

# Columns that receive the extracted geographic information, listed in the
# same order as the tuple returned by extract_geo_info
geo_columns = ['Location_Name', 'Latitude', 'Longitude']

# The text of each sighting report is expected in a column named
# 'report_text'; fail up front with a clear message if it is not there
if 'report_text' not in data.columns:
    raise KeyError("Expected a 'report_text' column in the dataset.")

# Extract geographic information from every report in a single pass rather
# than writing to the DataFrame cell by cell while iterating over it
extracted = [
    extract_geo_info(report_text) for report_text in data['report_text']
]

# Add the new columns to the dataset, one whole column at a time
for position, column in enumerate(geo_columns):
    data[column] = [geo_info[position] for geo_info in extracted]

# Save the updated dataset to a new CSV file
data.to_csv('updated_bfro_reports.csv', index=False)

# Print a message indicating the process is completed
print("Geographic information extracted and added to the dataset.")
