In [21]:
import pandas as pd
from rdflib import Graph, Literal, RDF, Namespace, URIRef
from rdflib.namespace import XSD
from datetime import datetime

In [20]:
# Alternative input kept for reference: 'ufo_report_br.csv'
# Load the sightings, discard every row that has a missing value, and keep
# only the first 50 remaining rows.
df = pd.read_csv('ufo_sightings.csv').dropna().head(50)

In [22]:
def convert_date_to_iso(date_str):
    """Convert a ``MM/DD/YYYY`` date string to ISO 8601 (``YYYY-MM-DD``).

    Args:
        date_str: Date text such as ``"10/10/1978"``.

    Returns:
        The ISO-formatted date string, or ``None`` when ``date_str`` is not a
        string (for example ``None`` or a NaN float) or does not match the
        ``%m/%d/%Y`` format.
    """
    try:
        return datetime.strptime(date_str, "%m/%d/%Y").date().isoformat()
    except (TypeError, ValueError):
        # ValueError: text does not match the format.
        # TypeError: strptime was handed something that is not a string.
        return None

def convert_time_to_iso(time_str):
    """Convert an ``HH:MM`` (24-hour) time string to ISO 8601 (``HH:MM:SS``).

    Args:
        time_str: Time text such as ``"20:30"``.

    Returns:
        The ISO-formatted time string, or ``None`` when ``time_str`` is not a
        string (for example ``None`` or a NaN float) or does not match the
        ``%H:%M`` format (``"24:00"`` is rejected by ``strptime``).
    """
    try:
        return datetime.strptime(time_str, "%H:%M").time().isoformat()
    except (TypeError, ValueError):
        # ValueError: text does not match the format.
        # TypeError: strptime was handed something that is not a string.
        return None

In [23]:
# Graph that accumulates every triple produced by the notebook.
g = Graph()

# P1 holds the classes/properties used as types and predicates below;
# EX holds the individual resources (events, locations, shapes, witnesses).
P1 = Namespace("http://example.org/p1#")
EX = Namespace("http://example.org/")

# Register short prefixes so the serialized output uses p1:/ex: names.
for prefix, namespace in (("p1", P1), ("ex", EX)):
    g.bind(prefix, namespace)

In [24]:
# Columns the loop reads unconditionally. Checking them up front turns the
# bare "KeyError: 'date_time'" raised from the first row into a message that
# names what is missing and what the CSV actually provides.
required_columns = ['date_time', 'encounter_length', 'latitude', 'longitude', 'ufo_shape']
missing_columns = [col for col in required_columns if col not in df.columns]
if missing_columns:
    raise KeyError(
        f"Missing expected column(s) {missing_columns}; "
        f"available columns are {list(df.columns)}"
    )

# Caches so that each distinct (latitude, longitude) pair and each distinct
# shape value is described once and shared between sighting events.
unique_locations = {}
unique_shapes = {}

for index, row in df.iterrows():
    sighting_event = EX[f"SightingEvent{index}"]

    # "<date> <time>" is expected; maxsplit=1 plus the length checks keep a
    # date-only value or a value with extra tokens from breaking the loop.
    # Parts that cannot be parsed come back as None and are simply skipped.
    date_time_parts = str(row['date_time']).split(maxsplit=1)
    date_str = date_time_parts[0] if date_time_parts else ''
    time_str = date_time_parts[1] if len(date_time_parts) > 1 else ''
    iso_date = convert_date_to_iso(date_str)
    iso_time = convert_time_to_iso(time_str)

    g.add((sighting_event, RDF.type, P1.SightingEvent))
    if iso_date:
        g.add((sighting_event, P1.date, Literal(iso_date, datatype=XSD.date)))
    if iso_time:
        g.add((sighting_event, P1.time, Literal(iso_time, datatype=XSD.time)))
    if not pd.isna(row['encounter_length']):
        # Going through float() accepts numeric text such as "20.5"; a value
        # that is not numeric at all is skipped instead of aborting the run.
        try:
            duration_seconds = int(float(row['encounter_length']))
        except (TypeError, ValueError, OverflowError):
            duration_seconds = None
        if duration_seconds is not None:
            g.add((sighting_event, P1.duration, Literal(f"PT{duration_seconds}S", datatype=XSD.duration)))

    # Location: reuse the node already created for this coordinate pair.
    location_key = (row['latitude'], row['longitude'])
    if location_key not in unique_locations:
        location = EX[f"Location{len(unique_locations)}"]
        unique_locations[location_key] = location
        g.add((location, RDF.type, P1.Location))
        g.add((location, P1.latitude, Literal(row['latitude'], datatype=XSD.decimal)))
        g.add((location, P1.longitude, Literal(row['longitude'], datatype=XSD.decimal)))
    else:
        location = unique_locations[location_key]
    g.add((sighting_event, P1.location, location))

    # Shape: reuse the node already created for this shape value.
    shape_key = row['ufo_shape']
    if shape_key not in unique_shapes:
        shape = EX[f"Shape{len(unique_shapes)}"]
        unique_shapes[shape_key] = shape
        g.add((shape, RDF.type, P1.Shape))
        g.add((shape, P1.name, Literal(row['ufo_shape'], datatype=XSD.string)))
    else:
        shape = unique_shapes[shape_key]
    g.add((sighting_event, P1.shape, shape))

    # Witness data is optional: only emitted when both columns exist.
    if 'witness_name' in row and 'witness_age' in row:
        witness = EX[f"Witness{index}"]
        # NOTE(review): the triple reads "event witnessOf witness", which
        # looks inverted for a predicate with this name; direction kept
        # unchanged to preserve the generated data - confirm the intent.
        g.add((sighting_event, P1.witnessOf, witness))
        g.add((witness, RDF.type, P1.Witness))
        g.add((witness, P1.name, Literal(row['witness_name'], datatype=XSD.string)))
        g.add((witness, P1.age, Literal(row['witness_age'], datatype=XSD.integer)))
        # Record a gender only when the data states one; a missing or
        # unrecognized value is no longer written out as P1.female.
        gender_label = str(row.get('witness_gender', '')).strip().lower()
        if gender_label == 'male':
            g.add((witness, P1.gender, P1.male))
        elif gender_label == 'female':
            g.add((witness, P1.gender, P1.female))

# NOTE(review): the content is Turtle although the file name ends in ".rdf";
# the name is kept so anything that reads this path keeps working.
g.serialize(destination='ufo_sightings.rdf', format='turtle')

KeyError: 'date_time'

In [13]:
# Quick sanity check of the date converter on a known MM/DD/YYYY value.
convert_date_to_iso(date_str='10/10/1978')

'1978-10-10'