In [1]:
import math
import os

import pandas as pd
from tqdm import tqdm


In [2]:
# Subset of columns to read from locations.csv; every other column in that file is skipped.
use_col_locations = [
    'result_id', 'long_label',
    'add_num', 'st_name', 'st_type',
    'subregion', 'region', 'country', 'city',
    'x', 'y',
]

# Read the three input tables from the working directory.
locations = pd.read_csv('locations.csv', usecols=use_col_locations)
basic = pd.read_csv('basic_reports.csv')
collision_details = pd.read_csv('collision_details.csv')


In [3]:
# Outer-join collision_details with basic on report_id. When both inputs carry a
# column of the same name, the right-hand copy is tagged with '_duplicate'.
collision = collision_details.merge(
    basic, how='outer', on='report_id', suffixes=('', '_duplicate')
)
# Keep only the columns WITHOUT the '_duplicate' tag (i.e. drop the right-hand
# copies), then reset_index() so the row position becomes an 'index' column.
collision = collision.loc[
    :, [not name.endswith('_duplicate') for name in collision.columns]
].reset_index()
collision.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 119200 entries, 0 to 119199
Data columns (total 23 columns):
 #   Column                     Non-Null Count   Dtype 
---  ------                     --------------   ----- 
 0   index                      119200 non-null  int64 
 1   report_id                  119200 non-null  object
 2   date_time                  119200 non-null  object
 3   person_role                116087 non-null  object
 4   person_injury_lvl          29022 non-null   object
 5   person_veh_type            52367 non-null   object
 6   veh_type                   66602 non-null   object
 7   veh_make                   106503 non-null  object
 8   veh_model                  81397 non-null   object
 9   police_beat                119200 non-null  int64 
 10  address_no_primary         119200 non-null  int64 
 11  address_pd_primary         119200 non-null  object
 12  address_road_primary       119199 non-null  object
 13  address_sfx_primary        119188 non-null  

In [4]:
# Outer-join the collision rows with locations, pairing the 'index' column on the
# left with 'result_id' on the right. Same-named right-hand columns are tagged
# with '_duplicate'.
collision = collision.merge(
    locations,
    how='outer',
    left_on='index',
    right_on='result_id',
    suffixes=('', '_duplicate'),
)
# Keep only the columns WITHOUT the '_duplicate' tag (drops the right-hand copies).
collision = collision.loc[
    :, [not name.endswith('_duplicate') for name in collision.columns]
]

In [5]:
# Pair each row's x and y into an (x, y) tuple stored in a new 'point' column.
# Zipping the two columns builds the same tuples as a row-wise
# DataFrame.apply(axis=1) but avoids calling a Python lambda once per row, and it
# also copes with an empty frame.
collision['point'] = list(zip(collision['x'], collision['y']))


In [6]:
# Preview the first rows of the merged frame, including the new 'point' column.
collision.head()

Unnamed: 0,index,report_id,date_time,person_role,person_injury_lvl,person_veh_type,veh_type,veh_make,veh_model,police_beat,...,add_num,st_name,st_type,city,subregion,region,country,x,y,point
0,0,171111,2015-01-14 20:00:00,PARKED VEHICLE OCCUPANT,,,,TOYOTA,RAV4,835,...,4200,Juniper,St,San Diego,San Diego County,California,USA,-117.103574,32.729736,"(-117.10357366462, 32.72973584447)"
1,1,192016,2015-03-19 12:00:00,PARKED VEHICLE OCCUPANT,,,AUTOMOBILE,FORD,,622,...,5200,Linda Vista,Rd,San Diego,San Diego County,California,USA,-117.196995,32.76538,"(-117.19699514643, 32.76537999747)"
2,2,190012,2015-03-24 03:05:00,DRIVER,VISABLE,,,NISSAN,ALTIMA,626,...,1000,Washington,St,San Diego,San Diego County,California,USA,-117.172575,32.749898,"(-117.17257495838, 32.74989781564)"
3,3,191866,2015-03-27 23:56:00,DRIVER,PAIN,,LIGHT DUTY TRUCK,FORD,F150,613,...,2800,Worden,St,San Diego,San Diego County,California,USA,-117.224608,32.74991,"(-117.2246075723, 32.74991035316)"
4,4,185207,2015-07-06 11:45:00,DRIVER,,,,FORD,MUSTANG,813,...,2800,El Cajon,Blvd,San Diego,San Diego County,California,USA,-117.133472,32.755418,"(-117.13347203896, 32.75541750843)"


In [7]:
from neo4j import GraphDatabase

In [32]:
# Neo4j connection settings. Each value can be overridden through an environment
# variable so that real credentials never need to be saved in the notebook; the
# fallbacks are the original local-development values.
uri = os.environ.get("NEO4J_URI", "bolt://localhost:7666")
username = os.environ.get("NEO4J_USERNAME", "neo4j")
password = os.environ.get("NEO4J_PASSWORD", "password")

def create_neo4j_session(uri, username, password):
    """Open a Neo4j session for the given endpoint and credentials.

    Args:
        uri: Connection URI passed to GraphDatabase.driver().
        username: User name for basic authentication.
        password: Password for basic authentication.

    Returns:
        The session obtained from the new driver, or None if creating the
        driver or the session raised. The error is printed in that case.

    Note:
        On success the driver stays open and is not returned, so callers can
        only close the session, not the driver.
    """
    driver = None
    try:
        driver = GraphDatabase.driver(uri, auth=(username, password))
        return driver.session()
    except Exception as e:
        # The driver was created but no session could be handed out: release it
        # here, because nothing else holds a reference to it.
        if driver is not None:
            driver.close()
        print(f"Failed to create Neo4j session: {e}")
        return None


In [40]:
# Open the session that the later execute_write cells use.
# create_neo4j_session returns None (after printing the error) if it fails.
session = create_neo4j_session(uri, username, password)

In [34]:
# Keep only the rows whose 'region' value is 'California', then preview them.
sd = collision.loc[collision['region'] == 'California']
sd.head()

Unnamed: 0,index,report_id,date_time,person_role,person_injury_lvl,person_veh_type,veh_type,veh_make,veh_model,police_beat,...,add_num,st_name,st_type,city,subregion,region,country,x,y,point
0,0,171111,2015-01-14 20:00:00,PARKED VEHICLE OCCUPANT,,,,TOYOTA,RAV4,835,...,4200,Juniper,St,San Diego,San Diego County,California,USA,-117.103574,32.729736,"(-117.10357366462, 32.72973584447)"
1,1,192016,2015-03-19 12:00:00,PARKED VEHICLE OCCUPANT,,,AUTOMOBILE,FORD,,622,...,5200,Linda Vista,Rd,San Diego,San Diego County,California,USA,-117.196995,32.76538,"(-117.19699514643, 32.76537999747)"
2,2,190012,2015-03-24 03:05:00,DRIVER,VISABLE,,,NISSAN,ALTIMA,626,...,1000,Washington,St,San Diego,San Diego County,California,USA,-117.172575,32.749898,"(-117.17257495838, 32.74989781564)"
3,3,191866,2015-03-27 23:56:00,DRIVER,PAIN,,LIGHT DUTY TRUCK,FORD,F150,613,...,2800,Worden,St,San Diego,San Diego County,California,USA,-117.224608,32.74991,"(-117.2246075723, 32.74991035316)"
4,4,185207,2015-07-06 11:45:00,DRIVER,,,,FORD,MUSTANG,813,...,2800,El Cajon,Blvd,San Diego,San Diego County,California,USA,-117.133472,32.755418,"(-117.13347203896, 32.75541750843)"


In [35]:
def _nan_to_none(value):
    """Return None for a float NaN (pandas' missing-value marker), else the value unchanged."""
    if isinstance(value, float) and math.isnan(value):
        return None
    return value


def _optional_int(value):
    """Convert value to int, passing a missing value (None or NaN) through as None."""
    value = _nan_to_none(value)
    return None if value is None else int(value)


def create_address_with_person(tx, address, latitude, longitude, date_time, num_injured, num_killed, person_role, veh_make, veh_model, charge_desc):
    """Write one person record, its accident info and its address to the graph.

    The Address node is MERGEd on (address, latitude, longitude), so repeated
    calls for the same location reuse one node. Every call CREATEs a new
    AccidentInfo node pointing at the address via HAS_ACCIDENT_INFO, and a new
    Person node pointing at that AccidentInfo via INVOLVED_IN.

    Missing values (None or NaN) in the AccidentInfo/Person fields are sent as
    null instead of raising in int() or being stored as a float NaN. The three
    MERGE keys are passed through untouched.
    NOTE(review): Cypher is expected to skip null properties on CREATE and to
    reject null MERGE keys -- confirm against the Cypher manual.

    Args:
        tx: Transaction-like object exposing run(query, **parameters).
        address: Address label used as a MERGE key.
        latitude: Latitude used as a MERGE key.
        longitude: Longitude used as a MERGE key.
        date_time: Accident timestamp stored on AccidentInfo.
        num_injured: Injured count; converted to int unless missing.
        num_killed: Killed count; converted to int unless missing.
        person_role: Role of the person, stored on Person.
        veh_make: Vehicle make, stored on Person.
        veh_model: Vehicle model, stored on Person.
        charge_desc: Charge description, stored on Person.
    """
    tx.run(
        """
        MERGE (a:Address {address: $address, latitude: $latitude, longitude: $longitude})
        CREATE (a)<-[:HAS_ACCIDENT_INFO]-(ai:AccidentInfo {date_time: $date_time, num_injured: $num_injured, num_killed: $num_killed})
        CREATE (ai)<-[:INVOLVED_IN]-(p:Person {person_role: $person_role, veh_make: $veh_make, veh_model: $veh_model, charge_desc: $charge_desc})
        """,
        address=address,
        latitude=latitude,
        longitude=longitude,
        date_time=_nan_to_none(date_time),
        num_injured=_optional_int(num_injured),
        num_killed=_optional_int(num_killed),
        person_role=_nan_to_none(person_role),
        veh_make=_nan_to_none(veh_make),
        veh_model=_nan_to_none(veh_model),
        charge_desc=_nan_to_none(charge_desc),
    )

In [41]:
# Load the first MAX_ROWS rows of `sd` into Neo4j, one write transaction per row.
MAX_ROWS = 8000
rows_to_load = sd.iloc[:MAX_ROWS]

# iterrows() is a generator with no length, so pass `total` explicitly to give
# tqdm a real progress bar instead of a bare iteration counter.
for _, row in tqdm(rows_to_load.iterrows(), total=len(rows_to_load)):
    # Arguments follow the positional order of create_address_with_person after `tx`.
    session.execute_write(
        create_address_with_person,
        row['long_label'],   # address
        row['y'],            # latitude
        row['x'],            # longitude
        row['date_time'],
        row['injured'],      # num_injured
        row['killed'],       # num_killed
        row['person_role'],
        row['veh_make'],
        row['veh_model'],
        row['charge_desc'],
    )



8000it [00:40, 195.96it/s]


In [42]:
import time

In [43]:
# Wall-clock start; the elapsed time is printed once the range query at the end of this cell has run.
start = time.time()
def connect_addresses_within_range(tx, range_km):
    """Link every ordered pair of distinct Address nodes lying within range_km of each other.

    For each qualifying pair a WITHIN_RANGE relationship is created from the
    first node to the second, with the computed distance stored in its
    `distance_m` property. Because ordered pairs are matched, two nearby
    addresses end up connected in both directions. The query uses CREATE, so
    running it again adds another set of relationships.

    Args:
        tx: Transaction-like object exposing run(query, **parameters).
        range_km: Maximum separation in kilometres.

    Returns:
        Whatever tx.run() returns for the query.
    """
    result = tx.run(
        """
        MATCH (a1:Address), (a2:Address)
        WHERE a1 <> a2
        // Compute the distance once and reuse it for the filter and the property.
        WITH a1, a2,
             point.distance(point({latitude: a1.latitude, longitude: a1.longitude}),
                            point({latitude: a2.latitude, longitude: a2.longitude})) AS distance_m
        WHERE distance_m <= $range_m
        CREATE (a1)-[:WITHIN_RANGE {distance_m: distance_m}]->(a2)
        """,
        range_m=range_km * 1000,  # kilometres -> metres, the unit the query compares in
    )
    return result

# Run the range-linking query and report how long it took.
range_km = 0.5 # maximum distance between linked addresses, in kilometers

# Uses the notebook-level `session` opened in an earlier cell.
session.execute_write(connect_addresses_within_range, range_km)
end = time.time()
print(f'time takes to run the query {end-start}')

time takes to run the query 29.91241765022278


In [38]:
def connect_addresses_within_range(tx, range_km):
    """Link every ordered pair of distinct Address nodes lying within range_km of each other.

    For each qualifying pair a WITHIN_RANGE relationship is created from the
    first node to the second, with the computed distance stored in its
    `distance_m` property. Because ordered pairs are matched, two nearby
    addresses end up connected in both directions. The query uses CREATE, so
    running it again adds another set of relationships.

    Uses point.distance() rather than the legacy distance() function.

    Args:
        tx: Transaction-like object exposing run(query, **parameters).
        range_km: Maximum separation in kilometres.

    Returns:
        Whatever tx.run() returns for the query.
    """
    result = tx.run(
        """
        MATCH (a1:Address), (a2:Address)
        WHERE a1 <> a2
        // Compute the distance once and reuse it for the filter and the property.
        WITH a1, a2,
             point.distance(point({latitude: a1.latitude, longitude: a1.longitude}),
                            point({latitude: a2.latitude, longitude: a2.longitude})) AS distance_m
        WHERE distance_m <= $range_m
        CREATE (a1)-[:WITHIN_RANGE {distance_m: distance_m}]->(a2)
        """,
        range_m=range_km * 1000,  # kilometres -> metres, the unit the query compares in
    )
    return result

# Run the range-linking query with a 0.5 km radius.
# NOTE(review): the notebook contains another cell that makes this same call; since the
# query uses CREATE, running both against the same data adds the relationships twice.
range_km = 0.5 # maximum distance between linked addresses, in kilometers

# Uses the notebook-level `session` opened in an earlier cell.
session.execute_write(connect_addresses_within_range, range_km)


<neo4j._sync.work.result.Result at 0x290f12eba60>

In [39]:
# Reset the database: delete every node together with all of its relationships.
session = create_neo4j_session(uri, username, password)
if session is None:
    # create_neo4j_session already printed the underlying error.
    raise RuntimeError("Could not open a Neo4j session; nothing was deleted.")

query = "MATCH (n) DETACH DELETE n"
try:
    result = session.run(query)
    result.consume()  # wait for the delete to finish before the session is closed
finally:
    # Close the session even if the delete fails. Only the session is closed
    # here: the driver created inside create_neo4j_session is not reachable
    # from this cell.
    session.close()