## Neo4j_Prepare
* Background Info + Schema/Data Preparation
### Intro
* Data is read from MySQL, and a subset of it is then inserted into Neo4j

---

### "Schema"
* Only the subset of MySQL entities that is meaningful in a graph model is carried over to Neo4j
#### 节点（Nodes）

| 节点类型  | 属性列表 |
|:---------|:---------|
| Airport  | airport_id, name, city, country, latitude, longitude, location (point built from latitude/longitude after loading) |
| Airline  | airline_id, name, country |
| Aircraft | aircraft_id, model, manufacturer |
| Passenger | passenger_id, first_name, last_name, nationality |
| Flight   | flight_id, flight_number, departure_time, arrival_time, flight_date, status |

#### 关系（Relationships）

| 关系类型 | 起点节点 -> 终点节点 | 属性列表 |
|:---------|:------------------|:---------|
| FLIES_TO | Airport -> Airport | route_id |
| BOOKED   | Passenger -> Flight | seat_number, ticket_price |
| OPERATES | Airline -> Flight   | （无额外属性） |
| USES     | Flight -> Aircraft  | （无额外属性） |

---

### Neo4j Credentials
* Wait 60 seconds before connecting with these details, or log in to https://console.neo4j.io to check that the Aura instance is available
    * NEO4J_URI=
    * NEO4J_USERNAME=neo4j
    * NEO4J_PASSWORD=
    * AURA_INSTANCEID=
    * AURA_INSTANCENAME=Free instance


In [None]:
pip install neo4j

In [9]:
from urllib.parse import quote_plus

import pandas as pd
from neo4j import GraphDatabase
from sqlalchemy import create_engine


# Neo4j connection settings (create your own Neo4j Aura account and put
# its URI / user name / password here).
NEO4J_URI = ""
NEO4J_USERNAME = ""
NEO4J_PASSWORD = ""

# MySQL connection settings
username = ''
password = ''
host = ''
port = ''
database = ''

# Create the Neo4j driver
driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USERNAME, NEO4J_PASSWORD))
# Verify the connection by running a trivial query
with driver.session() as session:
    session.run("RETURN 1")
    print("连接成功！")
    # WARNING: wipe the whole Neo4j database (every node and relationship)
    # before the data is loaded, so each run starts from an empty graph.
    session.run("MATCH (n) DETACH DELETE n")
    print("✅ 已清空 Neo4j 中所有节点和关系")


# Connect to MySQL.
# The user name and password are URL-encoded so that characters such as
# "@", ":", "/" or "%" inside the credentials cannot corrupt the
# connection URL.
engine = create_engine(
    f"mysql+pymysql://{quote_plus(username)}:{quote_plus(password)}"
    f"@{host}:{port}/{database}",
    connect_args={"charset": "utf8mb4"}
)

# Helper used to run a Cypher query
def run_query(query, parameters=None):
    """Run a Cypher statement in a fresh session and return every row.

    Args:
        query: Cypher text to execute.
        parameters: Optional dict of values for ``$name`` placeholders in
            ``query``. Passing data this way, instead of formatting it into
            the query text, removes the need for manual escaping.

    Returns:
        A list with one ``dict`` per returned record (empty when the
        statement returns no rows).
    """
    with driver.session() as session:
        result = session.run(query, parameters)
        # Materialise the records while the session is still open.
        return [record.data() for record in result]

# ---------------------------------------------------
# Read the airports from MySQL and create one :Airport node per row.
airports_df = pd.read_sql("""
SELECT airport_id, name, city, country, latitude, longitude
FROM airports
""", con=engine)

# Values are sent as Cypher parameters instead of being formatted into the
# query text, so quotes or backslashes in the data cannot break the
# statement and no manual escaping is needed.
airport_query = """
CREATE (:Airport {
    airport_id: $airport_id,
    name: $name,
    city: $city,
    country: $country,
    latitude: $latitude,
    longitude: $longitude
})
"""

# One session is reused for the whole table instead of opening one per row.
with driver.session() as session:
    for _, row in airports_df.iterrows():
        session.run(airport_query, {
            # Convert pandas/NumPy scalars to plain Python numbers.
            "airport_id": int(row['airport_id']),
            "name": row['name'],
            "city": row['city'],
            "country": row['country'],
            "latitude": float(row['latitude']),
            "longitude": float(row['longitude']),
        }).consume()  # consume so that any error surfaces on this row

# Read the airlines from MySQL and create one :Airline node per row.
airlines_df = pd.read_sql("""
SELECT airline_id, name, country
FROM airlines
""", con=engine)

# Parameterized Cypher: the data never becomes part of the query text, so
# quotes or backslashes in names cannot break the statement.
airline_query = """
CREATE (:Airline {
    airline_id: $airline_id,
    name: $name,
    country: $country
})
"""

# One session is reused for the whole table instead of opening one per row.
with driver.session() as session:
    for _, row in airlines_df.iterrows():
        session.run(airline_query, {
            # Convert the pandas/NumPy scalar to a plain Python int.
            "airline_id": int(row['airline_id']),
            "name": row['name'],
            "country": row['country'],
        }).consume()  # consume so that any error surfaces on this row

# Read the aircraft from MySQL and create one :Aircraft node per row.
aircrafts_df = pd.read_sql("""
SELECT aircraft_id, model, manufacturer
FROM aircrafts
""", con=engine)

# Parameterized Cypher: the data never becomes part of the query text, so
# quotes or backslashes in the values cannot break the statement.
aircraft_query = """
CREATE (:Aircraft {
    aircraft_id: $aircraft_id,
    model: $model,
    manufacturer: $manufacturer
})
"""

# One session is reused for the whole table instead of opening one per row.
with driver.session() as session:
    for _, row in aircrafts_df.iterrows():
        session.run(aircraft_query, {
            # Convert the pandas/NumPy scalar to a plain Python int.
            "aircraft_id": int(row['aircraft_id']),
            "model": row['model'],
            "manufacturer": row['manufacturer'],
        }).consume()  # consume so that any error surfaces on this row

# Read the passengers from MySQL and create one :Passenger node per row.
passengers_df = pd.read_sql("""
SELECT passenger_id, first_name, last_name, nationality
FROM passengers
""", con=engine)

# Parameterized Cypher: personal names often contain quotes or other
# special characters; sending them as parameters means they can never
# break the statement.
passenger_query = """
CREATE (:Passenger {
    passenger_id: $passenger_id,
    first_name: $first_name,
    last_name: $last_name,
    nationality: $nationality
})
"""

# One session is reused for the whole table instead of opening one per row.
with driver.session() as session:
    for _, row in passengers_df.iterrows():
        session.run(passenger_query, {
            # Convert the pandas/NumPy scalar to a plain Python int.
            "passenger_id": int(row['passenger_id']),
            "first_name": row['first_name'],
            "last_name": row['last_name'],
            "nationality": row['nationality'],
        }).consume()  # consume so that any error surfaces on this row

# Read the flights from MySQL and create one :Flight node per row.
flights_df = pd.read_sql("""
SELECT flight_id, flight_number, departure_time, arrival_time, flight_date, status
FROM flights
""", con=engine)

# Parameterized Cypher: the data never becomes part of the query text, so
# no manual escaping is needed.
flight_query = """
CREATE (:Flight {
    flight_id: $flight_id,
    flight_number: $flight_number,
    departure_time: $departure_time,
    arrival_time: $arrival_time,
    flight_date: $flight_date,
    status: $status
})
"""

# One session is reused for the whole table instead of opening one per row.
with driver.session() as session:
    for _, row in flights_df.iterrows():
        session.run(flight_query, {
            # Convert the pandas/NumPy scalar to a plain Python int.
            "flight_id": int(row['flight_id']),
            "flight_number": row['flight_number'],
            # The time/date columns and the status are stored as their
            # string representation.
            "departure_time": str(row['departure_time']),
            "arrival_time": str(row['arrival_time']),
            "flight_date": str(row['flight_date']),
            "status": str(row['status']),
        }).consume()  # consume so that any error surfaces on this row

# Read the routes from MySQL and link the two airports of every route with
# a FLIES_TO relationship that carries the route id.
routes_df = pd.read_sql("""
SELECT route_id, source_airport_id, destination_airport_id
FROM routes
""", con=engine)

# Parameterized Cypher: the ids are sent as parameters rather than being
# formatted into the query text.
route_query = """
MATCH (a1:Airport {airport_id: $source_airport_id}),
      (a2:Airport {airport_id: $destination_airport_id})
CREATE (a1)-[:FLIES_TO {route_id: $route_id}]->(a2)
"""

# One session is reused for the whole table instead of opening one per row.
with driver.session() as session:
    for _, row in routes_df.iterrows():
        session.run(route_query, {
            # Convert pandas/NumPy scalars to plain Python ints.
            "source_airport_id": int(row['source_airport_id']),
            "destination_airport_id": int(row['destination_airport_id']),
            "route_id": int(row['route_id']),
        }).consume()  # consume so that any error surfaces on this row

# Read the bookings from MySQL and link every passenger to the booked
# flight with a BOOKED relationship (seat number and ticket price).
bookings_df = pd.read_sql("""
SELECT booking_id, passenger_id, flight_id, seat_number, ticket_price
FROM bookings
""", con=engine)

# Parameterized Cypher: seat numbers are free text; sending them as
# parameters means they can never break the statement.
booking_query = """
MATCH (p:Passenger {passenger_id: $passenger_id}),
      (f:Flight {flight_id: $flight_id})
CREATE (p)-[:BOOKED {
    seat_number: $seat_number,
    ticket_price: $ticket_price
}]->(f)
"""

# One session is reused for the whole table instead of opening one per row.
with driver.session() as session:
    for _, row in bookings_df.iterrows():
        session.run(booking_query, {
            # Convert pandas/NumPy (or Decimal) scalars to plain Python
            # numbers.
            "passenger_id": int(row['passenger_id']),
            "flight_id": int(row['flight_id']),
            # The seat number is stored as its string representation.
            "seat_number": str(row['seat_number']),
            "ticket_price": float(row['ticket_price']),
        }).consume()  # consume so that any error surfaces on this row

# Look up the airline of every flight through its route and link them with
# an OPERATES relationship (Airline -> Flight).
routes_flights_df = pd.read_sql("""
SELECT f.flight_id, r.airline_id
FROM flights f
JOIN routes r ON f.route_id = r.route_id
""", con=engine)

# Parameterized Cypher: the ids are sent as parameters rather than being
# formatted into the query text.
operates_query = """
MATCH (al:Airline {airline_id: $airline_id}),
      (f:Flight {flight_id: $flight_id})
CREATE (al)-[:OPERATES]->(f)
"""

# One session is reused for the whole table instead of opening one per row.
with driver.session() as session:
    for _, row in routes_flights_df.iterrows():
        session.run(operates_query, {
            # Convert pandas/NumPy scalars to plain Python ints.
            "airline_id": int(row['airline_id']),
            "flight_id": int(row['flight_id']),
        }).consume()  # consume so that any error surfaces on this row

# Read the aircraft assigned to every flight and link them with a USES
# relationship (Flight -> Aircraft).
flights_aircrafts_df = pd.read_sql("""
SELECT flight_id, aircraft_id
FROM flights
""", con=engine)

# Parameterized Cypher: the ids are sent as parameters rather than being
# formatted into the query text.
uses_query = """
MATCH (f:Flight {flight_id: $flight_id}),
      (ac:Aircraft {aircraft_id: $aircraft_id})
CREATE (f)-[:USES]->(ac)
"""

# One session is reused for the whole table instead of opening one per row.
with driver.session() as session:
    for _, row in flights_aircrafts_df.iterrows():
        session.run(uses_query, {
            # Convert pandas/NumPy scalars to plain Python ints.
            "flight_id": int(row['flight_id']),
            "aircraft_id": int(row['aircraft_id']),
        }).consume()  # consume so that any error surfaces on this row


# Add a `location` point property, built from the stored latitude and
# longitude, to every Airport node so that geospatial queries can be run.
# (The `//` line inside the string is a Cypher comment and is sent to the
# server as part of the query text.)
geoQuery = """
// 给每个机场加一个 Point
MATCH (a:Airport)
SET a.location = point({latitude: a.latitude, longitude: a.longitude})
"""
run_query(geoQuery)

连接成功！
✅ 已清空 Neo4j 中所有节点和关系


[]