This notebook audits which expected properties are missing from the Airport, Country and City nodes of a Neo4j database, and ranks airports with PageRank.

These are the expected properties for each label:

Expected properties of Airport

[
  "tz_database_timezone",
  "commercial_flights", 
  "location", 
  "altitude", 
  "dst", 
  "type", 
  "runwaysLength", 
  "country", 
  "city", 
  "iata", 
  "timezone", 
  "source", 
  "runways",
  "icao", 
  "name", 
  "distCity", 
  "longitude", 
  "latitude", 
  "runwaysWidth", 
  "airport_id",
]

Expected properties of Country 

[
  "region",
  "labor_participation",
  "physicians",
  "unemployment",
  "intermediate_region_code",
  "gasoline_price",
  "gdp",
  "cpi_change",
  "capital",
  "co2_emissions",
  "iso_3166_2",
  "largest_city",
  "alpha3",
  "total_tax_rate",
  "calling_code",
  "tax_revenue",
  "sub_region_code",
  "armed_forces",
  "name",
  "forested_area",
  "fertility_rate",
  "longitude",
  "currency_code",
  "urban_population",
  "cpi",
  "population",
  "infant_mortality",
  "land_area",
  "minimum_wage",
  "code",
  "education_primary",
  "country_code",
  "birth_rate",
  "maternal_mortality",
  "region_code",
  "education_tertiary",
  "intermediate_region",
  "latitude",
  "language",
  "sub_region",
  "life_expectancy",
  "agricultural_land",
  "name2",
  "health_expenditure"
]


Expected properties of City

[
  "health_care_index",
  "countrycode",
  "accent_name",
  "location",
  "climate_index",
  "purchasing_power_index",
  "traffic_commute_time_index",
  "cost_of_living_index",
  "country",
  "pollution_index",
  "quality_of_life_index",
  "rank",
  "safety_index",
  "name",
  "longitude",
  "property_price_to_income_ratio",
  "latitude",
  "population"
]


Missing properties of Airport

In [None]:
from neo4j import GraphDatabase

# Connection settings for the Neo4j instance queried by this cell.
NEO4J_URI = "neo4j://localhost:7687"
NEO4J_USER = "neo4j"
NEO4J_PASSWORD = "password"

# Node label whose nodes are audited.
LABEL = "Airport"

# Property keys every node with LABEL is expected to carry. The order matters:
# it decides how properties with equal counts are listed in the report.
EXPECTED_PROPERTIES = [
    "tz_database_timezone",
    "commercial_flights",
    "location",
    "altitude",
    "dst",
    "type",
    "runwaysLength",
    "country",
    "city",
    "iata",
    "timezone",
    "source",
    "runways",
    "icao",
    "name",
    "distCity",
    "longitude",
    "latitude",
    "runwaysWidth",
    "airport_id",
]


# -----------------------------------------------
# NEO4J CONNECTION
# -----------------------------------------------
driver = GraphDatabase.driver(
    NEO4J_URI,
    auth=(NEO4J_USER, NEO4J_PASSWORD),
)


def fetch_nodes_and_props(tx, label):
    """Return ``(node id, property map)`` for every node carrying *label*.

    Args:
        tx: Transaction-like object exposing ``run(query)``; each record it
            yields must support lookup by the ``nodeId`` and ``props`` keys.
        label: Node label to match. It may contain spaces or punctuation.

    Returns:
        A list of ``(nodeId, props)`` tuples, in the order the records arrive.
    """
    # A label cannot be passed as a Cypher query parameter, so it is spliced
    # into the text. Backtick-quoting it (and doubling any embedded backtick)
    # keeps labels that are not plain identifiers from breaking the statement.
    quoted_label = "`" + label.replace("`", "``") + "`"
    # NOTE(review): id() is deprecated in Neo4j 5 in favour of elementId();
    # kept here because the rest of the cell receives the integer id.
    query = f"""
    MATCH (n:{quoted_label})
    RETURN id(n) AS nodeId, properties(n) AS props
    """
    return [(record["nodeId"], record["props"]) for record in tx.run(query)]


# -----------------------------------------------
# MAIN SCRIPT
# -----------------------------------------------
# NOTE(review): the database name reads like a typo of "airportcorrect";
# confirm against the server before changing it.
try:
    with driver.session(database="aiportcorrect") as session:
        nodes = session.execute_read(fetch_nodes_and_props, LABEL)
finally:
    # Close the driver even when the read raises.
    driver.close()


# -----------------------------------------------
# PROPERTY VALIDATION PER NODE
# -----------------------------------------------
# Build the expected-key set once instead of once per node.
expected_keys = set(EXPECTED_PROPERTIES)

# One (node id, sorted missing property names) entry per incomplete node.
missing_per_node = []

for node_id, props in nodes:
    missing = expected_keys.difference(props)

    if missing:
        missing_per_node.append((node_id, sorted(missing)))

# -----------------------------------------------
# REPORT
# -----------------------------------------------
print("=== MISSING PROPERTY REPORT PER NODE ===")
print(f"Label: {LABEL}")
print(f"Total nodes: {len(nodes)}")
print(f"Nodes missing at least one property: {len(missing_per_node)}\n")

# -----------------------------------------------
# MISSING PROPERTY FREQUENCY MAP
# -----------------------------------------------
# Seed every expected property with 0 so properties that are never missing
# still show up in the report.
missing_frequency = dict.fromkeys(EXPECTED_PROPERTIES, 0)

for _, missing in missing_per_node:
    for prop in missing:
        missing_frequency[prop] += 1

# The sort is stable, so equal counts keep the EXPECTED_PROPERTIES order.
missing_sorted = sorted(missing_frequency.items(), key=lambda x: x[1], reverse=True)

print("\n=== MISSING PROPERTY FREQUENCY MAP ===")
for prop, count in missing_sorted:
    print(f"{prop}: {count}")





=== MISSING PROPERTY REPORT PER NODE ===
Label: Airport
Total nodes: 7698
Nodes missing at least one property: 7247


=== MISSING PROPERTY FREQUENCY MAP ===
commercial_flights: 6954
distCity: 3620
runwaysLength: 2425
runways: 2425
runwaysWidth: 2425
city: 49
tz_database_timezone: 0
location: 0
altitude: 0
dst: 0
type: 0
country: 0
iata: 0
timezone: 0
source: 0
icao: 0
name: 0
longitude: 0
latitude: 0
airport_id: 0


Missing properties of Country

In [None]:
from neo4j import GraphDatabase


# Connection settings for the Neo4j instance queried by this cell.
NEO4J_URI = "neo4j://localhost:7687"
NEO4J_USER = "neo4j"
NEO4J_PASSWORD = "password"

# Node label whose nodes are audited.
LABEL = "Country"

# Property keys every node with LABEL is expected to carry. The order matters:
# it decides how properties with equal counts are listed in the report.
EXPECTED_PROPERTIES = [
    "region",
    "labor_participation",
    "physicians",
    "unemployment",
    "intermediate_region_code",
    "gasoline_price",
    "gdp",
    "cpi_change",
    "capital",
    "co2_emissions",
    "iso_3166_2",
    "largest_city",
    "alpha3",
    "total_tax_rate",
    "calling_code",
    "tax_revenue",
    "sub_region_code",
    "armed_forces",
    "name",
    "forested_area",
    "fertility_rate",
    "longitude",
    "currency_code",
    "urban_population",
    "cpi",
    "population",
    "infant_mortality",
    "land_area",
    "minimum_wage",
    "code",
    "education_primary",
    "country_code",
    "birth_rate",
    "maternal_mortality",
    "region_code",
    "education_tertiary",
    "intermediate_region",
    "latitude",
    "language",
    "sub_region",
    "life_expectancy",
    "agricultural_land",
    "name2",
    "health_expenditure",
]


# -----------------------------------------------
# NEO4J CONNECTION
# -----------------------------------------------
driver = GraphDatabase.driver(
    NEO4J_URI,
    auth=(NEO4J_USER, NEO4J_PASSWORD),
)


def fetch_nodes_and_props(tx, label):
    """Return ``(node id, property map)`` for every node carrying *label*.

    Args:
        tx: Transaction-like object exposing ``run(query)``; each record it
            yields must support lookup by the ``nodeId`` and ``props`` keys.
        label: Node label to match. It may contain spaces or punctuation.

    Returns:
        A list of ``(nodeId, props)`` tuples, in the order the records arrive.
    """
    # A label cannot be passed as a Cypher query parameter, so it is spliced
    # into the text. Backtick-quoting it (and doubling any embedded backtick)
    # keeps labels that are not plain identifiers from breaking the statement.
    quoted_label = "`" + label.replace("`", "``") + "`"
    # NOTE(review): id() is deprecated in Neo4j 5 in favour of elementId();
    # kept here because the rest of the cell receives the integer id.
    query = f"""
    MATCH (n:{quoted_label})
    RETURN id(n) AS nodeId, properties(n) AS props
    """
    return [(record["nodeId"], record["props"]) for record in tx.run(query)]


# -----------------------------------------------
# MAIN SCRIPT
# -----------------------------------------------
# NOTE(review): the database name reads like a typo of "airportcorrect";
# confirm against the server before changing it.
try:
    with driver.session(database="aiportcorrect") as session:
        nodes = session.execute_read(fetch_nodes_and_props, LABEL)
finally:
    # Close the driver even when the read raises.
    driver.close()


# -----------------------------------------------
# PROPERTY VALIDATION PER NODE
# -----------------------------------------------
# Build the expected-key set once instead of once per node.
expected_keys = set(EXPECTED_PROPERTIES)

# One (node id, sorted missing property names) entry per incomplete node.
missing_per_node = []

for node_id, props in nodes:
    missing = expected_keys.difference(props)

    if missing:
        missing_per_node.append((node_id, sorted(missing)))

# -----------------------------------------------
# REPORT
# -----------------------------------------------
print("=== MISSING PROPERTY REPORT PER NODE ===")
print(f"Label: {LABEL}")
print(f"Total nodes: {len(nodes)}")
print(f"Nodes missing at least one property: {len(missing_per_node)}\n")

# -----------------------------------------------
# MISSING PROPERTY FREQUENCY MAP
# -----------------------------------------------
# Seed every expected property with 0 so properties that are never missing
# still show up in the report.
missing_frequency = dict.fromkeys(EXPECTED_PROPERTIES, 0)

for _, missing in missing_per_node:
    for prop in missing:
        missing_frequency[prop] += 1

# The sort is stable, so equal counts keep the EXPECTED_PROPERTIES order.
missing_sorted = sorted(missing_frequency.items(), key=lambda x: x[1], reverse=True)

print("\n=== MISSING PROPERTY FREQUENCY MAP ===")
for prop, count in missing_sorted:
    print(f"{prop}: {count}")





=== MISSING PROPERTY REPORT PER NODE ===
Label: Country
Total nodes: 249
Nodes missing at least one property: 207


=== MISSING PROPERTY FREQUENCY MAP ===
intermediate_region_code: 144
minimum_wage: 101
tax_revenue: 84
armed_forces: 81
gasoline_price: 77
cpi: 77
labor_participation: 76
unemployment: 76
cpi_change: 73
currency_code: 73
maternal_mortality: 71
total_tax_rate: 69
education_tertiary: 69
health_expenditure: 66
largest_city: 65
life_expectancy: 65
physicians: 64
co2_emissions: 64
forested_area: 64
fertility_rate: 64
education_primary: 64
agricultural_land: 64
capital: 63
infant_mortality: 63
birth_rate: 63
longitude: 62
urban_population: 62
latitude: 62
language: 62
gdp: 61
calling_code: 61
population: 61
land_area: 61
name2: 61
sub_region_code: 2
region_code: 2
region: 1
intermediate_region: 1
sub_region: 1
iso_3166_2: 0
alpha3: 0
name: 0
code: 0
country_code: 0


Missing properties of City

In [None]:
from neo4j import GraphDatabase

# -----------------------------------------------
# CONFIG
# -----------------------------------------------
# Connection settings for the Neo4j instance queried by this cell.
NEO4J_URI = "neo4j://localhost:7687"
NEO4J_USER = "neo4j"
NEO4J_PASSWORD = "password"

# Node label whose nodes are audited.
LABEL = "City"

# Property keys every node with LABEL is expected to carry. The order matters:
# it decides how properties with equal counts are listed in the report.
EXPECTED_PROPERTIES = [
    "health_care_index",
    "countrycode",
    "accent_name",
    "location",
    "climate_index",
    "purchasing_power_index",
    "traffic_commute_time_index",
    "cost_of_living_index",
    "country",
    "pollution_index",
    "quality_of_life_index",
    "rank",
    "safety_index",
    "name",
    "longitude",
    "property_price_to_income_ratio",
    "latitude",
    "population",
]


# -----------------------------------------------
# NEO4J CONNECTION
# -----------------------------------------------
driver = GraphDatabase.driver(
    NEO4J_URI,
    auth=(NEO4J_USER, NEO4J_PASSWORD),
)


def fetch_nodes_and_props(tx, label):
    """Return ``(node id, property map)`` for every node carrying *label*.

    Args:
        tx: Transaction-like object exposing ``run(query)``; each record it
            yields must support lookup by the ``nodeId`` and ``props`` keys.
        label: Node label to match. It may contain spaces or punctuation.

    Returns:
        A list of ``(nodeId, props)`` tuples, in the order the records arrive.
    """
    # A label cannot be passed as a Cypher query parameter, so it is spliced
    # into the text. Backtick-quoting it (and doubling any embedded backtick)
    # keeps labels that are not plain identifiers from breaking the statement.
    quoted_label = "`" + label.replace("`", "``") + "`"
    # NOTE(review): id() is deprecated in Neo4j 5 in favour of elementId();
    # kept here because the rest of the cell receives the integer id.
    query = f"""
    MATCH (n:{quoted_label})
    RETURN id(n) AS nodeId, properties(n) AS props
    """
    return [(record["nodeId"], record["props"]) for record in tx.run(query)]


# -----------------------------------------------
# MAIN SCRIPT
# -----------------------------------------------
# NOTE(review): the database name reads like a typo of "airportcorrect";
# confirm against the server before changing it.
try:
    with driver.session(database="aiportcorrect") as session:
        nodes = session.execute_read(fetch_nodes_and_props, LABEL)
finally:
    # Close the driver even when the read raises.
    driver.close()


# -----------------------------------------------
# PROPERTY VALIDATION PER NODE
# -----------------------------------------------
# Build the expected-key set once instead of once per node.
expected_keys = set(EXPECTED_PROPERTIES)

# One (node id, sorted missing property names) entry per incomplete node.
missing_per_node = []

for node_id, props in nodes:
    missing = expected_keys.difference(props)

    if missing:
        missing_per_node.append((node_id, sorted(missing)))

# -----------------------------------------------
# REPORT
# -----------------------------------------------
print("=== MISSING PROPERTY REPORT PER NODE ===")
print(f"Label: {LABEL}")
print(f"Total nodes: {len(nodes)}")
print(f"Nodes missing at least one property: {len(missing_per_node)}\n")

# -----------------------------------------------
# MISSING PROPERTY FREQUENCY MAP
# -----------------------------------------------
# Seed every expected property with 0 so properties that are never missing
# still show up in the report.
missing_frequency = dict.fromkeys(EXPECTED_PROPERTIES, 0)

for _, missing in missing_per_node:
    for prop in missing:
        missing_frequency[prop] += 1

# The sort is stable, so equal counts keep the EXPECTED_PROPERTIES order.
missing_sorted = sorted(missing_frequency.items(), key=lambda x: x[1], reverse=True)

print("\n=== MISSING PROPERTY FREQUENCY MAP ===")
for prop, count in missing_sorted:
    print(f"{prop}: {count}")





=== MISSING PROPERTY REPORT PER NODE ===
Label: City
Total nodes: 44997
Nodes missing at least one property: 44864


=== MISSING PROPERTY FREQUENCY MAP ===
health_care_index: 44847
climate_index: 44847
purchasing_power_index: 44847
traffic_commute_time_index: 44847
cost_of_living_index: 44847
pollution_index: 44847
quality_of_life_index: 44847
rank: 44847
safety_index: 44847
property_price_to_income_ratio: 44847
accent_name: 17
location: 17
longitude: 17
latitude: 17
population: 17
countrycode: 0
country: 0
name: 0


In [None]:
from neo4j import GraphDatabase

# -----------------------------------------------
# CONFIG
# -----------------------------------------------
# Connection settings for the Neo4j instance queried by this cell.
NEO4J_URI = "neo4j://localhost:7687"
NEO4J_USER = "neo4j"
NEO4J_PASSWORD = "password"

# Node label whose nodes are audited; change it to audit another label.
LABEL = "Airport"

# Property keys every node with LABEL is expected to carry. The order matters:
# it decides how properties with equal counts are listed in the report.
EXPECTED_PROPERTIES = [
    "tz_database_timezone",
    "commercial_flights",
    "location",
    "altitude",
    "dst",
    "type",
    "runwaysLength",
    "country",
    "city",
    "iata",
    "timezone",
    "source",
    "runways",
    "icao",
    "name",
    "distCity",
    "longitude",
    "latitude",
    "runwaysWidth",
    "airport_id",
]


# -----------------------------------------------
# NEO4J CONNECTION
# -----------------------------------------------
driver = GraphDatabase.driver(
    NEO4J_URI,
    auth=(NEO4J_USER, NEO4J_PASSWORD),
)


def fetch_nodes_and_props(tx, label):
    """Return ``(node id, property map)`` for every node carrying *label*.

    Args:
        tx: Transaction-like object exposing ``run(query)``; each record it
            yields must support lookup by the ``nodeId`` and ``props`` keys.
        label: Node label to match. It may contain spaces or punctuation.

    Returns:
        A list of ``(nodeId, props)`` tuples, in the order the records arrive.
    """
    # A label cannot be passed as a Cypher query parameter, so it is spliced
    # into the text. Backtick-quoting it (and doubling any embedded backtick)
    # keeps labels that are not plain identifiers from breaking the statement.
    quoted_label = "`" + label.replace("`", "``") + "`"
    # NOTE(review): id() is deprecated in Neo4j 5 in favour of elementId();
    # kept here because the rest of the cell receives the integer id.
    query = f"""
    MATCH (n:{quoted_label})
    RETURN id(n) AS nodeId, properties(n) AS props
    """
    return [(record["nodeId"], record["props"]) for record in tx.run(query)]


# -----------------------------------------------
# MAIN SCRIPT
# -----------------------------------------------
# NOTE(review): the database name reads like a typo of "airportcorrect";
# confirm against the server before changing it.
try:
    with driver.session(database="aiportcorrect") as session:
        nodes = session.execute_read(fetch_nodes_and_props, LABEL)
finally:
    # Close the driver even when the read raises.
    driver.close()


# -----------------------------------------------
# PROPERTY VALIDATION PER NODE
# -----------------------------------------------
# Build the expected-key set once instead of once per node.
expected_keys = set(EXPECTED_PROPERTIES)

# One (node id, sorted missing property names) entry per incomplete node.
missing_per_node = []

for node_id, props in nodes:
    missing = expected_keys.difference(props)

    if missing:
        missing_per_node.append((node_id, sorted(missing)))

# -----------------------------------------------
# REPORT
# -----------------------------------------------
total_nodes = len(nodes)

print("=== MISSING PROPERTY REPORT PER NODE ===")
print(f"Label: {LABEL}")
print(f"Total nodes: {total_nodes}")
print(f"Nodes missing at least one property: {len(missing_per_node)}\n")

# -----------------------------------------------
# MISSING PROPERTY FREQUENCY MAP
# -----------------------------------------------
# Seed every expected property with 0 so properties that are never missing
# still show up in the report.
missing_frequency = dict.fromkeys(EXPECTED_PROPERTIES, 0)

for _, missing in missing_per_node:
    for prop in missing:
        missing_frequency[prop] += 1

# The sort is stable, so equal counts keep the EXPECTED_PROPERTIES order.
missing_sorted = sorted(missing_frequency.items(), key=lambda x: x[1], reverse=True)

# -----------------------------------------------
# PROPERTY COVERAGE
# -----------------------------------------------
# Number of nodes on which each property key is present. Iterating the
# property map directly (rather than a set built from it) keeps the order of
# equal counts independent of string-hash randomisation.
test = {}

for node_id, props in nodes:
    for key in props:
        test[key] = test.get(key, 0) + 1

test_sorted = sorted(test.items(), key=lambda x: x[1], reverse=True)
print(test_sorted)

# Percentage of nodes carrying each property. The divisor is the number of
# nodes actually fetched; it was previously hard-coded to 7698. No zero guard
# is needed: with no nodes, test_sorted is empty and the loop does not run.
for prop, count in test_sorted:
    print(f"{prop}: {(count/total_nodes)*100}")

print("\n=== MISSING PROPERTY FREQUENCY MAP ===")
for prop, count in missing_sorted:
    print(f"{prop}: {count}")





=== MISSING PROPERTY REPORT PER NODE ===
Label: Airport
Total nodes: 7698
Nodes missing at least one property: 7110

[('airport_id', 7698), ('dst', 7698), ('icao', 7698), ('type', 7698), ('name', 7698), ('longitude', 7698), ('altitude', 7698), ('location', 7698), ('country', 7698), ('timezone', 7698), ('tz_database_timezone', 7698), ('latitude', 7698), ('source', 7698), ('city', 7649), ('distCity', 6235), ('iata', 6072), ('runwaysLength', 5636), ('runways', 5636), ('runwaysWidth', 5636), ('commercial_flights', 744)]
airport_id: 100.0
dst: 100.0
icao: 100.0
type: 100.0
name: 100.0
longitude: 100.0
altitude: 100.0
location: 100.0
country: 100.0
timezone: 100.0
tz_database_timezone: 100.0
latitude: 100.0
source: 100.0
city: 99.36347103143673
distCity: 80.99506365289686
iata: 78.87763055339049
runwaysLength: 73.2138217718888
runways: 73.2138217718888
runwaysWidth: 73.2138217718888
commercial_flights: 9.66484801247077

=== MISSING PROPERTY FREQUENCY MAP ===
commercial_flights: 6954
runwaysL

Ranking airports

In [None]:
# Ranking airports

from neo4j import GraphDatabase
import csv

# Connection settings for the Neo4j instance queried by this cell.
NEO4J_URI = "neo4j://localhost:7687"
NEO4J_USER = "neo4j"
NEO4J_PASSWORD = "password"

# Passed to rank_airports() further down in this cell.
LABEL = "Airport"

# -----------------------------------------------
# NEO4J CONNECTION
# -----------------------------------------------
driver = GraphDatabase.driver(
    NEO4J_URI,
    auth=(NEO4J_USER, NEO4J_PASSWORD),
)


def rank_airports(tx, label):
    """Stream PageRank scores for the graph named ``myGraphRoute``.

    Args:
        tx: Transaction-like object whose ``run(query)`` yields records that
            can be indexed by ``name``, ``iata`` and ``score``.
        label: Not used by the query; kept so the call signature is unchanged.

    Returns:
        A list of ``(name, iata, score)`` tuples in the order the query
        returns them (the query orders by score, highest first).
    """
    query = """
CALL gds.pageRank.stream('myGraphRoute')
YIELD nodeId, score
RETURN gds.util.asNode(nodeId).name AS name, gds.util.asNode(nodeId).iata as iata, score
ORDER BY score DESC
    """
    ranked = []
    for row in tx.run(query):
        ranked.append((row["name"], row["iata"], row["score"]))
    return ranked


# -----------------------------------------------
# MAIN SCRIPT
# -----------------------------------------------
# NOTE(review): the database name reads like a typo of "airportcorrect";
# confirm against the server before changing it.
try:
    with driver.session(database="aiportcorrect") as session:
        nodes = session.execute_read(rank_airports, LABEL)
finally:
    # Close the driver even when the read raises; nothing below needs it.
    driver.close()


# rank_airports() already returns a list, so no extra copy is made here.
print(nodes)

# Airport names contain non-ASCII characters, so the encoding is pinned rather
# than left to the platform default.
with open('ranks_pagerank.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerows(nodes)

[('Hartsfield Jackson Atlanta International Airport', 'ATL', 29.536791458876205), ("Chicago O'Hare International Airport", 'ORD', 18.63026087452378), ('Los Angeles International Airport', 'LAX', 17.833351223717916), ('Dallas Fort Worth International Airport', 'DFW', 17.149076667304442), ('Charles de Gaulle International Airport', 'CDG', 15.557675135292195), ('London Heathrow Airport', 'LHR', 15.47036176786689), ('Singapore Changi Airport', 'SIN', 15.177840541625912), ('Denver International Airport', 'DEN', 15.03413901359171), ('Beijing Capital International Airport', 'PEK', 14.949720618964234), ('Frankfurt am Main Airport', 'FRA', 14.188203938731187), ('John F Kennedy International Airport', 'JFK', 14.0369666035833), ('Domodedovo International Airport', 'DME', 13.772658125174248), ('Amsterdam Airport Schiphol', 'AMS', 13.063014853653883), ('Miami International Airport', 'MIA', 13.052886193116606), ('Sydney Kingsford Smith International Airport', 'SYD', 12.992126797225454), ('Atatürk In