In [5]:
import os

from neo4j import GraphDatabase

# Neo4j connection configuration.
# Each value can be overridden through an environment variable (NEO4J_URI,
# NEO4J_USER, NEO4J_PASSWORD) so that credentials do not have to be edited
# into the notebook; the fallbacks are the original local-development values.
URI = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
AUTH = (
    os.environ.get("NEO4J_USER", "neo4j"),
    os.environ.get("NEO4J_PASSWORD", "bundestag_password"),
)

def _print_name_section(session, heading, query, key):
    """Print *heading*, run *query* and list the values of column *key*.

    The values are printed as a sorted bullet list followed by a blank line.
    Returns the values in the order the database returned them.
    """
    print(heading)
    names = [record[key] for record in session.run(query)]
    for name in sorted(names):
        print(f"   - {name}")
    print()
    return names


def _describe_constraint(constraint):
    """Return a one-line description of a ``SHOW CONSTRAINTS`` record.

    Uses the ``description`` column when the server returns one; otherwise the
    description is assembled from the ``type``, ``labelsOrTypes`` and
    ``properties`` columns so that a missing column cannot abort the report.
    """
    description = constraint.get("description")
    if description:
        return description
    return (
        f"{constraint.get('type')} on {constraint.get('labelsOrTypes')} "
        f"{constraint.get('properties')}"
    )


def get_neo4j_schema():
    """Print a schema report for the Neo4j database configured by URI/AUTH.

    The report is written to stdout in seven sections: node labels,
    relationship types, property keys, constraints, indexes, schema patterns
    (property keys per label and relationship patterns) and node/relationship
    counts.

    Returns:
        None. Everything is printed.

    Any exception raised while the report is produced is caught and printed
    instead of being propagated; the driver is closed in every case.
    """
    driver = GraphDatabase.driver(URI, auth=AUTH)

    try:
        with driver.session() as session:
            print("=== NEO4J DATABASE SCHEMA ===\n")

            # Sections 1-3 are plain name listings from built-in procedures.
            labels = _print_name_section(
                session, "1. NODE LABELS:", "CALL db.labels()", "label"
            )
            _print_name_section(
                session,
                "2. RELATIONSHIP TYPES:",
                "CALL db.relationshipTypes()",
                "relationshipType",
            )
            _print_name_section(
                session, "3. PROPERTY KEYS:", "CALL db.propertyKeys()", "propertyKey"
            )

            # Get constraints
            print("4. CONSTRAINTS:")
            constraints = list(session.run("SHOW CONSTRAINTS"))
            if constraints:
                for constraint in constraints:
                    print(f"   - {constraint['name']}: {_describe_constraint(constraint)}")
            else:
                print("   - No constraints found")
            print()

            # Get indexes
            print("5. INDEXES:")
            indexes = list(session.run("SHOW INDEXES"))
            if indexes:
                for index in indexes:
                    print(f"   - {index['name']}: {index['labelsOrTypes']} on {index['properties']}")
            else:
                print("   - No indexes found")
            print()

            # Get detailed schema patterns using pure Cypher
            print("6. SCHEMA PATTERNS:")

            print("   Node Properties by Label:")
            for label in labels:
                # Backticks inside a label must be doubled to stay quoted.
                escaped = label.replace("`", "``")
                # Collect the keys over every node with this label; sampling a
                # single node would miss keys that only some nodes carry.
                record = session.run(
                    f"MATCH (n:`{escaped}`) "
                    "UNWIND keys(n) AS key "
                    "RETURN collect(DISTINCT key) AS props"
                ).single()
                if record and record["props"]:
                    print(f"   - {label}: {sorted(record['props'])}")
                else:
                    print(f"   - {label}: No properties found")

            print("\n   Relationship Patterns:")
            # Only the first label of each endpoint is used to name a pattern.
            result = session.run("""
                MATCH (a)-[r]->(b)
                RETURN DISTINCT 
                    labels(a)[0] as source_label,
                    type(r) as relationship_type,
                    labels(b)[0] as target_label,
                    keys(r) as rel_properties
                ORDER BY source_label, relationship_type, target_label
            """)

            for record in result:
                source = record["source_label"] or "UnlabeledNode"
                rel_type = record["relationship_type"]
                target = record["target_label"] or "UnlabeledNode"
                rel_props = record["rel_properties"] or []

                pattern = f"   - ({source})-[:{rel_type}]->({target})"
                if rel_props:
                    pattern += f" [Properties: {rel_props}]"
                print(pattern)

            print("\n7. DATABASE STATISTICS:")
            result = session.run("""
                MATCH (n)
                RETURN labels(n) as label, count(n) as count
                ORDER BY count DESC
            """)

            print("   Node counts by label:")
            for record in result:
                # The query groups by the full label list, so print the whole
                # combination; showing only the first label would make rows of
                # multi-label nodes look like duplicates of one label.
                label = ":".join(record["label"]) or "No Label"
                print(f"   - {label}: {record['count']}")

            # Relationship counts
            result = session.run("""
                MATCH ()-[r]->()
                RETURN type(r) as rel_type, count(r) as count
                ORDER BY count DESC
            """)

            print("\n   Relationship counts by type:")
            for record in result:
                print(f"   - {record['rel_type']}: {record['count']}")

    except Exception as e:
        # Top-level boundary of a best-effort report: failures are printed,
        # not raised. The error may come from any query, not only from the
        # connection, so the message does not claim a connection failure.
        print(f"Error while reading Neo4j schema: {e}")
        print("Make sure Neo4j is running and credentials are correct.")

    finally:
        driver.close()

# Execute the schema analysis; the report is printed to stdout as this cell's output
get_neo4j_schema()


=== NEO4J DATABASE SCHEMA ===

1. NODE LABELS:
   - Classification
   - Constituency
   - Content
   - Domain
   - Mandate
   - Page
   - Party
   - Period
   - Politician
   - State

2. RELATIONSHIP TYPES:
   - AFFILIATED_WITH
   - BELONGS_TO_DOMAIN
   - HAS_CLASSIFICATION
   - HAS_CONTENT
   - HAS_DETAIL_PAGE
   - HAS_MANDATE
   - HAS_SOURCE_PAGE
   - IN_PERIOD
   - LINKS_TO_DETAIL
   - REPRESENTS_CONSTITUENCY
   - REPRESENTS_STATE
   - SERVED_DURING

3. PROPERTY KEYS:
   - birth_year
   - comment
   - completion_tokens
   - confidence
   - constituency
   - dauer
   - death_year
   - description
   - detail_page
   - dqr_level
   - ende_jahr
   - estimated_costs
   - federate_state
   - firstname
   - full_name
   - geschlecht
   - geschlecht_confidence
   - geschlecht_reasoning
   - html
   - id
   - lastname
   - model
   - name
   - number
   - political_party
   - prompt_tokens
   - remarks
   - section_content
   - section_header
   - start_jahr
   - timestamp
   - title
   - u

In [11]:
# Collect the distinct `start_jahr` values stored on any node.
# NOTE(review): this driver is not closed in this cell; call driver.close()
# once no later cell needs it.
driver = GraphDatabase.driver(URI, auth=AUTH)

with driver.session() as session:
    # ORDER BY makes the result deterministic: without it, LIMIT keeps an
    # arbitrary subset of the distinct values in an arbitrary order.
    result = session.run(
        """
        MATCH (n)
        WHERE n.start_jahr IS NOT NULL
        RETURN DISTINCT n.start_jahr AS start_jahr
        ORDER BY start_jahr
        LIMIT 25
        """
    )
    result_data = [r["start_jahr"] for r in result]

# Last expression of the cell, so the notebook displays the list.
result_data

[1949,
 1953,
 1957,
 1961,
 1965,
 1969,
 1972,
 1976,
 1980,
 1983,
 1987,
 1990,
 1994,
 1998,
 2002,
 2005,
 2009,
 2013,
 2017,
 2021]