In [15]:
#!pip install neo4j

Defaulting to user installation because normal site-packages is not writeable



[notice] A new release of pip available: 22.3.1 -> 23.1
[notice] To update, run: C:\ProgramData\Anaconda3\python.exe -m pip install --upgrade pip


In [1]:
import os
import re

import pandas as pd
from neo4j import GraphDatabase
from neo4j.exceptions import ServiceUnavailable

In [28]:
# Read data
# Data source: https://data.aseanstats.org/visitors
# The first line of the CSV is skipped so that the second line is used as the header.
arrival_df = pd.read_csv("ASEANStatsData.csv", skiprows=1)
# Forward-fill blanks with the value from the row above. `DataFrame.ffill()`
# replaces the deprecated `fillna(method="ffill")` spelling.
# NOTE(review): this fills *every* column, so a missing 2019 count would also
# inherit the previous row's value - confirm only the destination column is sparse.
arrival_df = arrival_df.ffill()
# arrival_df = pd.read_excel("arrivals_asean.xlsx")
print(len(arrival_df))
arrival_df.head()

770


Unnamed: 0,Destination Country,Origin Country,2019
0,Brunei Darussalam,Australia [AU],10 188
1,Brunei Darussalam,Bangladesh [BD],3 281
2,Brunei Darussalam,Cambodia [KH],463
3,Brunei Darussalam,Canada [CA],2 322
4,Brunei Darussalam,China [CN],74 511


In [29]:
# Read Mapping data
# Load the country name -> country code lookup and relabel its columns for the
# destination side, so it can be joined on 'Destination Country'.
destination_column_names = {
    'Country Name': 'Destination Country',
    'Country Code': 'Destination Country Code',
}
code_map = pd.read_csv("country_name_code_map.csv").rename(columns=destination_column_names)

In [30]:
# ETL
# Attach the destination country code; a left join keeps every arrival row
# even when the destination name has no entry in the lookup.
arrival_df = arrival_df.merge(code_map, on='Destination Country', how='left')

# "Australia [AU]" -> name "Australia", code "AU".
# n=1 splits on the first '[' only, so the result never has more than two
# columns even if a name itself contains a bracket.
origin_parts = arrival_df["Origin Country"].str.split('[', n=1, expand=True)
arrival_df["Origin Country Name"] = origin_parts[0].str.strip()
# regex=False: ']' and ' ' are literal characters, independent of the pandas
# version's default for `regex`.
arrival_df["Origin Country Code"] = origin_parts[1].str.replace(']', '', regex=False)

# Counts arrive as strings with a space as thousands separator ("10 188").
arrival_df['visitors'] = arrival_df['2019'].str.replace(' ', '', regex=False).astype(int)
arrival_df.head()

Unnamed: 0,Destination Country,Origin Country,2019,Destination Country Code,Origin Country Name,Origin Country Code,visitors
0,Brunei Darussalam,Australia [AU],10 188,BN,Australia,AU,10188
1,Brunei Darussalam,Bangladesh [BD],3 281,BN,Bangladesh,BD,3281
2,Brunei Darussalam,Cambodia [KH],463,BN,Cambodia,KH,463
3,Brunei Darussalam,Canada [CA],2 322,BN,Canada,CA,2322
4,Brunei Darussalam,China [CN],74 511,BN,China,CN,74511


In [31]:
# Drop rows where a country is listed as an origin of its own arrivals
# (destination code equals origin code).
differs_from_destination = arrival_df["Destination Country Code"].ne(arrival_df["Origin Country Code"])
arrival_df = arrival_df.loc[differs_from_destination]
print(arrival_df.shape[0])

# Drop the catch-all "Others Unspecified Countries" origin rows.
is_specified_origin = arrival_df["Origin Country Name"].ne("Others Unspecified Countries")
arrival_df = arrival_df.loc[is_specified_origin]
print(arrival_df.shape[0])

769
759


In [32]:
# Build the list of distinct countries (name + code) that appear on either
# side of an arrival row; these become the graph nodes.

# Destination side, relabelled to the shared column names.
dest_country = arrival_df[["Destination Country", "Destination Country Code"]].rename(
    columns={'Destination Country': 'Country Name', 'Destination Country Code': 'Country Code'}
)

# Origin side, relabelled to the same shared column names.
ori_country = arrival_df[["Origin Country Name", "Origin Country Code"]].rename(
    columns={'Origin Country Name': 'Country Name', 'Origin Country Code': 'Country Code'}
)

# Stack both sides, then keep the first occurrence of each (name, code) pair.
entities = pd.concat([dest_country, ori_country])
print(entities.shape[0])

unique_entities = entities.drop_duplicates()
print(unique_entities.shape[0])

1518
197


In [33]:
# Keep only what a graph relationship needs: both endpoint codes and the
# visitor count.
relationship_columns = ["Destination Country Code", "Origin Country Code", "visitors"]
arrival_df = arrival_df.loc[:, relationship_columns]
arrival_df.head()

Unnamed: 0,Destination Country Code,Origin Country Code,visitors
0,BN,AU,10188
1,BN,BD,3281
2,BN,KH,463
3,BN,CA,2322
4,BN,CN,74511


https://neo4j.com/docs/aura/aurads/connecting/python/

In [11]:
# Connection settings are read from environment variables so the cell runs on a
# fresh kernel and no credentials are stored in the notebook. Set them before
# starting Jupyter:
#   AURA_CONNECTION_URI  "neo4j+s://85b099f8.databases.neo4j.io:7687" (to connect to the system owner's Neo4j Aura instance)
#                        "neo4j+s://xxxxxxxx.databases.neo4j.io" (to connect to your own Neo4j Aura instance; replace 'xxxxxxxx')
#   AURA_USERNAME        username tagged to the Neo4j instance (defaults to "neo4j")
#   AURA_PASSWORD        password tagged to the Neo4j instance
_missing_settings = [
    name for name in ("AURA_CONNECTION_URI", "AURA_PASSWORD") if not os.environ.get(name)
]
if _missing_settings:
    # Fail with an actionable message instead of a NameError/KeyError further down.
    raise RuntimeError(
        "Missing Neo4j connection settings; set environment variable(s): "
        + ", ".join(_missing_settings)
    )

AURA_CONNECTION_URI = os.environ["AURA_CONNECTION_URI"]
AURA_USERNAME = os.environ.get("AURA_USERNAME", "neo4j")
AURA_PASSWORD = os.environ["AURA_PASSWORD"]

# Driver instantiation
driver = GraphDatabase.driver(
    AURA_CONNECTION_URI,
    auth=(AURA_USERNAME, AURA_PASSWORD)
)

In [34]:
# Optional: delete all existing nodes and relationships before loading.
# DO NOT run this if you are using an instance that already holds the pre-populated graph.
# query = (
#        "MATCH (all_nodes)"
#        "OPTIONAL MATCH (all_nodes)-[all_rels]->()"
#        "DELETE all_nodes, all_rels"
#   )
# with driver.session() as session:
#    result = session.run(query)

In [35]:
# Create one `country` node per unique (code, name) pair.
# MERGE only creates a node when no node with the same code and name exists,
# so re-running this cell does not duplicate nodes.
query = (
    "MERGE (node: country {code: $code, name: $name}) "
    "RETURN node"
)
# The query text and the session are loop-invariant: build/open them once
# instead of once per row.
with driver.session() as session:
    for code, name in zip(unique_entities['Country Code'], unique_entities['Country Name']):
        # consume() waits for the statement to finish, so a failure is raised
        # for the row that caused it, before the progress line is printed.
        session.run(query, code=code, name=name).consume()
        print("create new node with code as", code)

create new node with code as BN
create new node with code as KH
create new node with code as ID
create new node with code as LA
create new node with code as MY
create new node with code as MM
create new node with code as PH
create new node with code as SG
create new node with code as TH
create new node with code as VN
create new node with code as AU
create new node with code as BD
create new node with code as CA
create new node with code as CN
create new node with code as DK
create new node with code as FR
create new node with code as DE
create new node with code as IN
create new node with code as IE
create new node with code as IT
create new node with code as JP
create new node with code as KR
create new node with code as NP
create new node with code as NL
create new node with code as NZ
create new node with code as NO
create new node with code as OM
create new node with code as PK
create new node with code as LK
create new node with code as SE
create new node with code as GB
create n

In [37]:
# Create relationships: (origin)-[:travel_to {visitors}]->(destination)
#
# Both endpoints are matched with the `country` label used when the nodes were
# created, so the lookup is restricted to country nodes rather than scanning
# every node in the graph.
query = (
    "MATCH (n1:country {code: $code1}) "
    "MATCH (n2:country {code: $code2}) "
    "MERGE (n1)-[r:travel_to {visitors: $arrival}]->(n2) "
    "RETURN n1, n2, r"
)
# The query text and the session are loop-invariant: build/open them once
# instead of once per row.
with driver.session() as session:
    # arrival_df columns are (destination code, origin code, visitors).
    for dest, orig, number in arrival_df.itertuples(index=False, name=None):
        print(dest, orig, number)
        # int() guarantees a plain Python integer is sent as the parameter.
        # consume() surfaces any error for this row before the log line below.
        session.run(query, code1=orig, code2=dest, arrival=int(number)).consume()
        print("create new relationship {0} - {1} visitors travel_to -> {2}".format(orig, number, dest))

BN AU 10188
create new relationship AU - 10188 visitors travel_to -> BN
BN BD 3281
create new relationship BD - 3281 visitors travel_to -> BN
BN KH 463
create new relationship KH - 463 visitors travel_to -> BN
BN CA 2322
create new relationship CA - 2322 visitors travel_to -> BN
BN CN 74511
create new relationship CN - 74511 visitors travel_to -> BN
BN DK 312
create new relationship DK - 312 visitors travel_to -> BN
BN FR 1381
create new relationship FR - 1381 visitors travel_to -> BN
BN DE 1764
create new relationship DE - 1764 visitors travel_to -> BN
BN IN 8925
create new relationship IN - 8925 visitors travel_to -> BN
BN ID 33626
create new relationship ID - 33626 visitors travel_to -> BN
BN IE 109
create new relationship IE - 109 visitors travel_to -> BN
BN IT 612
create new relationship IT - 612 visitors travel_to -> BN
BN JP 10680
create new relationship JP - 10680 visitors travel_to -> BN
BN KR 15767
create new relationship KR - 15767 visitors travel_to -> BN
BN LA 221
create n

create new relationship TN - 844 visitors travel_to -> KH
KH TR 6666
create new relationship TR - 6666 visitors travel_to -> KH
KH UA 7476
create new relationship UA - 7476 visitors travel_to -> KH
KH AE 754
create new relationship AE - 754 visitors travel_to -> KH
KH GB 163177
create new relationship GB - 163177 visitors travel_to -> KH
KH US 248863
create new relationship US - 248863 visitors travel_to -> KH
KH UY 2170
create new relationship UY - 2170 visitors travel_to -> KH
KH UZ 701
create new relationship UZ - 701 visitors travel_to -> KH
KH VE 529
create new relationship VE - 529 visitors travel_to -> KH
KH VN 908803
create new relationship VN - 908803 visitors travel_to -> KH
KH YE 576
create new relationship YE - 576 visitors travel_to -> KH
ID AF 703
create new relationship AF - 703 visitors travel_to -> ID
ID AL 1041
create new relationship AL - 1041 visitors travel_to -> ID
ID DZ 5000
create new relationship DZ - 5000 visitors travel_to -> ID
ID AS 89
create new relationsh

create new relationship MD - 1135 visitors travel_to -> ID
ID MC 92
create new relationship MC - 92 visitors travel_to -> ID
ID MN 4260
create new relationship MN - 4260 visitors travel_to -> ID
ID MA 11371
create new relationship MA - 11371 visitors travel_to -> ID
ID MZ 507
create new relationship MZ - 507 visitors travel_to -> ID
ID MM 46381
create new relationship MM - 46381 visitors travel_to -> ID
ID NA 598
create new relationship NA - 598 visitors travel_to -> ID
ID NR 23
create new relationship NR - 23 visitors travel_to -> ID
ID NP 18977
create new relationship NP - 18977 visitors travel_to -> ID
ID NL 215287
create new relationship NL - 215287 visitors travel_to -> ID
ID NC 2
create new relationship NC - 2 visitors travel_to -> ID
ID NZ 149010
create new relationship NZ - 149010 visitors travel_to -> ID
ID NI 165
create new relationship NI - 165 visitors travel_to -> ID
ID NE 55
create new relationship NE - 55 visitors travel_to -> ID
ID NG 3262
create new relationship NG - 3

create new relationship EG - 29831 visitors travel_to -> MY
MY FR 141661
create new relationship FR - 141661 visitors travel_to -> MY
MY DE 130221
create new relationship DE - 130221 visitors travel_to -> MY
MY IN 735309
create new relationship IN - 735309 visitors travel_to -> MY
MY ID 3623277
create new relationship ID - 3623277 visitors travel_to -> MY
MY IQ 21421
create new relationship IQ - 21421 visitors travel_to -> MY
MY IE 19696
create new relationship IE - 19696 visitors travel_to -> MY
MY IR 46559
create new relationship IR - 46559 visitors travel_to -> MY
MY IT 54710
create new relationship IT - 54710 visitors travel_to -> MY
MY JP 424694
create new relationship JP - 424694 visitors travel_to -> MY
MY KZ 18138
create new relationship KZ - 18138 visitors travel_to -> MY
MY KR 673065
create new relationship KR - 673065 visitors travel_to -> MY
MY LA 26955
create new relationship LA - 26955 visitors travel_to -> MY
MY MM 46257
create new relationship MM - 46257 visitors travel

create new relationship LR - 71 visitors travel_to -> MM
MM LY 7
create new relationship LY - 7 visitors travel_to -> MM
MM LI 6
create new relationship LI - 6 visitors travel_to -> MM
MM LT 500
create new relationship LT - 500 visitors travel_to -> MM
MM LU 257
create new relationship LU - 257 visitors travel_to -> MM
MM MO 3352
create new relationship MO - 3352 visitors travel_to -> MM
MM MG 27
create new relationship MG - 27 visitors travel_to -> MM
MM MW 61
create new relationship MW - 61 visitors travel_to -> MM
MM MY 44203
create new relationship MY - 44203 visitors travel_to -> MM
MM MV 36
create new relationship MV - 36 visitors travel_to -> MM
MM ML 10
create new relationship ML - 10 visitors travel_to -> MM
MM MT 118
create new relationship MT - 118 visitors travel_to -> MM
MM MH 12
create new relationship MH - 12 visitors travel_to -> MM
MM MR 1
create new relationship MR - 1 visitors travel_to -> MM
MM MU 125
create new relationship MU - 125 visitors travel_to -> MM
MM MX 2

create new relationship NZ - 37872 visitors travel_to -> PH
PH NG 3439
create new relationship NG - 3439 visitors travel_to -> PH
PH NO 23464
create new relationship NO - 23464 visitors travel_to -> PH
PH OM 2526
create new relationship OM - 2526 visitors travel_to -> PH
PH PK 5793
create new relationship PK - 5793 visitors travel_to -> PH
PH PG 8828
create new relationship PG - 8828 visitors travel_to -> PH
PH PE 1660
create new relationship PE - 1660 visitors travel_to -> PH
PH PL 15816
create new relationship PL - 15816 visitors travel_to -> PH
PH PT 8113
create new relationship PT - 8113 visitors travel_to -> PH
PH QA 2491
create new relationship QA - 2491 visitors travel_to -> PH
PH RU 36111
create new relationship RU - 36111 visitors travel_to -> PH
PH SA 43748
create new relationship SA - 43748 visitors travel_to -> PH
PH SG 158595
create new relationship SG - 158595 visitors travel_to -> PH
PH ZA 8553
create new relationship ZA - 8553 visitors travel_to -> PH
PH ES 49748
create

create new relationship AU - 383511 visitors travel_to -> VN
VN BE 34187
create new relationship BE - 34187 visitors travel_to -> VN
VN KH 227910
create new relationship KH - 227910 visitors travel_to -> VN
VN CA 159121
create new relationship CA - 159121 visitors travel_to -> VN
VN CN 5806425
create new relationship CN - 5806425 visitors travel_to -> VN
VN DK 42043
create new relationship DK - 42043 visitors travel_to -> VN
VN FI 21480
create new relationship FI - 21480 visitors travel_to -> VN
VN FR 287655
create new relationship FR - 287655 visitors travel_to -> VN
VN DE 226792
create new relationship DE - 226792 visitors travel_to -> VN
VN HK 51618
create new relationship HK - 51618 visitors travel_to -> VN
VN IN 168998
create new relationship IN - 168998 visitors travel_to -> VN
VN ID 106688
create new relationship ID - 106688 visitors travel_to -> VN
VN IT 70798
create new relationship IT - 70798 visitors travel_to -> VN
VN JP 951962
create new relationship JP - 951962 visitors t

In [38]:
# Find the largest and the smallest visitor count over all `travel_to`
# relationships that point at the node with code 'SG'.
query = (
    "MATCH (c1)-[r:travel_to]->(c2 {code :'SG'}) "
    "RETURN max(r.visitors), min(r.visitors)"
)
with driver.session() as session:
    print(query)
    # An aggregation without grouping keys yields exactly one row, so read
    # that single record directly instead of looping over the result.
    record = session.run(query).single()
    maxvisitors = record['max(r.visitors)']
    minvisitors = record['min(r.visitors)']

MATCH (c1)-[r:travel_to]->(c2 {code :'SG'})RETURN max(r.visitors), min(r.visitors)


In [39]:
# Sanity check: the expected output is 3627030 (max) followed by 5182 (min).
for observed_value in (maxvisitors, minvisitors):
    print(observed_value)

3627030
5182
