### Refugee Movement Around the World Analysis

https://github.com/rfordatascience/tidytuesday/blob/master/data/2023/2023-08-22/readme.md

In [2]:
import neo4j
import pandas as pd
from IPython.display import display, Image
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [3]:
population = pd.read_csv('population.csv')

In [4]:
# Map country three letter codes to names (e.g., USA == United States of America) 
origin_country_dict = dict(zip(population['coo'], population['coo_name']))
asylum_country_dict = dict(zip(population['coa'], population['coa_name']))
country_name_dict = origin_country_dict | asylum_country_dict
print(f'{len(country_name_dict)} unique countries in dataset, with \
{len(origin_country_dict)} origin and {len(asylum_country_dict)} asylum.')

212 unique countries in dataset, with 210 origin and 189 asylum.


In [5]:
# Groupby to get overall refugee and asylum seeker counts
gb = population.groupby(['coo', 'coa'])['refugees', 'asylum_seekers'].sum()
gb.reset_index(inplace=True)
gb

  gb = population.groupby(['coo', 'coa'])['refugees', 'asylum_seekers'].sum()


Unnamed: 0,coo,coa,refugees,asylum_seekers
0,ABW,ABW,0,0
1,ABW,USA,0,7
2,AFG,AFG,0,0
3,AFG,ALB,19,0
4,AFG,ALG,0,5
...,...,...,...,...
7705,ZIM,TUR,6,113
7706,ZIM,UKR,0,35
7707,ZIM,USA,16123,7495
7708,ZIM,ZAM,53,5


In [6]:
#Qiong--Test Alternative Options
#Remove unqualified rows first before creating relationship

population2=population[population['refugees']>0]
population3=population2[population2['coo']!=population2['coa']]

# Map country three letter codes to names (e.g., USA == United States of America) 
origin_country_dict2 = dict(zip(population3['coo'], population3['coo_name']))
asylum_country_dict2 = dict(zip(population3['coa'], population3['coa_name']))
country_name_dict2 = origin_country_dict2 | asylum_country_dict2
print(f'{len(country_name_dict2)} unique countries in dataset, with \
{len(origin_country_dict2)} origin and {len(asylum_country_dict2)} asylum.')

#2 Countries only accepting refugees, not generating refugees

# Groupby to get overall refugee and asylum seeker counts
gb2 = population3.groupby(['coo', 'coa'])['refugees', 'asylum_seekers'].sum()
gb2.reset_index(inplace=True)
gb2

205 unique countries in dataset, with 203 origin and 177 asylum.


  gb2 = population3.groupby(['coo', 'coa'])['refugees', 'asylum_seekers'].sum()


Unnamed: 0,coo,coa,refugees,asylum_seekers
0,AFG,ALB,19,0
1,AFG,ARE,203,529
2,AFG,ARG,72,28
3,AFG,ARM,24,0
4,AFG,AUL,113114,20090
...,...,...,...,...
5645,ZIM,SWI,173,74
5646,ZIM,THA,35,0
5647,ZIM,TUR,6,59
5648,ZIM,USA,16123,7495


#### Connect, login, create driver, create session; with community edition, we can only use 1 database, the "neo4j" database

In [7]:
driver = neo4j.GraphDatabase.driver(uri="neo4j://neo4j:7687", auth=("neo4j","ucb_mids_w205"))

In [8]:
session = driver.session(database="neo4j")

#### Functions to Manage neo4j data

In [9]:
def my_neo4j_wipe_out_database():
    "wipe out database by deleting all nodes and relationships"
    
    query = "match (node)-[relationship]->() delete node, relationship"
    session.run(query)
    
    query = "match (node) delete node"
    session.run(query)

In [10]:
def my_neo4j_run_query_pandas(query, **kwargs):
    "run a query and return the results in a pandas dataframe"
    
    result = session.run(query, **kwargs)
    
    df = pd.DataFrame([r.values() for r in result], columns=result.keys())
    
    return df

In [11]:
def my_neo4j_nodes_relationships():
    "print all the nodes and relationships"
   
    print("-------------------------")
    print("  Nodes:")
    print("-------------------------")
    
    query = """
        match (n) 
        return n.name as node_name, labels(n) as labels
        order by n.name
    """
    
    df = my_neo4j_run_query_pandas(query)
    
    number_nodes = df.shape[0]
    
    display(df)
    
    print("-------------------------")
    print("  Relationships:")
    print("-------------------------")
    
    query = """
        match (n1)-[r]->(n2) 
        return n1.name as node_name_1, labels(n1) as node_1_labels, 
            type(r) as relationship_type, n2.name as node_name_2, labels(n2) as node_2_labels
        order by node_name_1, node_name_2
    """
    
    df = my_neo4j_run_query_pandas(query)
    
    number_relationships = df.shape[0]
    
    display(df)
    
    density = (2 * number_relationships) / (number_nodes * (number_nodes - 1))
    
    print("-------------------------")
    print("  Density:", f'{density:.1f}')
    print("-------------------------")

### Creating Graph 1 and 2

#### Graph 1 : Countries Accepting Refugees
Create a node for each country, with a one way relationship from the country the refugees are <b>emigrating from</b> to the country the refugees are <b>immigrating t</b>o, with the <b>weight of the relationship the number of refugees</b>.

In [20]:
my_neo4j_wipe_out_database()
accept_refugees = "CREATE"

In [21]:
for c_code, c_name in origin_country_dict.items():
    temp_string = "(" + c_code + ":Country {name: \"" + c_name + "\"}),"
    accept_refugees = accept_refugees + temp_string
    
# Skip where origin/destination country is the same
# Skip where refugees count is 0
coo_to_coa = []
for i in range(0, len(gb)):
    if gb.iloc[i]['coo'] != gb.iloc[i]['coa'] and\
    gb.iloc[i]['refugees'] > 0:
        coo_to_coa.append(
            "("
            + gb.iloc[i]['coo']
            + ")-[:IS_CONNECTED_TO {weight: " + str(gb.iloc[i]['refugees'])
            + "}]->("
            + gb.iloc[i]['coa']
            + ")"
        )
accept_refugees = accept_refugees + ', '.join(coo_to_coa)

In [22]:
def my_create_graph_1_accept_refugees():
    "create the connected graph"   
    my_neo4j_wipe_out_database()
    query = accept_refugees

    session.run(query)

In [23]:
# check
coo_to_coa[0]

'(AFG)-[:IS_CONNECTED_TO {weight: 19}]->(ALB)'

In [24]:
my_create_graph_1_accept_refugees()

In [25]:
my_neo4j_nodes_relationships()

-------------------------
  Nodes:
-------------------------


Unnamed: 0,node_name,labels
0,Afghanistan,[Country]
1,Albania,[Country]
2,Algeria,[Country]
3,Andorra,[Country]
4,Angola,[Country]
...,...,...
206,Western Sahara,[Country]
207,Yemen,[Country]
208,Zambia,[Country]
209,Zimbabwe,[Country]


-------------------------
  Relationships:
-------------------------


Unnamed: 0,node_name_1,node_1_labels,relationship_type,node_name_2,node_2_labels
0,Afghanistan,[Country],IS_CONNECTED_TO,Albania,[Country]
1,Afghanistan,[Country],IS_CONNECTED_TO,Argentina,[Country]
2,Afghanistan,[Country],IS_CONNECTED_TO,Armenia,[Country]
3,Afghanistan,[Country],IS_CONNECTED_TO,Australia,[Country]
4,Afghanistan,[Country],IS_CONNECTED_TO,Austria,[Country]
...,...,...,...,...,...
5645,Zimbabwe,[Country],IS_CONNECTED_TO,Thailand,[Country]
5646,Zimbabwe,[Country],IS_CONNECTED_TO,Türkiye,[Country]
5647,Zimbabwe,[Country],IS_CONNECTED_TO,United Kingdom of Great Britain and Northern I...,[Country]
5648,Zimbabwe,[Country],IS_CONNECTED_TO,United States of America,[Country]


-------------------------
  Density: 0.3
-------------------------


In [None]:
Image(filename='viz/Total Generating Refugee.png')

### Graph 2 : Countries of Origin of Refugees
Create a node for each country, with a one way relationship from the country the refugees are <b>immmigrating to</b> from the country the refugees are <b>emmigrating from</b>, with the <b>weight of the relationship the number of refugees</b>.

In [33]:
my_neo4j_wipe_out_database()
origin_refugees = "CREATE"

In [34]:
for c_code, c_name in country_name_dict.items():#Here you use country_name-dict but previous one you used origin_county? 
    temp_string = "(" + c_code + ":Country {name: \"" + c_name + "\"}),"
    origin_refugees = origin_refugees + temp_string

In [36]:
# Skip where origin/destination country is the same
# Skip where refugees count is 0
# NOTE: Same loop as above, but 'coo' and 'coa' are flipped
coa_to_coo = []
for i in range(0, len(gb)):
    if gb.iloc[i]['coo'] != gb.iloc[i]['coa'] and\
    gb.iloc[i]['refugees'] > 0:
        coa_to_coo.append(
            "("
            + gb.iloc[i]['coa']
            + ")-[:IS_CONNECTED_TO {weight: " + str(gb.iloc[i]['refugees'])
            + "}]->("
            + gb.iloc[i]['coo']
            + ")"
        )

origin_refugees = origin_refugees + ', '.join(coa_to_coo)

In [37]:
def my_create_graph_2_origin_refugees():
    "create the connected graph"    
    my_neo4j_wipe_out_database()
    query = origin_refugees

    session.run(query)

In [38]:
# check
coa_to_coo[0]

'(ALB)-[:IS_CONNECTED_TO {weight: 19}]->(AFG)'

In [39]:
my_create_graph_2_origin_refugees()

In [40]:
my_neo4j_nodes_relationships()

-------------------------
  Nodes:
-------------------------


Unnamed: 0,node_name,labels
0,Afghanistan,[Country]
1,Albania,[Country]
2,Algeria,[Country]
3,Andorra,[Country]
4,Angola,[Country]
...,...,...
207,Viet Nam,[Country]
208,Western Sahara,[Country]
209,Yemen,[Country]
210,Zambia,[Country]


-------------------------
  Relationships:
-------------------------


Unnamed: 0,node_name_1,node_1_labels,relationship_type,node_name_2,node_2_labels
0,Afghanistan,[Country],IS_CONNECTED_TO,Iran (Islamic Rep. of),[Country]
1,Afghanistan,[Country],IS_CONNECTED_TO,Iraq,[Country]
2,Afghanistan,[Country],IS_CONNECTED_TO,Pakistan,[Country]
3,Afghanistan,[Country],IS_CONNECTED_TO,Türkiye,[Country]
4,Albania,[Country],IS_CONNECTED_TO,Afghanistan,[Country]
...,...,...,...,...,...
5645,Zimbabwe,[Country],IS_CONNECTED_TO,Sierra Leone,[Country]
5646,Zimbabwe,[Country],IS_CONNECTED_TO,Somalia,[Country]
5647,Zimbabwe,[Country],IS_CONNECTED_TO,South Sudan,[Country]
5648,Zimbabwe,[Country],IS_CONNECTED_TO,Sudan,[Country]


-------------------------
  Density: 0.3
-------------------------


In [None]:
Image(filename='viz/countries accepting .png')

### Degree Centrality 
swap out which graph function is commented in the next cell to switch degree centrality outputs!
- using graph 1 will show us the countries with the most creation of refugees 
- using graph 2 will show us the countries with the most grant asylum

In [72]:
my_neo4j_wipe_out_database()

#my_create_graph_1_accept_refugees()
my_create_graph_2_origin_refugees()

In [73]:
query = "CALL gds.graph.drop('ds_graph', false)"
session.run(query)

query = "CALL gds.graph.project('ds_graph', 'Country', 'IS_CONNECTED_TO', {relationshipProperties: 'weight'})"
session.run(query)

<neo4j._sync.work.result.Result at 0x7f7f7d305400>

In [74]:
query = """

CALL gds.degree.stream('ds_graph')
YIELD nodeId, score
RETURN gds.util.asNode(nodeId).name AS name, score as degree
ORDER BY degree DESC, name

"""

my_neo4j_run_query_pandas(query)

Unnamed: 0,name,degree
0,Canada,180.0
1,United States of America,180.0
2,Germany,165.0
3,United Kingdom of Great Britain and Northern I...,146.0
4,France,140.0
...,...,...
207,Timor-Leste,0.0
208,Tonga,0.0
209,Tuvalu,0.0
210,Unknown,0.0


### Harmonic Centrality
swap out which graph function is commented in the next cell to switch harmonic centrality outputs!
- using graph 1 will tell us the intermediate countries with the highest volume of creating refugees
- using graph 2 will tell us the intermediate countries with the highest volume of granting asylum

In [78]:
my_neo4j_wipe_out_database()

my_create_graph_1_accept_refugees()
#my_create_graph_2_origin_refugees()

In [79]:
query = "CALL gds.graph.drop('ds_graph', false)"
session.run(query)

query = "CALL gds.graph.project('ds_graph', 'Country', 'IS_CONNECTED_TO', {relationshipProperties: 'weight'})"
session.run(query)

<neo4j._sync.work.result.Result at 0x7f7f7d24ba00>

In [80]:
query = """

CALL gds.alpha.closeness.harmonic.stream('ds_graph', {})
YIELD nodeId, centrality
RETURN gds.util.asNode(nodeId).name AS name, centrality as closeness
ORDER BY centrality DESC

"""

my_neo4j_run_query_pandas(query)

Unnamed: 0,name,closeness
0,United States of America,0.913876
1,Canada,0.913876
2,Germany,0.877990
3,United Kingdom of Great Britain and Northern I...,0.830941
4,France,0.815789
...,...,...
205,Brunei Darussalam,0.000000
206,French Guiana,0.000000
207,Holy See,0.000000
208,Anguilla,0.000000


### Louvain Modularity
with communities that physically span the sea showing sea port routes that refugees might be taking

In [85]:
my_neo4j_wipe_out_database()

my_create_graph_1_accept_refugees()
#my_create_graph_2_origin_refugees()

In [86]:
query = "CALL gds.graph.drop('ds_graph', false)"
session.run(query)

query = """

CALL gds.graph.project('ds_graph', 'Country', 'IS_CONNECTED_TO', 
                      {relationshipProperties: 'weight'})
"""

session.run(query)

<neo4j._sync.work.result.Result at 0x7f7f7caece80>

In [87]:
query = """

CALL gds.louvain.stream('ds_graph', {includeIntermediateCommunities: true})
YIELD nodeId, communityId, intermediateCommunityIds
RETURN gds.util.asNode(nodeId).name AS name, communityId as community, intermediateCommunityIds as intermediate_community
ORDER BY community, name ASC

"""

my_neo4j_run_query_pandas(query)

Unnamed: 0,name,community,intermediate_community
0,Bermuda,186,"[186, 186, 186]"
1,Tuvalu,194,"[194, 194, 194]"
2,Brunei Darussalam,200,"[200, 200, 200]"
3,Afghanistan,201,"[128, 128, 201]"
4,Albania,201,"[128, 128, 201]"
...,...,...,...
205,Zambia,201,"[128, 128, 201]"
206,Zimbabwe,201,"[128, 128, 201]"
207,Holy See,205,"[205, 205, 205]"
208,Martinique,208,"[208, 208, 208]"
