## Refugee Movement Around the World Analysis

https://github.com/rfordatascience/tidytuesday/blob/master/data/2023/2023-08-22/readme.md

In [7]:
import neo4j
import pandas as pd
from IPython.display import display
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

## Import data

In [8]:
population = pd.read_csv('population.csv')

In [9]:
population[population['coa'] == 'USA'].head(2)

Unnamed: 0,year,coo_name,coo,coo_iso,coa_name,coa,coa_iso,refugees,asylum_seekers,returned_refugees,idps,returned_idps,stateless,ooc,oip,hst
3975,2010,Afghanistan,AFG,AFG,United States of America,USA,USA,1364,46,0,0,0,0,0,,
3976,2010,Albania,ALB,ALB,United States of America,USA,USA,6004,18,0,0,0,0,0,,


In [10]:
# Map country three letter codes to names (e.g., USA == United States of America) 
origin_country_dict = dict(zip(population['coo'], population['coo_name']))
asylum_country_dict = dict(zip(population['coa'], population['coa_name']))
country_name_dict = origin_country_dict | asylum_country_dict
print(f'{len(country_name_dict)} unique countries in dataset, with \
{len(origin_country_dict)} origin and {len(asylum_country_dict)} asylum.')

212 unique countries in dataset, with 210 origin and 189 asylum.


In [134]:
# Groupby to get overall refugee and asylum seeker counts
gb = population.groupby(['coo', 'coa'])['refugees', 'asylum_seekers'].sum()
gb.reset_index(inplace=True)
gb

  gb = population.groupby(['coo', 'coa'])['refugees', 'asylum_seekers'].sum()


Unnamed: 0,coo,coa,refugees,asylum_seekers
0,ABW,ABW,0,0
1,ABW,USA,0,7
2,AFG,AFG,0,0
3,AFG,ALB,19,0
4,AFG,ALG,0,5
...,...,...,...,...
7705,ZIM,TUR,6,113
7706,ZIM,UKR,0,35
7707,ZIM,USA,16123,7495
7708,ZIM,ZAM,53,5


## Connect, login, create driver, create session; with community edition, we can only use 1 database, the "neo4j" database

In [11]:
driver = neo4j.GraphDatabase.driver(uri="neo4j://neo4j:7687", auth=("neo4j","ucb_mids_w205"))

In [12]:
session = driver.session(database="neo4j")

## Functions to Manage neo4j data

In [13]:
def my_neo4j_wipe_out_database():
    "wipe out database by deleting all nodes and relationships"
    
    query = "match (node)-[relationship]->() delete node, relationship"
    session.run(query)
    
    query = "match (node) delete node"
    session.run(query)

In [14]:
def my_neo4j_run_query_pandas(query, **kwargs):
    "run a query and return the results in a pandas dataframe"
    
    result = session.run(query, **kwargs)
    
    df = pd.DataFrame([r.values() for r in result], columns=result.keys())
    
    return df

In [15]:
def my_neo4j_nodes_relationships():
    "print all the nodes and relationships"
   
    print("-------------------------")
    print("  Nodes:")
    print("-------------------------")
    
    query = """
        match (n) 
        return n.name as node_name, labels(n) as labels
        order by n.name
    """
    
    df = my_neo4j_run_query_pandas(query)
    
    number_nodes = df.shape[0]
    
    display(df)
    
    print("-------------------------")
    print("  Relationships:")
    print("-------------------------")
    
    query = """
        match (n1)-[r]->(n2) 
        return n1.name as node_name_1, labels(n1) as node_1_labels, 
            type(r) as relationship_type, n2.name as node_name_2, labels(n2) as node_2_labels
        order by node_name_1, node_name_2
    """
    
    df = my_neo4j_run_query_pandas(query)
    
    number_relationships = df.shape[0]
    
    display(df)
    
    density = (2 * number_relationships) / (number_nodes * (number_nodes - 1))
    
    print("-------------------------")
    print("  Density:", f'{density:.1f}')
    print("-------------------------")
    

## Nodes and Relationships
+ The cells need to be run in succession, creating nodes per country, before creating relationships
+ BUT, the nodes and relationships also need to be created in a _single query_, otherwise neo4j can't find
the country based on its name
+NOTE: neo4j requires <font color='red'>names begin with a letter</font> (e.g., _2011_ throws error but _Y2011_ accepted)

In [130]:
my_neo4j_wipe_out_database()

In [131]:
### Loops to create:
    # 1) node per country
    # 2) relationships between origin and destination countries
# NOTE: neo4j requires names begin with a letter
query = 'CREATE'

In [133]:
# Loop to create node per country
# Template --> (usa:Country {name: 'United States of America'}),
for c_code, c_name in origin_country_dict.items():
    temp_string = "(" + c_code + ":Country {name: \"" + c_name + "\"}),"
    query = query + temp_string

In [135]:
# Loop to create relationships between countries
# Skip where origin/destination country is the same
relationships = []
for i in range(0, len(gb)):
    if gb.iloc[i]['coo'] != gb.iloc[i]['coa']:
        relationships.append(
            "("
            + gb.iloc[i]['coo']
            + ")-[:IS_CONNECTED_TO "
            + "{refugees: " + str(gb.iloc[i]['refugees']) + ', '
            + "asylum_seekers: " + str(gb.iloc[i]['asylum_seekers'])
            + "}]->("
            + gb.iloc[i]['coa']
            + ")"
        )

query = query + ', '.join(relationships)

In [136]:
# Preview first chars in query
query[:200]

'CREATE(Y2010:Year {name: 2010}),(Y2011:Year {name: 2011}),(Y2012:Year {name: 2012}),(Y2013:Year {name: 2013}),(Y2014:Year {name: 2014}),(Y2015:Year {name: 2015}),(Y2016:Year {name: 2016}),(Y2017:Year '

In [137]:
# Run query to create node and relationships
session.run(query)

<neo4j._sync.work.result.Result at 0x7fbbabb57340>

In [138]:
# Confirm that nodes and relationships created successfully
my_neo4j_nodes_relationships()

-------------------------
  Nodes:
-------------------------


Unnamed: 0,node_name,labels
0,Afghanistan,[Country]
1,Albania,[Country]
2,Algeria,[Country]
3,Andorra,[Country]
4,Angola,[Country]
...,...,...
220,2020,[Year]
221,2021,[Year]
222,2022,[Year]
223,,[]


-------------------------
  Relationships:
-------------------------


Unnamed: 0,node_name_1,node_1_labels,relationship_type,node_name_2,node_2_labels
0,Afghanistan,[Country],IS_CONNECTED_TO,Albania,[Country]
1,Afghanistan,[Country],IS_CONNECTED_TO,Algeria,[Country]
2,Afghanistan,[Country],IS_CONNECTED_TO,Argentina,[Country]
3,Afghanistan,[Country],IS_CONNECTED_TO,Armenia,[Country]
4,Afghanistan,[Country],IS_CONNECTED_TO,Australia,[Country]
...,...,...,...,...,...
7624,Zimbabwe,[Country],IS_CONNECTED_TO,Türkiye,[Country]
7625,Zimbabwe,[Country],IS_CONNECTED_TO,Ukraine,[Country]
7626,Zimbabwe,[Country],IS_CONNECTED_TO,United Kingdom of Great Britain and Northern I...,[Country]
7627,Zimbabwe,[Country],IS_CONNECTED_TO,United States of America,[Country]


-------------------------
  Density: 0.3
-------------------------
