In [None]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# New imports for using Neo4j
import networkx as nx
from py2neo import Graph
from helpers.vis import draw
%set_env NEO4J_URL http://neo4j:neo4j2@localhost:7474
%load_ext cypher

In [None]:
import os
# Reuse the connection URL that the %set_env line in the first cell exports as
# NEO4J_URL, so the credentials are not repeated in a second place.
graph = Graph(os.environ['NEO4J_URL'])

In [None]:
# function to search columns of a pandas dataframe
def search(df, col, words):
    """Return the rows of ``df`` whose ``col`` contains every word in ``words``.

    A word only matches as a whole token, i.e. when it is neither preceded nor
    followed by a word character. Matching is case-insensitive and missing
    values in the column never match.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to filter.
    col : str
        Name of the string column to search.
    words : iterable of str
        Words that must all be present. They are treated as literal text
        (regex metacharacters are escaped) and empty strings are ignored.

    Returns
    -------
    pandas.DataFrame
        The matching rows of ``df``; an empty frame with the same columns when
        ``words`` contains no usable word.
    """
    # Empty tokens (e.g. produced by splitting on trailing punctuation) would
    # turn into a pattern that rejects ordinary names, so they are dropped.
    tokens = [w for w in words if w]
    if not tokens:
        # np.logical_and.reduce([]) is a scalar True, which cannot index df.
        return df.iloc[0:0]
    cond = [
        df[col].str.contains(r'(?<!\w)' + re.escape(w) + r'(?!\w)', case=False, na=False)
        for w in tokens
    ]
    return df[np.logical_and.reduce(cond)]

# split a string into words with no punctuation
import re
def split(str):
    """Split ``str`` into its words, discarding punctuation and whitespace.

    Parameters
    ----------
    str : str
        Text to tokenize. The name shadows the builtin but is kept because it
        is the caller-visible parameter name.

    Returns
    -------
    list of str
        The runs of word characters found in the text, in order. Empty strings
        (which ``re.split`` yields for leading/trailing separators or empty
        input) are not included.
    """
    return [token for token in re.split(r'\W+', str) if token]

In [None]:
# load datasets
from helpers.swiss_codes import filter_swiss
# Each raw CSV is read and passed straight to filter_swiss, so the unfiltered
# frame is never bound to a name and needs no explicit `del`.
# 1. officers (address processing is switched off for this table)
swiss_officers = filter_swiss(
    pd.read_csv('../data/panama/Officers.csv', header=0, low_memory=False),
    process_address=False,
)
# 2. entities
swiss_entities = filter_swiss(
    pd.read_csv('../data/panama/Entities.csv', header=0, low_memory=False)
)
# 3. intermediaries
swiss_inter = filter_swiss(
    pd.read_csv('../data/panama/Intermediaries.csv', header=0, low_memory=False)
)

## All interests

In [None]:
# Load the interests dataset; the cells below read its first_name, last_name
# and interest_name columns.
all_interests = pd.read_json('data/all_interests.json')

Looking for names in Officers:

In [None]:
# Collect every distinct "first last" name from the interests data for which
# all name tokens are found in the name of at least one Swiss officer.
rs = []
unique_names = all_interests[['first_name','last_name']].drop_duplicates()
for first, last in unique_names.itertuples(index=False):
    full_name = first + ' ' + last
    matches = search(swiss_officers, 'name', split(full_name))
    # NOTE(review): 'Zacharias' is kept even without a match; the reason is
    # not visible in this notebook -- confirm it is intentional.
    if last == 'Zacharias' or not matches.empty:
        rs.append(full_name)

In [None]:
# Names collected by the officer search in the previous cell
rs

And the companies from Entities and Intermediaries:

In [None]:
# Match each distinct interest name against the Swiss intermediary names and
# keep the interest together with the intermediary names it matched.
ints = []
for interest in all_interests['interest_name'].drop_duplicates():
    matched = search(swiss_inter, 'name', split(interest))
    if not matched.empty:
        ints.append([interest, matched['name']])

In [None]:
# [interest name, matching intermediary names] pairs found in the previous cell
ints

In [None]:
# Interest-name / Swiss-entity-name matches. The result is cached on disk and
# only recomputed when the cache file does not exist yet.
try:
    inters = pd.read_csv('data/all_interests_entities_name.csv', index_col=0)
except FileNotFoundError:
    # Only a missing cache triggers the recomputation; any other failure
    # (corrupt file, interrupt, ...) is surfaced instead of silently hidden.
    ents = []
    for interest in all_interests['interest_name'].drop_duplicates():
        r = search(swiss_entities, 'name', split(interest))
        # Series.items() replaces iteritems(), which was removed in pandas 2.0;
        # an empty result simply contributes no rows.
        for idx, name in r['name'].items():
            ents.append([interest.strip(), idx, name])
    inters = pd.DataFrame(ents, columns=['interest_name','idx','entity_name'])
    inters.to_csv('data/all_interests_entities_name.csv')

In [None]:
# One row per (interest_name, entity index, entity_name) match
inters

These matches have to be checked in more detail, as they seem to refer to something else...

## Parliament data

In [None]:
# Load the parliament members dataset; the cells below read its FirstName,
# LastName and Interests columns.
parlement = pd.read_json('data/parliament_members_interests.json')

In [None]:
# Collect every distinct parliament member name for which all name tokens are
# found in the name of at least one Swiss officer.
ps = []
member_names = parlement[['FirstName','LastName']].drop_duplicates()
for first, last in member_names.itertuples(index=False):
    full_name = first + ' ' + last
    matches = search(swiss_officers, 'name', split(full_name))
    if not matches.empty:
        ps.append(full_name)

In [None]:
# Parliament member names that matched a Swiss officer name
ps

In [None]:
# Gather the distinct interest names declared across all parliament members.
# The iteration variables stay local to the comprehension: the previous loop
# variable was called `ints` and overwrote the list of intermediary matches
# that an earlier cell stores under that name.
parl_ints = {
    entry['InterestName']
    for member_interests in parlement['Interests']
    for entry in member_interests
}

In [None]:
# Keep the parliament interest names whose tokens all appear in the name of
# at least one Swiss intermediary.
pis = [
    interest_name
    for interest_name in parl_ints
    if search(swiss_inter, 'name', split(interest_name)).size > 0
]

In [None]:
# Parliament interest names that matched a Swiss intermediary name
pis

Only one match, and there are a lot of possibilities with this name...

### Querying Neo4j for Poggia

In [None]:
%%cypher 
MATCH (o:Officer) WHERE toLower(o.name) CONTAINS "poggia"
// case-insensitive substring match on the officer name; at most 20 rows are returned
RETURN o.name as name, o.countries as countries LIMIT 20

In [None]:
# Plotting Poggia's graph: up to 100 paths of 1 to 100 relationships (in any
# direction) starting from officers whose name contains "poggia"
results = %cypher \
    MATCH (o:Officer) WHERE toLower(o.name) CONTAINS "poggia" \
    match p=(o)-[r*1..100]-() \
    RETURN p limit 100

In [None]:
# Turn the query result into a graph object and render it with the project's
# draw helper (presumably a networkx graph -- confirm against helpers.vis)
G = results.get_graph()
draw(G)

In [None]:
# Plotting energo graph: up to 100 paths of 1 to 100 relationships (in any
# direction) starting from entities whose name contains "energo"
results2 = %cypher \
    MATCH (o1:Entity) WHERE toLower(o1.name) CONTAINS "energo" \
    match p=(o1)-[r*1..100]-() \
    RETURN p limit 100

In [None]:
# Turn the "energo" query result into a graph object and render it
G2 = results2.get_graph()
draw(G2)

We can see results in Morges and Porrentruy (+ Russia, Spain, Panama) all centralized around GENINT SA  
http://ge.ch/hrcintapp/externalCompanyReport.action?companyOfrcId13=CH-660-0806985-5&ofrcLanguage=2

In [None]:
# Plotting a graph concerning the IMD school in Lausanne: up to 20 paths of
# 1 to 100 incoming relationships ending at addresses that contain "schmidheiny"
results3 = %cypher \
    MATCH (o1:Address) WHERE toLower(o1.address) CONTAINS "schmidheiny" \
    match p=(o1)<-[r*1..100]-() \
    RETURN p limit 20

In [None]:
# Turn the address query result into a graph object and render it
G3 = results3.get_graph()
draw(G3)