# Lookup politicians and their interests in the Panama Papers

Using the data we obtained in the previous notebooks, we will now look up those politicians and their interests in the Panama Papers.

In [5]:
%matplotlib inline
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# New imports for using Neo4j
import networkx as nx
#from py2neo import Graph
from helpers.vis import draw
%set_env NEO4J_URL http://neo4j:neo4j2@localhost:7474
#%load_ext cypher

env: NEO4J_URL=http://neo4j:neo4j2@localhost:7474


In [2]:
# Create a `Graph` handle for the Neo4j server at localhost:7474.
# NOTE(review): `Graph` is not imported in the visible cells (the py2neo import
# in the first cell is commented out) — confirm where it is expected to come from.
# NOTE(review): the URL embeds the username and password and repeats the
# NEO4J_URL value set with %set_env — consider a single, uncommitted source.
graph = Graph('http://neo4j:neo4j2@localhost:7474')

In [27]:
# function to search columns of a pandas dataframe
def search(df, col, words):
    """Return the rows of ``df`` whose ``col`` contains every word in ``words``.

    Matching is case-insensitive and whole-word: a word only counts when it is
    not directly preceded or followed by another word character, so "Poggia"
    does not match "Poggiani". Missing values in ``col`` never match.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to filter.
    col : str
        Name of the text column to search.
    words : iterable of str
        Words that must all be present. They are matched literally (regex
        metacharacters are escaped) and empty strings are ignored.

    Returns
    -------
    pandas.DataFrame
        The matching rows of ``df``; an empty frame with the same columns when
        ``words`` contains no non-empty word.
    """
    # Empty tokens would otherwise compile to a bare lookaround pair that only
    # matches between two non-word characters, silently dropping valid rows.
    terms = [w for w in words if w]
    if not terms:
        # No search terms: report no matches instead of indexing df with a scalar.
        return df.iloc[0:0]
    cond = [
        df[col].str.contains(r'(?<!\w)' + re.escape(w) + r'(?!\w)', case=False, na=False)
        for w in terms
    ]
    return df[np.logical_and.reduce(cond)]

# split a string into words, dropping punctuation
import re
def split(str):
    """Return the words of ``str`` as a list, without punctuation.

    A word is a maximal run of word characters (letters, digits, underscore).
    Unlike splitting on non-word runs, this never yields empty strings when the
    text starts or ends with punctuation, and an empty input gives ``[]``.

    The parameter keeps its original name (which shadows the builtin) so that
    existing callers are unaffected.
    """
    return re.findall(r'\w+', str)

In [31]:
# load datasets
from helpers.swiss_codes import filter_swiss

# Each raw CSV is read and passed straight to filter_swiss, so only the
# filtered frame is ever bound to a name and the full table can be freed
# immediately.
# NOTE(review): filter_swiss presumably keeps the Switzerland-related rows —
# confirm in helpers.swiss_codes.

# 1. officers (the only table filtered with process_address=False)
swiss_officers = filter_swiss(
    pd.read_csv('data/Officers.csv', header=0, low_memory=False),
    process_address=False,
)

# 2. entities
swiss_entities = filter_swiss(
    pd.read_csv('data/Entities.csv', header=0, low_memory=False)
)

# 3. intermediaries
swiss_inter = filter_swiss(
    pd.read_csv('data/Intermediaries.csv', header=0, low_memory=False)
)

## All interests

In [42]:
# Load the interests table and preview only its first rows instead of
# dumping the whole frame into the notebook output.
all_interests = pd.read_json('data/all_interests.json')
all_interests.head(10)

Unnamed: 0,data_source,first_name,index,interest_name,last_name,position
0,federal_parliament,Christoph,68,Kraftwerk Birsfelden AG,Eymann,Membre
1,federal_parliament,Christoph,68,Universität Basel (Universitätsrat),Eymann,Membre
10,federal_parliament,Ulrich,85,Online Easy AG,Giezendanner,Vice-président(e)
100,federal_parliament,Kathy,439,Organe consultatif sur les changements climati...,Riklin,Président(e)
1000,federal_parliament,Albert,3372,IG Volkskultur Schweiz und Fürstentum Liechten...,Vitali,Président(e)
1001,federal_parliament,Albert,3372,Pro Senectute Kanton Luzern (Stiftungsversamml...,Vitali,Membre
1002,federal_parliament,Albert,3372,Energie Oberkirch AG,Vitali,Président(e)
1003,federal_parliament,Albert,3372,Schweizer Verband für Rechnungslegung,Vitali,Membre
1004,federal_parliament,Albert,3372,Wohnen Schweiz,Vitali,Membre
1005,federal_parliament,Albert,3372,Förderverein Luzern Volksschulen,Vitali,Membre


Looking for names in Officers:

In [6]:
# Distinct (first name, last name) pairs across all declared interests.
people = all_interests[['first_name', 'last_name']].drop_duplicates()

rs = []
for first, last in people.itertuples(index=False):
    full_name = first + ' ' + last
    officer_hits = search(swiss_officers, 'name', split(full_name))
    # NOTE(review): 'Zacharias' is kept even when the officer search finds
    # nothing — confirm this manual override is intended.
    if last == 'Zacharias' or officer_hits.size > 0:
        rs.append(full_name)

In [7]:
# Full names that matched an officer record (plus the forced 'Zacharias' entry).
rs

['Mauro Poggia', 'Ronald Zacharias']

And the companies from Entities and Intermediaries:

In [51]:
# Match every distinct interest name against the Swiss entity names.
# The result is cached as CSV: the cache is read from the same path it is
# written to, so the search only runs when the file does not exist yet.
entity_matches_path = 'data/all_interests_entities_name.csv'
try:
    inters = pd.read_csv(entity_matches_path, index_col=0)
except FileNotFoundError:
    # Only a missing cache triggers the recomputation; any other error
    # (bad CSV, interrupt, ...) is allowed to surface.
    ents = []
    # drop_duplicates keeps the first row per interest name, so each match is
    # attributed to the first politician listed for that interest.
    for _, interest in all_interests.drop_duplicates(subset=["interest_name"]).iterrows():
        interest_name = interest["interest_name"]
        matches = search(swiss_entities, 'name', split(interest_name))
        # Series.items() replaces iteritems(), which was removed in pandas 2.0.
        for idx, name in matches['name'].items():
            ents.append([
                interest_name.strip(),
                idx,
                name,
                interest["data_source"],
                interest["first_name"],
                interest["last_name"],
            ])
    inters = pd.DataFrame(
        ents,
        columns=['interest_name', 'idx', 'entity_name', "data_source", "first_name", "last_name"],
    )
    inters.to_csv(entity_matches_path)

In [52]:
# Interest-to-entity matches, either loaded from the CSV cache or freshly computed.
inters

Unnamed: 0,interest_name,idx,entity_name,data_source,first_name,last_name
0,SVK,119433,SVK INVEST INTERNATIONAL LIMITED,federal_parliament,Heinz,Brand
1,SVK,150129,SVK FOUNDATION,federal_parliament,Heinz,Brand
2,Aresa,173209,ARESA FINANCE LTD.,federal_parliament,Denis,de la Reussille
3,ASCA,53521,ASCA MANAGEMENT LTD.,geneva_parliament,Sarah,Klopmann
4,ARGOS,31154,ARGOS BUILDING CORPORATION LTD.,geneva_parliament,Christian,Frey
5,ARGOS,38862,ARGOS S.A.,geneva_parliament,Christian,Frey
6,ARGOS,148117,ARGOS-REALCO S.A.,geneva_parliament,Christian,Frey
7,ARGOS,152462,ARGOS (GULF) S.A.,geneva_parliament,Christian,Frey
8,ARGOS,164409,ARGOS TRADING LTD.,geneva_parliament,Christian,Frey
9,ARGOS,268869,Argos Inter Invest Ltd.,geneva_parliament,Christian,Frey


In [57]:
def _load_json(path):
    """Parse the JSON file at ``path``, closing the file handle afterwards."""
    with open(path, encoding="utf-8") as fh:
        return json.load(fh)


# NOTE(review): the first file is read from the working directory while the
# second lives under data/ — confirm both locations are intended.
scraped_interests = _load_json("scraped_companies_1.json")
scraped_interests2 = _load_json("data/scraped_companies.json")

# One record per person listed under a scraped company; companies without a
# "persons" key are skipped. "source" carries the company's source record.
second_order_people = pd.DataFrame(
    [
        {
            "first_name": person["first_name"],
            "last_name": person["last_name"],
            "source": company["source"],
        }
        for company in scraped_interests + scraped_interests2
        if "persons" in company
        for person in company["persons"]
    ],
    # Explicit columns keep the frame well-formed even when no person is found.
    columns=["first_name", "last_name", "source"],
)

# Each distinct name is searched once; when a person appears under several
# companies, only the source of the first occurrence is kept.
unique_people = second_order_people.drop_duplicates(subset=["first_name", "last_name"])

second_order_results = []
# Columns are selected by name so the unpacking does not rely on column order.
for first, last, source in unique_people[["first_name", "last_name", "source"]].itertuples(index=False):
    full_name = first + " " + last
    officer_hits = search(swiss_officers, 'name', split(full_name))
    # Series.items() replaces iteritems(), which was removed in pandas 2.0.
    for idx, name in officer_hits['name'].items():
        second_order_results.append([
            full_name,
            idx,
            name,
            source["first_name"],
            source["last_name"],
            source["data_source"],
            source["interest_name"],
        ])
    


In [58]:
# Show the second-order matches with named columns instead of positional 0..6:
# the person found via a scraped company, the matching officer row and name,
# then the politician and interest the company was scraped for.
pd.DataFrame(
    second_order_results,
    columns=[
        'person_name',
        'idx',
        'officer_name',
        'first_name',
        'last_name',
        'data_source',
        'interest_name',
    ],
)

Unnamed: 0,0,1,2,3,4,5,6
0,Peter Müller,236690,PETER MÜLLER,Albert,Vitali,federal_parliament,Energie Oberkirch AG
1,Urs Schneider,302919,Dr. Urs Schneider,Jacqueline,Badran,federal_parliament,Zugang für Alle
2,Jean Wenger,151170,JEAN-MARC WENGER,Daniela,Schneeberger,federal_parliament,Schweizerischer Gewerbeverband (sgv)
3,Jean Wenger,231228,JEAN MARC WENGER,Daniela,Schneeberger,federal_parliament,Schweizerischer Gewerbeverband (sgv)
4,Max Baumann,339514,Dr Iur Max Baumann,Nadja,Pieren,federal_parliament,Radio Emme AG
5,Peter Schmid,101187,MR. PETER SCHMID,Urs,Gasche,federal_parliament,Stiftung Schloss Jegenstorf
6,Alois Hodel,313817,Alois Hodel,Hansjörg,Walter,federal_parliament,Agrisano Prevos (Vorsorgestiftung)
7,Walter Moser,319378,Walter Jannik Moser,Isidor,Baumann,federal_parliament,Stiftung Historisches Erbe der SBB
8,Rudolf von Arx,333291,Von Arx Rudolf and Von Arx Nicole Jeannette,Bea,Heim,federal_parliament,Pro Senectute Kanton Solothurn


These matches have to be checked in more detail, as they seem to refer to something else...

## Parliament data

In [10]:
# Load the parliament members' interests; the cells below read its
# FirstName and LastName columns.
parlement = pd.read_json('data/parliament_members_interests.json')

In [11]:
# Distinct parliament members, one (FirstName, LastName) pair each.
members = parlement[['FirstName', 'LastName']].drop_duplicates()

# Keep the full name of every member for whom the whole-word search over the
# Swiss officers' names returns at least one row.
ps = [
    full_name
    for full_name in (
        first + ' ' + last for first, last in members.itertuples(index=False)
    )
    if search(swiss_officers, 'name', split(full_name)).size > 0
]

In [12]:
# Parliament members whose full name matched at least one Swiss officer record.
ps

['Peter Schmid',
 'Heinrich Baumann',
 'Johann Jakob',
 'Peter Müller',
 'Rudolf Ott',
 'Mauro Poggia']

Only a few matches, and there are a lot of possibilities with these names...

### Querying Neo4j for Poggia

In [13]:
%%cypher 
MATCH (o:Officer) WHERE toLower(o.name) CONTAINS "poggia"
RETURN o.name as name, o.countries as countries LIMIT 20

1 rows affected.


name,countries
Mauro Poggia,Switzerland


In [14]:
# Query for Poggia's graph: starting from every Officer whose lowercased name
# contains "poggia", fetch paths of 1 to 100 relationships in either
# direction, capped at 100 paths.
results = %cypher \
    MATCH (o:Officer) WHERE toLower(o.name) CONTAINS "poggia" \
    match p=(o)-[r*1..100]-() \
    RETURN p limit 100

15 rows affected.


In [15]:
# Turn the query result into a graph object and render it with the project's
# `draw` helper (helpers.vis).
# NOTE(review): get_graph() presumably returns a networkx graph — confirm.
G = results.get_graph()
draw(G)

In [16]:
# Query for the "energo" graph: starting from every Entity whose lowercased
# name contains "energo", fetch paths of 1 to 100 relationships in either
# direction, capped at 100 paths.
results2 = %cypher \
    MATCH (o1:Entity) WHERE toLower(o1.name) CONTAINS "energo" \
    match p=(o1)-[r*1..100]-() \
    RETURN p limit 100

100 rows affected.


In [17]:
# Turn the "energo" query result into a graph object and render it with the
# project's `draw` helper (helpers.vis).
G2 = results2.get_graph()
draw(G2)

We can see results in Morges and Porrentruy (+ Russia, Spain, Panama), all centered on GENINT SA  
http://ge.ch/hrcintapp/externalCompanyReport.action?companyOfrcId13=CH-660-0806985-5&ofrcLanguage=2

In [18]:
# Query for a graph concerning the IMD school in Lausanne ;)
# Starting from every Address whose lowercased text contains "schmidheiny",
# fetch paths of 1 to 100 relationships pointing towards that address,
# capped at 20 paths.
results3 = %cypher \
    MATCH (o1:Address) WHERE toLower(o1.address) CONTAINS "schmidheiny" \
    match p=(o1)<-[r*1..100]-() \
    RETURN p limit 20

8 rows affected.


In [19]:
# Turn the address query result into a graph object and render it with the
# project's `draw` helper (helpers.vis).
G3 = results3.get_graph()
draw(G3)