In [1]:
import pandas as pd
import os
import folium
from geopy.geocoders import Nominatim

# Raise pandas' column display limit to 500 so wide frames are not truncated when shown.
pd.set_option('display.max_columns', 500)

In [2]:
# Relative path of the folder that holds the Panama Papers CSV exports.
DATA_FOLDER = '../data/'

# File names of the individual CSV exports; each is appended to DATA_FOLDER when loaded.
edges_csv = 'panama_papers.edges.csv'
intermediary_csv = 'panama_papers.nodes.intermediary.csv' # companies or individuals
address_csv = 'panama_papers.nodes.address.csv'
officer_csv = 'panama_papers.nodes.officer.csv'
entity_csv = 'panama_papers.nodes.entity.csv' # tax haven companies

In [3]:
# Load each Panama Papers CSV export into its own raw DataFrame.
def _read_raw(csv_name):
    """Read one CSV export from DATA_FOLDER with pandas' low_memory mode disabled."""
    return pd.read_csv(DATA_FOLDER + csv_name, low_memory=False)


df_edges_raw = _read_raw(edges_csv)
df_address_raw = _read_raw(address_csv)
df_entity_raw = _read_raw(entity_csv)
df_intermediary_raw = _read_raw(intermediary_csv)
df_officier_raw = _read_raw(officer_csv)

In [4]:
# Jurisdictions with at most `threshold1` entities are collapsed into a single 'Others' label.
threshold1 = 1000

# Work on a copy so the raw entity frame stays untouched.
df_entity_S1 = df_entity_raw.copy()
vc = df_entity_S1['jurisdiction_description'].value_counts()

# Split the jurisdiction names into large (> threshold) and small (<= threshold) groups.
tax_heavens_L = vc[vc > threshold1].index.tolist()
tax_heavens_S = vc[vc <= threshold1].index.tolist()

# Relabel every row whose jurisdiction is not one of the large ones.
not_large = ~df_entity_S1['jurisdiction_description'].isin(tax_heavens_L)
df_entity_S1.loc[not_large, 'jurisdiction_description'] = 'Others'

print(df_entity_S1['jurisdiction_description'].value_counts(), "\n")
print('Jurisdictions in Others category are: ', tax_heavens_S)

British Virgin Islands    113648
Panama                     48360
Bahamas                    15915
Seychelles                 15182
Niue                        9611
Samoa                       5307
British Anguilla            3253
Nevada                      1260
Others                      1098
Name: jurisdiction_description, dtype: int64 

Jurisdictions in Others category are:  ['Hong Kong', 'United Kingdom', 'Belize', 'Costa Rica', 'Cyprus', 'Uruguay', 'New Zealand', 'Jersey', 'Wyoming', 'Malta', 'Isle Of Man', 'Ras Al Khaimah', 'Singapore']


In [5]:
# Count entities per jurisdiction and expose the result as a two-column frame.
jurisdiction_df = (
    df_entity_raw['jurisdiction_description']
    .value_counts()
    .reset_index()
)
jurisdiction_df.columns = ["Area", "Count"]

In [6]:
# Look up latitude / longitude for every jurisdiction via Nominatim.
geolocator = Nominatim(user_agent='ADA_AKBAR')

# Dataset labels that are sent to the geocoder under a different query string.
JURISDICTION_GEOCODE_ALIASES = {'British Anguilla': 'Anguilla'}

j_lat = []
j_long = []

for area in jurisdiction_df['Area']:
    location = geolocator.geocode(JURISDICTION_GEOCODE_ALIASES.get(area, area))
    if location is None:
        # geocode() returns None when nothing matches; keep the row aligned
        # with NaN coordinates instead of failing on `location.latitude`.
        print('Could not geocode jurisdiction:', area)
        j_lat.append(float('nan'))
        j_long.append(float('nan'))
        continue
    j_lat.append(location.latitude)
    j_long.append(location.longitude)

jurisdiction_df['Latitude'] = j_lat
jurisdiction_df['Longitude'] = j_long

In [7]:
# FOLIUM KEEP FOR NOW
# jur_m = folium.Map(tiles='CartoDB positron')
# j_lat_long = jurisdiction_df[['Latitude', 'Longitude']].values.tolist()
# for i in range(len(j_lat_long)):
#     folium.Marker(j_lat_long[i], 
#                   popup=jurisdiction_df['Area'][i] + ': ' + str(jurisdiction_df['Count'][i])).add_to(jur_m)
# jur_m

In [8]:
# The map below uses a Mercator projection, so latitude/longitude
# must be converted into x/y map coordinates using the following
# equation.

import math
    
def merc(lat_lst, lon_lst):
    """Project latitude/longitude pairs (degrees) onto spherical Mercator x/y.

    Parameters
    ----------
    lat_lst : iterable of float
        Latitudes in degrees.
    lon_lst : iterable of float
        Longitudes in degrees, paired element-wise with ``lat_lst``.

    Returns
    -------
    (x_lst, y_lst) : tuple of two lists
        Projected x and y coordinates, scaled by ``r_major``.

    Notes
    -----
    The earlier form derived a scale factor as ``x / lon``, which raised
    ``ZeroDivisionError`` for a longitude of exactly 0. That factor is the
    constant ``r_major * pi / 180``, so it is folded into the formula here
    and no division is needed.
    """
    r_major = 6378137.000  # sphere radius used for the projection
    x_lst = []
    y_lst = []
    # zip pairs the values positionally, so pandas Series with any index work too.
    for lat, lon in zip(lat_lst, lon_lst):
        x = r_major * math.radians(lon)
        y = r_major * math.log(math.tan(math.pi / 4.0 + math.radians(lat) / 2.0))
        x_lst.append(x)
        y_lst.append(y)
    return x_lst, y_lst

# Project the jurisdiction coordinates and store them as map x/y columns.
x_y = merc(jurisdiction_df['Latitude'], jurisdiction_df['Longitude'])
jurisdiction_df['x'], jurisdiction_df['y'] = x_y

In [9]:
from bokeh.plotting import figure, show, output_notebook
from bokeh.tile_providers import CARTODBPOSITRON
from bokeh.models import ColumnDataSource, HoverTool

def plot_map(df):
    """Show the rows of ``df`` as red circles on a CartoDB Positron tile map.

    ``df`` must provide the columns ``x`` and ``y`` (passed to the circle
    glyph as its coordinates) and ``Area`` and ``Count`` (shown in the hover
    tooltip). The figure is rendered in the notebook via ``output_notebook()``
    and ``show()``; nothing is returned.
    """
    # Data-source field name -> DataFrame column it is filled from.
    field_to_column = {'x': 'x', 'y': 'y', 'count': 'Count', 'area': 'Area'}
    source = ColumnDataSource(
        data={field: list(df[column]) for field, column in field_to_column.items()}
    )

    hover = HoverTool(tooltips=[("Area", "@area"), ("Count", "@count")])

    figure_options = dict(
        width=800,
        height=400,
        x_range=(-20000000, 30000000),
        y_range=(-100, 10000),
        x_axis_type="mercator",
        y_axis_type="mercator",
        tools=['pan', hover, 'wheel_zoom', 'save'],
    )
    p = figure(**figure_options)
    p.add_tile(CARTODBPOSITRON)

    marker_color = "#FF0000"
    p.circle(
        x='x',
        y='y',
        size=10,
        source=source,
        line_color=marker_color,
        fill_color=marker_color,
        fill_alpha=0.05,
    )

    output_notebook()
    show(p)
    
# Plot the per-jurisdiction counts on the map.
plot_map(jurisdiction_df)

In [10]:
# Repeat the geocoding and projection for the entities' countries,
# keeping only countries that appear more than 1000 times.

s = df_entity_raw.countries.value_counts()
s = s[s > 1000]
country_df = pd.DataFrame(s).reset_index()
country_df.columns = ['Area', 'Count']

c_lat = []
c_long = []

for area in country_df['Area']:
    location = geolocator.geocode(area)
    if location is None:
        # geocode() returns None when nothing matches; keep the row aligned
        # with NaN coordinates instead of failing on `location.latitude`.
        print('Could not geocode country:', area)
        c_lat.append(float('nan'))
        c_long.append(float('nan'))
        continue
    c_lat.append(location.latitude)
    c_long.append(location.longitude)

country_df['Latitude'] = c_lat
country_df['Longitude'] = c_long

# Project the coordinates to map x/y, as done for the jurisdictions.
x_y_c = merc(country_df.Latitude, country_df.Longitude)
country_df['x'] = x_y_c[0]
country_df['y'] = x_y_c[1]

In [11]:
# Plot the per-country counts with the same map helper.
plot_map(country_df)