In [0]:
# Imports
import csv
import difflib
import json
import math
import operator
import os
import time

import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd
import requests
from mpl_toolkits.basemap import Basemap

#%matplotlib inline

In [0]:

# Load the water-quality table from the SIVICAP CSV.
url_data = 'https://raw.githubusercontent.com/cpenalozag/SocialNetworksData/master/CalidadAgua/Calidad_Agua_SIVICAP.csv'
data = pd.read_csv(url_data)

# Build the node dictionary: municipality name -> {risk level, department}.
# NOTE(review): keys are bare municipality names, so when two rows share a
# name the later row overwrites the earlier one — confirm names are unique
# across departments in this dataset.
nodes = {
    municipality: {'riesgo': risk, 'departamento': department}
    for municipality, risk, department in zip(
        data['MUNICIPO'],
        data['NIVEL DE RIESGO PROMEDIO'],
        data['DEPARTAMENTO'],
    )
}

# Load the population table (semicolon-separated CSV).
url_population = 'https://raw.githubusercontent.com/cpenalozag/SocialNetworksData/master/CalidadAgua/Poblacion_Municipios.csv'
population = pd.read_csv(url_population, sep=';')

def similar(landstring, country):
    """Return the closest match for ``landstring`` among ``country``.

    ``country`` is an iterable of candidate strings (iterating a dict yields
    its keys). Matching is delegated to ``difflib.get_close_matches`` with
    its default cutoff, asking for at most one result.

    Returns the best-matching candidate, or an empty list when no candidate
    is close enough, so callers can test the result for truthiness.
    """
    best = difflib.get_close_matches(landstring, country, 1)
    return best[0] if best else []

# Attach the 2015 population figure to each node. Municipality names are
# paired by fuzzy match (see `similar`); rows with no close match are skipped.
for mpio, pop_2015 in zip(population['MPIO'], population['2015']):
    name = similar(mpio, nodes)
    if name and name in nodes:
        nodes[name]['poblacion'] = float(pop_2015)
 

In [81]:
# Get coordinates
def coords(query):
    """Geocode a free-text place query with the LocationIQ search endpoint.

    Args:
        query: Place description sent as the ``q`` parameter,
            e.g. ``"municipality, department"``.

    Returns:
        A ``(lat, lon)`` tuple taken from the first search result, with the
        values exactly as the API returns them.

    Raises:
        requests.HTTPError: if the service answers with an error status.
        requests.Timeout: if no response arrives within the timeout.
        IndexError: if the service returns an empty result list.
    """
    url = "https://us1.locationiq.com/v1/search.php"

    # Prefer a key supplied through the environment so it does not have to
    # live in the notebook.
    # NOTE(review): the literal fallback keeps existing runs working, but it
    # is a credential committed to source — rotate it and drop the fallback.
    params = {
        'key': os.environ.get('LOCATIONIQ_API_KEY', '878fbe47c15737'),
        'q': query,
        'format': 'json'
    }

    # A timeout stops one stalled request from hanging the whole geocoding loop.
    response = requests.get(url, params=params, timeout=30)
    # Fail loudly on an error status instead of indexing into an error payload.
    response.raise_for_status()

    results = response.json()

    return (results[0]['lat'], results[0]['lon'])

# Geocode every municipality, querying by "municipality, department".
for name, attrs in nodes.items():
    q = '%s, %s' % (name, attrs['departamento'])
    attrs['lat'], attrs['lon'] = coords(q)
    time.sleep(1)  # rate limit: 1 req/second
nodes

{'Abejorral': {'departamento': 'Antioquia',
  'lat': '5.7914299',
  'lon': '-75.4270381',
  'poblacion': 19290.0,
  'riesgo': 'SIN RIESGO'},
 'Abrego': {'departamento': 'Norte de Santander',
  'lat': '8.1420669',
  'lon': '-73.1196354',
  'poblacion': 37997.0,
  'riesgo': 'MEDIO'},
 'Abriaquí': {'departamento': 'Antioquia',
  'lat': '6.6284892',
  'lon': '-76.083530093352',
  'poblacion': 2128.0,
  'riesgo': 'SIN RIESGO'},
 'Acacías': {'departamento': 'Meta',
  'lat': '3.9861122',
  'lon': '-73.7583005',
  'poblacion': 68888.0,
  'riesgo': 'MEDIO'},
 'Acevedo': {'departamento': 'Huila',
  'lat': '1.8070452',
  'lon': '-75.888501',
  'poblacion': 32897.0,
  'riesgo': 'ALTO'},
 'Achí': {'departamento': 'Bolívar',
  'lat': '8.60283045',
  'lon': '-74.4586880450204',
  'poblacion': 23051.0,
  'riesgo': 'MEDIO'},
 'Agrado': {'departamento': 'Huila',
  'lat': '2.2580718',
  'lon': '-75.7712079',
  'poblacion': 8834.0,
  'riesgo': 'ALTO'},
 'Aguachica': {'departamento': 'Cesar',
  'lat': '8.2

In [0]:
# Write nodes.csv
#
# Uses csv.writer so that a municipality or department name containing a
# comma or quote is quoted properly instead of corrupting the row.
# UTF-8 is set explicitly so accented names are written the same way on
# every platform.
with open('nodes.csv', 'w', newline='', encoding='utf-8') as outf:
    writer = csv.writer(outf, lineterminator='\n')
    writer.writerow(['municipio', 'departamento', 'lat', 'lon', 'poblacion', 'riesgo'])
    for n, attrs in nodes.items():
        # Nodes that never matched a population row have no 'poblacion' key;
        # treat them like NaN populations and leave them out of the file.
        poblacion = attrs.get('poblacion', float('nan'))
        if math.isnan(poblacion):
            continue
        writer.writerow([
            n,
            attrs['departamento'],
            attrs['lat'],
            attrs['lon'],
            int(poblacion),  # population is written as a whole number
            attrs['riesgo'],
        ])

In [0]:
import geopy.distance

# `vincenty` was removed in geopy 2.0; `geodesic` is its replacement.
# Fall back to `vincenty` only on old geopy releases that lack `geodesic`.
_distance_fn = getattr(geopy.distance, 'geodesic', None) or geopy.distance.vincenty


def get_distance(coords_1, coords_2):
    """Return the distance in kilometres between two (lat, lon) points.

    Args:
        coords_1: ``(lat, lon)`` of the first point.
        coords_2: ``(lat, lon)`` of the second point.

    Returns:
        The ellipsoidal distance between the points, in kilometres.
    """
    return _distance_fn(coords_1, coords_2).km


# Reload the nodes from disk; only rows written to nodes.csv are kept.
nd = pd.read_csv('nodes.csv')


In [113]:
# Connect every pair of municipalities that lie closer than the limit.
distance_limit = 100  # max distance (km) for creating an edge
edges = []

# Pull the columns out once so the pairwise loop works on plain lists.
points = list(zip(nd['lat'], nd['lon']))
names = list(nd['municipio'])

for i, c1 in enumerate(points):
    # Start at i + 1 so each unordered pair is visited exactly once.
    for j in range(i + 1, len(points)):
        distance = get_distance(c1, points[j])
        if distance < distance_limit:
            edges.append([names[i], names[j], distance])

len(edges)

35231

In [0]:
# Write edges file
#
# Uses csv.writer so that a municipality name containing a comma or quote is
# quoted properly instead of shifting the columns. UTF-8 is set explicitly so
# accented names are written the same way on every platform.
with open('edges.csv', 'w', newline='', encoding='utf-8') as outf:
    writer = csv.writer(outf, lineterminator='\n')
    writer.writerow(['source', 'target', 'weight'])
    for source, target, weight in edges:
        # The weight is the distance truncated to a whole number of km.
        writer.writerow([source, target, int(weight)])
