In [1]:
from __future__ import division
import pandas as pd
import numpy as np
from matplotlib.mlab import griddata
from mpl_toolkits.basemap import Basemap
import matplotlib.pyplot as plt
from matplotlib.colors import Normalize
import matplotlib.patches as patches
from matplotlib import cm
from matplotlib.path import Path
from matplotlib.patches import PathPatch
from scipy.interpolate import Rbf
import shapefile
import argparse
import sys
import re

In [4]:
def dms2dec(dms_str):
    """
    Return the decimal-degree value of a degrees/minutes/seconds string.

    All whitespace is removed first.  Degrees, minutes and seconds are then
    taken as the first three numbers found in the string, separated by any
    run of non-digit characters (degree sign, quotes, letters, ...).  The
    seconds field may carry a decimal fraction such as ``30.6``.

    The result is always negative, whatever hemisphere marker the string
    may contain (see the note on ``sign`` below).

    Raises ValueError when three numeric fields cannot be found.
    """
    dms_str = re.sub(r'\s', '', dms_str)
    sign = -1  # we are working on lats and lons of Argentina
    # Raw-string pattern; the optional fraction keeps decimal seconds
    # together instead of treating the "." as a field separator.
    match = re.match(r'\D*(\d+)\D+(\d+)\D+(\d+(?:\.\d+)?)', dms_str)
    if match is None:
        raise ValueError("cannot parse DMS value: {!r}".format(dms_str))
    degree, minute, second = match.groups()

    return sign * (int(degree) + float(minute) / 60 + float(second) / 3600)

In [3]:
# Read the workbook and turn the DMS coordinate strings into decimal degrees.
# No `encoding` keyword is passed: current pandas `read_excel` does not
# define that parameter.
df = pd.read_excel("data/virmapa.xls")
df['lat'] = df['lat'].apply(dms2dec)
df['long'] = df['long'].apply(dms2dec)

# Rename the first two columns to LAT / LON and keep the remaining names.
# NOTE(review): this assumes 'lat' and 'long' are the first two columns of
# the sheet -- confirm against the workbook.
df.columns = ['LAT', 'LON'] + list(df.columns.values)[2:]
# NOTE(review): this overwrites the input workbook in place, so the cell is
# not re-runnable as-is (the columns it reads have been renamed).
df.to_excel("data/virmapa.xls")

In [12]:
# Express every value column (everything after the first two columns) as a
# percentage of its own column total.  The total is computed once per column
# and the division is vectorised.  The loop variable is deliberately not
# called `column`, so it does not clobber the notebook-level `column` setting.
# NOTE(review): a column whose total is 0 yields NaN/inf here, as before.
for value_col in df.columns[2:]:
    column_total = df[value_col].sum()
    df[value_col] = 100 * (df[value_col] / column_total)


Unnamed: 0,LAT,LON,ARGUELLO,BOTANELLI,CORTEZ,ESCALANTE,GARRIDO,LACERDA,MAGALLANES,MORINIGO,ORTIZ,RUIZ,SOMBRA
Capital Federal,-34.55,-58.466667,3.420519,0,2.974212,3.305869,7.288306,2.531646,4.320872,4.346131,4.657841,35.69707,25.143325
Buenos Aires,-34.931389,-57.948889,24.983779,60,17.930632,34.226356,30.816532,11.392405,37.516438,40.966673,33.379407,5.353738,2.620803
Catamarca,-28.466667,-65.783333,4.708029,0,0.858118,2.552697,0.080645,0.0,0.169078,0.077958,1.046197,0.507206,0.0
Chaco,-27.45,-58.983333,1.265207,0,1.406236,12.763484,0.665323,0.0,1.690776,9.043072,3.415035,3.319104,0.0
Chubut,-43.3,-65.1,1.206407,0,0.813191,0.571197,2.550403,0.0,0.93932,0.233873,0.820991,1.327061,0.2457
Córdoba,-31.4,-64.183333,15.774534,0,15.54048,3.391801,8.387097,0.0,10.031937,0.93549,7.788091,5.027538,4.340704
Corrientes,-27.466667,-58.833333,0.403487,0,0.206667,13.769398,1.118952,0.0,1.315048,3.118301,3.895237,0.588514,0.0
Entre Rios,-31.733333,-60.533333,0.27575,0,0.269566,1.703483,0.493952,21.518987,2.291941,1.695576,2.421267,1.751992,0.0
Formosa,-26.183333,-58.183333,1.978913,0,0.332465,2.355558,2.066532,0.0,0.093932,14.11031,3.090929,2.103358,0.0
Jujuy,-24.183333,-65.3,0.87794,0,3.131458,1.334479,0.625,0.0,0.018786,0.136426,1.527591,2.665737,0.2457


In [5]:
class MapData(object):
    """
    Values read from an Excel sheet together with the coordinates needed to
    interpolate them on a map.

    Typical use: construct, then call get_coordinates(),
    project_coordinates(m) and interpolate(), in that order.
    """
    def __init__(self, filename, columns):
        """
        Load LAT, LON and the requested value columns from ``filename``.

        filename -- path of the Excel workbook to read.
        columns  -- list of value-column names to keep next to LAT / LON.
        """
        super(MapData, self).__init__()

        # Remember the requested columns so that interpolate() uses them
        # instead of depending on a notebook-level global.
        self.columns = list(columns)
        self.df = self.load_file(filename, columns)

    def load_file(self, filename, columns):
        """
        Read the workbook and return a frame with LAT, LON and ``columns``.

        Rows with a missing value in any column of the sheet are dropped
        before the column selection is made.
        """
        df = pd.read_excel(filename)
        df = df.dropna()
        df_values = df[['LAT', 'LON'] + list(columns)]
        return df_values

    def get_coordinates(self):
        """
        Store the unique (LAT, LON) pairs in ``self.coordinates``.

        Duplicates are removed because the mesh cannot be built from
        repeated points.  Nothing is returned.
        """
        self.coordinates = self.df[['LAT', 'LON']].drop_duplicates()

    def project_coordinates(self, m):
        """
        Project the stored coordinates, and the interpolation rectangle,
        with ``m``.

        m -- callable taking (lon, lat) and returning (x, y); the notebook
             passes its Basemap instance.

        Adds ``projected_lon`` / ``projected_lat`` columns to
        ``self.coordinates`` (get_coordinates() must have been called) and
        sets ``self.rect_lon`` / ``self.rect_lat``.
        """
        self.coordinates['projected_lon'], self.coordinates['projected_lat'] = m(
            self.coordinates['LON'].values, self.coordinates['LAT'].values)
        # Four hard-coded corner points; interpolate() builds its grid over
        # their bounding box.
        # NOTE(review): presumably chosen to enclose Argentina -- confirm.
        lat = [-22.051, -23.831765, -57.9770, -58.1860]
        lon = [-73.185825, -49.4553, -84.3632, -47.5781]
        self.rect_lon, self.rect_lat = m(lon, lat)

    def interpolate(self, numcols=100, numrows=100):
        """
        Interpolate the value column(s) onto a regular grid.

        The grid spans the bounding box of the projected rectangle prepared
        by project_coordinates() (not the convex hull of the data points),
        with ``numcols`` x ``numrows`` nodes.  Interpolated values are
        clipped to the range [0, 100].

        Returns (xi, yi, zi, x, y, z): the grid coordinates, the gridded
        values, and the projected sample coordinates and sample values.

        NOTE(review): x / y come from the de-duplicated coordinates while z
        is every value of the requested column(s); this assumes a single
        column and no duplicated coordinates -- confirm against the data.
        """
        xi = np.linspace(min(self.rect_lon), max(self.rect_lon), numcols)
        yi = np.linspace(min(self.rect_lat), max(self.rect_lat), numrows)

        xi, yi = np.meshgrid(xi, yi)
        # interpolate
        x = self.coordinates['projected_lon'].values
        y = self.coordinates['projected_lat'].values
        z = self.df[self.columns].values.ravel()
        # `function` must be spelled correctly: Rbf attaches unknown keyword
        # arguments to the object as plain attributes, so a misspelling
        # silently falls back to the default basis function.
        interp = Rbf(x, y, z, smooth=0.01, function='thin_plate')
        zi = interp(xi, yi)
        zi = np.clip(zi, a_min=0., a_max=100.)

        return xi, yi, zi, x, y, z

In [6]:
class MainDisplay(object):
    """Holds the figure, the axes and the Basemap instance, and draws the interpolated surface on them."""
    def __init__(self, lllon=-180, lllat=-80, urlon=0, urlat=40, figsize=(11.7,8.3), fs='data/continents/continent'):
        """
        Create the figure and a Mercator Basemap for the requested window.

        lllon, lllat -- longitude / latitude of the lower-left map corner.
        urlon, urlat -- longitude / latitude of the upper-right map corner.
        figsize      -- figure size handed to ``plt.figure``.
        fs           -- shapefile path (without extension) read into the map
                        under the name 'borders'; it is loaded but not drawn.
        """
        super(MainDisplay, self).__init__()
        plt.clf()
        self.fig = plt.figure(figsize=figsize)
        self.ax = self.fig.add_subplot(111, frame_on=False)
        # White axes background.  It is set on the axes patch rather than
        # through the `axisbg` keyword, which newer Matplotlib releases no
        # longer accept.
        self.ax.patch.set_facecolor('w')
        self.anc_map = Basemap(projection = 'merc', llcrnrlon = lllon,
                                llcrnrlat = lllat, urcrnrlon = urlon,
                                urcrnrlat = urlat, resolution='h')
        self.anc_map.readshapefile(fs, 'borders', drawbounds=False, linewidth=0.8)
    
    def draw(self, xi, yi, zi, x, y, z, coordinates, ancestry, clips):
        """
        Draw the base map, the contoured surface, the sample points and a
        colour bar.

        xi, yi, zi  -- grid coordinates and gridded values to contour.
        z           -- sample values; only used to choose the contour levels.
        coordinates -- table with 'projected_lon' / 'projected_lat' columns,
                       drawn as scatter points.
        clips       -- clip path applied to every contour artist, so that the
                       surface is only shown inside that outline.
        x, y, ancestry -- accepted for interface compatibility; not used.
        """
        self.anc_map.drawmapboundary(fill_color = 'white')
        self.anc_map.fillcontinents(color='#C0C0C0', lake_color='#7093DB')
        self.anc_map.drawcountries(
            linewidth=.75, linestyle='solid', color='#000073',
            antialiased=True,
            ax=self.ax, zorder=3)

        # contour plot: one level per unit, from the smallest sample value up
        # to 30 above the largest one (same levels for lines and fill)
        levels = np.arange(round(min(z)), round(max(z) + 30.), 1.)
        con_ = self.anc_map.contour(xi, yi, zi, zorder=6, levels=levels, colors=('k',), linewidths=(0.2,), alpha=0.5)
        con = self.anc_map.contourf(xi, yi, zi, zorder=5, cmap='jet', levels=levels)
        
        # clip the data so only the part inside of the country is displayed
        for contour_set in (con_, con):
            for contour in contour_set.collections:
                contour.set_clip_path(clips)
        
        # scatter plot
        # NOTE(review): no `c` array is passed, so cmap / vmin / vmax
        # presumably have no visible effect on the markers -- confirm.
        self.anc_map.scatter(
            coordinates['projected_lon'],
            coordinates['projected_lat'],
            color='#545454',
            edgecolor='#ffffff',
            alpha=.75,
            s=30,
            cmap='RdPu',
            ax=self.ax,
            vmin=zi.min(), vmax=zi.max(), zorder=5)

        # add colour bar, bound explicitly to the filled contours instead of
        # relying on pyplot's implicit "current image"
        self.anc_map.colorbar(con)

# TODO: move this to another module
def process_shapefile(filename_shp, my_map, ax):
    """
    Build a clip patch from every shape stored in a shapefile.

    filename_shp -- path handed to ``shapefile.Reader``.
    my_map       -- callable that projects (lons, lats) to map coordinates.
    ax           -- axes whose ``transData`` is used as the patch transform.

    Each part of each shape becomes one closed sub-path (MOVETO, a run of
    LINETO, CLOSEPOLY).  Returns a single ``PathPatch`` made of all of them.
    """
    # http://basemaptutorial.readthedocs.org/en/latest/clip.html
    reader = shapefile.Reader(filename_shp)
    vertices = []
    codes = []
    for record in reader.shapeRecords():
        lons, lats = zip(*record.shape.points)
        projected = np.array(my_map(lons, lats)).T
        # Part offsets, closed with the total point count, so that
        # consecutive pairs delimit one part each.
        bounds = list(record.shape.parts) + [len(projected)]
        for start, stop in zip(bounds[:-1], bounds[1:]):
            vertices.extend((pt[0], pt[1]) for pt in projected[start:stop])
            codes.append(Path.MOVETO)
            codes.extend([Path.LINETO] * (stop - start - 2))
            codes.append(Path.CLOSEPOLY)
    outline = Path(vertices, codes)

    return PathPatch(outline, transform=ax.transData)

In [73]:
# Workbook to read and the value column(s) to plot.
filename = "data/virmapa.xls"
column = ["SOMBRA"]

In [74]:
# Map window: lower-left and upper-right corners (longitude, latitude).
lllon, lllat = -180., -80.
urlon, urlat = 0., 0.
display = MainDisplay(lllon=lllon, lllat=lllat, urlon=urlon, urlat=urlat,
                      fs='data/ARG_adm/ARG_adm0')

# Load the values and locations, then project the locations onto the map.
map_data = MapData(filename, column)
map_data.get_coordinates()
map_data.project_coordinates(display.anc_map)

# Grid the values and build the outline used to clip the contours.
xi, yi, zi, x, y, z = map_data.interpolate()
clips = process_shapefile('data/ARG_adm/ARG_adm0', display.anc_map, display.ax)

display.draw(
    xi, yi, zi, x, y, z,
    map_data.coordinates,
    map_data.df[column].values.ravel(),
    clips)

plt.title("APELLIDOS - {}".format(column[0]))
plt.show()