In [3]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
    
# funciones desarrolladas
from functions.agrupar_dfs_censo import *
from functions.cargar_data import *

In [44]:
# Load the census data through the project helper `cargar_censo`
# (brought in by the wildcard import from functions.cargar_data).
censo = cargar_censo()

In [45]:
def filter_df_censo(df):
    """Return the census rows flagged as inter-departmental migrants.

    Keeps the rows where ``PERMI07 == 3``. Per the original author's note this
    is a first analysis restricted to people who migrated between departments;
    moves between localities of the same department are not included.
    The result gets an integer ``depto_origen`` column (copied from
    ``PERMI07_2``, the previous department of residence) placed first, and
    ``DPTO`` is renamed to ``depto_destino``.

    Parameters
    ----------
    df : pandas.DataFrame
        Census frame containing at least ``PERMI07``, ``PERMI07_2`` and ``DPTO``.

    Returns
    -------
    pandas.DataFrame
        A new frame with a fresh 0..n-1 index. ``df`` itself is not modified.

    Raises
    ------
    ValueError
        If ``PERMI07_2`` holds values that cannot be cast to ``int`` among the
        selected rows, or if ``df`` already has a ``depto_origen`` column.
    """
    migrants = df.loc[df['PERMI07'] == 3].reset_index(drop=True)

    # Cast *before* inserting. Writing the converted values back with
    # ``frame.loc[:, col] = ...`` assigns into the existing column, which in
    # recent pandas can keep the column's previous dtype and lose the cast.
    migrants.insert(0, 'depto_origen', migrants['PERMI07_2'].astype(int))

    # rename() without inplace returns the renamed frame directly.
    return migrants.rename(columns={'DPTO': 'depto_destino'})

In [46]:
# Keep only the inter-departmental migrants selected by filter_df_censo,
# then display how many rows remain.
mgr_interdep = filter_df_censo(censo)
mgr_interdep.shape[0]

148759

In [49]:
# Drop the `censo` name; filter_df_censo already produced a separate frame.
# NOTE(review): this cell is not idempotent - running it when `censo` is no
# longer defined raises NameError, which is what the recorded output shows.
del censo

NameError: name 'censo' is not defined

In [52]:
# genera dataframes por tipología
def etiquetar_df_mig(df, ver_dep_ori, var_dep_des):
    """Tag every row of ``df`` with a migration type relative to department 1.

    Department code 1 is the one the original comments call "Mvdeo.".
    The ``label`` column is written into ``df`` itself and that same object
    is returned:

    * 1 - destination is department 1
    * 2 - origin is department 1, destination is not
    * 3 - neither origin nor destination is department 1

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to label; modified in place.
    ver_dep_ori : str
        Name of the column holding the origin department code.
    var_dep_des : str
        Name of the column holding the destination department code.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, now carrying ``label``.
    """
    arrives_in_1 = df[var_dep_des] == 1
    leaves_from_1 = df[ver_dep_ori] == 1

    # (label, rows it applies to), applied in the listed order.
    rules = (
        (1, arrives_in_1),
        (2, leaves_from_1 & ~arrives_in_1),
        (3, ~leaves_from_1 & ~arrives_in_1),
    )
    for code, rows in rules:
        df.loc[rows, 'label'] = code

    return df

# Apply the labelling function.
# etiquetar_df_mig writes `label` into the frame it receives and returns that
# same frame, so `df` and `mgr_interdep` are two names for one object here.
df = etiquetar_df_mig(mgr_interdep, 'depto_origen', 'depto_destino')

In [53]:
# Preview the labelled frame; `label` is displayed as a float (1.0) in the recorded output.
df.head()

Unnamed: 0,depto_origen,depto_destino,LOC,SECC,SEGM,VIVID,TIPO_VIVIE,HOGCOD,HOGID,PERPH02,...,PERED03_R,PERED03_1,PERED03_2,PERED04_R,PERED05_R,PERED06_R,CODIGO_CAR,PERED08,NIVELEDU_R,label
0,3,1,20,1,1,5,1,102001001000501,1,1,...,0,11,0,2,3,4,55131,1,9,1.0
1,17,1,20,1,1,30,1,102001001003001,1,2,...,11,0,0,0,2,4,55152,1,9,1.0
2,18,1,20,1,1,36,1,102001001003601,1,1,...,0,6,0,2,2,0,33101,1,5,1.0
3,2,1,20,1,1,46,1,102001001004601,1,1,...,0,6,0,2,1,0,33101,1,5,1.0
4,12,1,20,1,1,93,1,102001001009301,1,2,...,0,4,0,2,1,0,22101,1,4,1.0


In [63]:
# Predictor columns, in the order they will appear in the feature matrix.
# NOTE(review): 'depto_origen' is also an input to the rule that builds
# `label` in etiquetar_df_mig - confirm that using it as a feature is intended.
variables = [
    'depto_origen',
    'TIPO_VIVIE',
    'PERPH02',
    'PERPA01',
    'PERER02',
    'PERNA01',
    'PERMI01',
    'PERMI05',
    'PERMI05_1',
    'PERED00',
    'PERED01',
    'PERED02',
    'PERED02_1',
    'PERED02_2',
    'PERED02_3',
    'PERED02_4',
    'PERED03_R',
    'PERED03_1',
    'PERED03_2',
    'PERED04_R',
    'NIVELEDU_R',
]

# Select those columns and replace missing values with 0.
# NOTE(review): 0 is used as the fill value for every column - confirm it does
# not collide with a meaningful code in any of them.
df_2 = df[variables].fillna(0)

In [55]:
# Remove the `mgr_interdep` name only. `df` came from etiquetar_df_mig, which
# returns the frame it was given, so the data stays alive through `df`.
del mgr_interdep

In [64]:
# Preview the selected predictor columns after missing values were filled with 0.
df_2.head()

Unnamed: 0,depto_origen,TIPO_VIVIE,PERPH02,PERPA01,PERER02,PERNA01,PERMI01,PERMI05,PERMI05_1,PERED00,...,PERED02,PERED02_1,PERED02_2,PERED02_3,PERED02_4,PERED03_R,PERED03_1,PERED03_2,PERED04_R,NIVELEDU_R
0,3,1,1,2,3,24.0,1,2,2,0,...,0,0.0,0.0,0.0,0,0,11,0,2,9
1,17,1,2,9,3,22.0,3,2,4,0,...,1,0.0,0.0,0.0,0,11,0,0,0,9
2,18,1,1,4,1,19.0,3,2,1,0,...,0,0.0,0.0,0.0,0,0,6,0,2,5
3,2,1,1,12,3,24.0,3,2,2,0,...,0,0.0,0.0,0.0,0,0,6,0,2,5
4,12,1,2,1,3,28.0,3,2,3,0,...,0,0.0,0.0,0.0,0,0,4,0,2,4


In [65]:
# Feature matrix as a NumPy array: one row per record in df_2, one column per entry in `variables`.
data = np.array(df_2.values)

In [66]:
# Target vector, read from `df` (df_2 does not carry `label`). df_2 was built
# from `df` by column selection and fillna only, so rows line up with `data`.
y = np.array(df.loc[:,'label'])

In [67]:
# Split into train and test sets; random_state=0 makes the shuffle reproducible.
# No test_size is passed, so scikit-learn's default split applies.
# NOTE(review): the split is not stratified on `y` - confirm that is acceptable
# for the class balance of the three labels.
X_train, X_test, y_train, y_test = train_test_split(data, y, random_state=0)

In [None]:
# Fit an RBF-kernel support vector classifier on the training split.
# NOTE(review): no feature-scaling step is visible in this notebook before the
# fit - confirm whether the raw codes should be scaled or encoded first.
model = SVC(kernel='rbf', C=1)
model.fit(X_train,y_train)

# Plotting is left disabled: `data` has one column per entry in `variables` (21),
# whereas plot_mushroom_boundary evaluates the model on a 2-column mesh.
# plot_mushroom_boundary(X_test, y_test, model)

In [None]:
def plot_mushroom_boundary(X, y, fitted_model, mesh_step_size=0.01):
    """Plot the decision regions of a fitted two-feature classifier.

    Draws two panels side by side over a regular mesh that spans the two
    columns of ``X`` (padded by 0.1 on each side):

    * left: the class returned by ``fitted_model.predict``;
    * right: column 1 of ``fitted_model.predict_proba``. If that call fails
      (for example the model exposes no probabilities) the panel shows
      'Probabilities Unavailable' instead.

    The samples in ``X`` are overlaid as a scatter, one series per distinct
    value found in ``y``. No legend is drawn; call ``plt.legend()`` afterwards
    if one is wanted. The function name is historical: nothing in the body is
    specific to one dataset.

    Parameters
    ----------
    X : array-like of shape (n_samples, 2)
        The two features the model was fitted on.
    y : array-like of shape (n_samples,)
        Class labels for ``X``. A NumPy array or a pandas Series both work.
    fitted_model : estimator
        Fitted object providing ``predict`` and ``score`` and, optionally,
        ``predict_proba``.
    mesh_step_size : float, default 0.01
        Spacing of the evaluation mesh. Smaller values give a finer picture
        but require many more predictions.

    Raises
    ------
    ValueError
        If ``fitted_model`` reports (through ``n_features_in_``) that it was
        fitted on a number of features other than 2.
    """
    points = np.asarray(X)
    # np.asarray accepts ndarray and Series alike; the previous `y.values`
    # only worked for pandas objects.
    targets = np.asarray(y)

    # Fail early with a clear message instead of after building the mesh.
    n_expected = getattr(fitted_model, 'n_features_in_', None)
    if n_expected is not None and n_expected != 2:
        raise ValueError(
            'plot_mushroom_boundary needs a model fitted on exactly 2 features, '
            'but this one was fitted on %d' % n_expected)

    # Mesh, accuracy and class list are the same for both panels: compute once.
    x_min, x_max = points[:, 0].min() - .1, points[:, 0].max() + .1
    y_min, y_max = points[:, 1].min() - .1, points[:, 1].max() + .1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, mesh_step_size),
                         np.arange(y_min, y_max, mesh_step_size))
    mesh_points = np.c_[xx.ravel(), yy.ravel()]
    accuracy = fitted_model.score(points, targets)
    model_name = str(fitted_model).split('(')[0]
    classes = np.unique(targets)

    plt.figure(figsize=(9.8, 5), dpi=100)

    for i, plot_type in enumerate(['Decision Boundary', 'Decision Probabilities']):
        plt.subplot(1, 2, i + 1)

        if i == 0:
            Z = fitted_model.predict(mesh_points)
        else:
            try:
                Z = fitted_model.predict_proba(mesh_points)[:, 1]
            except Exception:
                # Best effort: any failure to obtain probabilities turns into a
                # notice on the panel. `Exception` (not a bare except) so that
                # KeyboardInterrupt still stops a long prediction.
                plt.text(0.4, 0.5, 'Probabilities Unavailable', horizontalalignment='center',
                         verticalalignment='center', transform=plt.gca().transAxes, fontsize=12)
                plt.axis('off')
                break

        Z = Z.reshape(xx.shape)

        # One scatter series per class actually present in y, instead of the
        # hard-coded classes 0 and 1.
        for cls in classes:
            members = targets == cls
            plt.scatter(points[members, 0], points[members, 1],
                        alpha=0.4, label=str(cls), s=5)

        plt.imshow(Z, interpolation='nearest', cmap='RdYlBu_r', alpha=0.15,
                   extent=(x_min, x_max, y_min, y_max), origin='lower')
        plt.title(plot_type + '\n' +
                  model_name + ' Test Accuracy: ' + str(np.round(accuracy, 5)))
        plt.gca().set_aspect('equal')

    plt.tight_layout()
    plt.subplots_adjust(top=0.9, bottom=0.08, wspace=0.02)