In [3]:
import numpy as np
import pandas as pd

import geopandas as gpd
from shapely.geometry import Point
from shapely.ops import nearest_points

import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt

import os
import warnings
#import unicodedata
warnings.filterwarnings("ignore")
#from io import BytesIO
#import gzip
import requests

# Let pandas print up to 999 rows of a frame before truncating the display.
pd.set_option(
    'display.max_rows',
    999,
)

In [4]:
def three_decimals(value):
    """Render a number with exactly three digits after the decimal point."""
    return '%.3f' % value


# Every float pandas displays goes through the three-decimal formatter.
pd.set_option('display.float_format', three_decimals)

## Cargo cwpops

In [5]:
# City-level table; the first two spreadsheet columns become the row MultiIndex.
cwpops = pd.read_excel(
    'files/cwpops.xlsx',
    index_col=[0, 1],
)

## Cargo Distances, creo latlong

In [6]:
distances = pd.read_excel('files/distances.xlsx')

# One row per origin city with its "lat,long" string.
# - Rows without coordinates are removed first, so a city is not lost just because
#   its first occurrence has an empty 'Point A'.
# - Duplicates are detected on the (Country, City) pair, not on City alone, so that
#   same-named cities in different countries are all kept.
latlongs = (
    distances[['Country', 'City A', 'Point A']]
    .rename(columns={'Point A': 'latlong', 'City A': 'City'})
    .dropna(subset=['latlong'])
    .drop_duplicates(['Country', 'City'])
    .set_index(['Country', 'City'])
)

# Split "lat,long" into two float columns with the vectorised string accessor
# instead of building one pd.Series per row.
latlongs[['lat', 'long']] = latlongs['latlong'].str.split(',', expand=True).astype(float)
latlongs = latlongs.drop('latlong', axis=1)

#latlongs = latlongs.join(cwpops[['masalarial','estimated_citywage']])
# Point geometry is built as (x=long, y=lat).
latlongs = gpd.GeoDataFrame(latlongs, geometry=gpd.points_from_xy(latlongs.long, latlongs.lat))

In [7]:
# Bring lat/long (and the geometry column latlongs carries) onto cwpops by index.
# NOTE(review): re-running this cell on the already-joined frame will hit
# overlapping column names - run it once per fresh kernel.
cwpops = cwpops.join(latlongs)

# Rebuild the point geometry from the joined coordinates (x=long, y=lat).
city_points = gpd.points_from_xy(cwpops.long, cwpops.lat)
cwpops = gpd.GeoDataFrame(cwpops, geometry=city_points)

## Funciones para *Cutoff* 

In [8]:
def get_wages_from_cutoff(city_index, cutoff):
    """Sum the estimated wage of a city and of the cities within ``cutoff`` of it.

    Reads the notebook-level frames ``cwpops`` and ``distances``.

    Parameters
    ----------
    city_index : tuple
        ``(Country, City)`` label of the origin city in ``cwpops``.
    cutoff : number
        Largest ``parsed_duration`` (inclusive) for a destination to count as
        nearby; units are whatever ``distances['parsed_duration']`` is stored in.

    Returns
    -------
    number
        ``estimated_citywage`` of the origin city plus the ``estimated_citywage``
        of every distinct ``City B`` listed for that origin within the cutoff.
        Destinations that have no row in ``cwpops`` contribute nothing.
    """
    # NOTE(review): assumes cwpops has an 'index1' column - TODO confirm, it is
    # not created anywhere in the visible cells.
    city_wage = cwpops.drop_duplicates('index1').loc[city_index]['estimated_citywage']

    country, city = city_index[0], city_index[1]
    within_cutoff = distances[
        (distances['Country'] == country)
        & (distances['City A'] == city)
        & (distances['parsed_duration'] <= cutoff)
    ].drop_duplicates('City B')

    if within_cutoff.empty:
        return city_wage

    nearby_cities = list(zip(within_cutoff['Country'], within_cutoff['City B']))
    # Select by membership rather than ``cwpops.loc[nearby_cities]``: label-list
    # lookups raise KeyError on current pandas as soon as one destination is
    # missing from cwpops, which would abort the whole cutoff table.
    is_nearby = cwpops.index.isin(nearby_cities)
    nearby_wages = cwpops.loc[is_nearby, 'estimated_citywage'].sum()
    return nearby_wages + city_wage

In [9]:
def get_countryshare_from_cutoff(city_index, cutoff, share_out = True):
    """Wages reachable from a city, as a share of its country or in absolute terms.

    Parameters
    ----------
    city_index : tuple
        ``(Country, City)`` label of the origin city; element 0 is used to pick
        the country total.
    cutoff : number
        Passed through to ``get_wages_from_cutoff``.
    share_out : bool, default True
        If true, return the reachable wages divided by the country's total
        ``estimated_citywage``; otherwise return the reachable wages themselves.

    Returns
    -------
    number
        The share or the absolute wage sum, depending on ``share_out``.
    """
    nearwages = get_wages_from_cutoff(city_index, cutoff = cutoff)
    if not share_out:
        # The absolute figure does not need the country total, so skip the
        # whole-frame groupby that used to run (and be discarded) on every call.
        return nearwages

    country_wages = cwpops.groupby('Country')['estimated_citywage'].sum().loc[city_index[0]]
    return nearwages/country_wages

## *Cutoff*

In [10]:
# Column-less frame with the same row index as cwpops.
shares_cutoff = pd.DataFrame(
    index=cwpops.index,
)

In [11]:
# Keep every row's index label as a column so it can be passed to the helper.
shares_cutoff['index1'] = shares_cutoff.index

# One column per cutoff value 0..9, filled with the country share for each row.
for cutoff_value in range(10):
    column_name = 'cutoff_' + str(cutoff_value)
    shares_cutoff[column_name] = shares_cutoff['index1'].apply(
        lambda label: get_countryshare_from_cutoff(label, cutoff=cutoff_value)
    )

In [12]:
# Append a ('Promedios', 'Promedios') row holding the mean of every cutoff column.
# numeric_only=True leaves out the tuple-valued 'index1' column; without it
# pandas >= 2.0 tries to average that column too and raises.
column_means = shares_cutoff.mean(numeric_only=True)
shares_cutoff = pd.concat([shares_cutoff, column_means.to_frame(('Promedios','Promedios')).T])

In [13]:
# Mean of every cutoff column, appended as a ('Promedios', 'Promedio de Latam') row.
# numeric_only=True leaves out the tuple-valued 'index1' column; without it
# pandas >= 2.0 tries to average that column too and raises.
# NOTE(review): at this point shares_cutoff already contains the
# ('Promedios', 'Promedios') row appended in the previous cell, so that row is
# part of this average - confirm that is intended.
meanlat = shares_cutoff.mean(numeric_only=True).to_frame()
meanlat.columns = pd.MultiIndex.from_tuples([('Promedios','Promedio de Latam')])

shares_cutoff = pd.concat([shares_cutoff,meanlat.T])

In [14]:
# Same layout as shares_cutoff, but filled with absolute wage sums (share_out=False).
shares_cutoff_abs = pd.DataFrame(index=cwpops.index)
shares_cutoff_abs['index1'] = shares_cutoff_abs.index

# One column per cutoff value 0..9.
for cutoff_value in range(10):
    column_name = 'cutoff_' + str(cutoff_value)
    shares_cutoff_abs[column_name] = shares_cutoff_abs['index1'].apply(
        lambda label: get_countryshare_from_cutoff(
            label, cutoff=cutoff_value, share_out=False
        )
    )

In [15]:
# Mean of every cutoff column, appended as a ('Promedios', 'Promedio de Latam') row.
# numeric_only=True leaves out the tuple-valued 'index1' column; without it
# pandas >= 2.0 tries to average that column too and raises.
meanlat = shares_cutoff_abs.mean(numeric_only=True).to_frame()
meanlat.columns = pd.MultiIndex.from_tuples([('Promedios','Promedio de Latam')])

shares_cutoff_abs = pd.concat([shares_cutoff_abs,meanlat.T])

## Airports

In [16]:
# OpenFlights "airports-extended" table, fetched from GitHub; the file has no header row.
AIRPORTS_URL = 'https://raw.githubusercontent.com/jpatokal/openflights/master/data/airports-extended.dat'
air_data = pd.read_csv(AIRPORTS_URL, header=None)

In [17]:
# Column names for the header-less airports file, in file order.
airport_columns = [
    'Airport ID',
    'Name',
    'City',
    'Country',
    'IATA',
    'ICAO',
    'Latitude',
    'Longitude',
    'Altitude',
    'Timezone',
    'DST',
    'Tz database time zone',
    'Type',
    'Source',
]
air_data.columns = airport_columns


In [18]:
# Airport IDs are handled as strings from here on.
air_data['Airport ID'] = air_data['Airport ID'].astype('str')

# 'int' is 1 where the airport name mentions "international" (any casing), 0 otherwise.
mentions_international = air_data['Name'].str.lower().str.contains('international')
air_data.loc[mentions_international, 'int'] = 1
air_data.loc[~mentions_international, 'int'] = 0

In [19]:
# Airports as a GeoDataFrame with one point per row (x=Longitude, y=Latitude).
airport_points = gpd.points_from_xy(air_data.Longitude, air_data.Latitude)
airports = gpd.GeoDataFrame(air_data, geometry=airport_points)

In [20]:
# Move the current row index into a regular column and renumber the rows from 0.
airports = airports.reset_index()

# Disabled alternative: index the airports by (Country, City) and sort.
#airports.set_index(['Country' , 'City' ],inplace=True)
#airports.sort_index(inplace=True)

## Aeropuertos más cercanos

In [21]:
def min_dist(point, gdf):
    """Return the 'Airport ID' of the row of ``gdf`` whose geometry is closest to ``point``.

    Parameters
    ----------
    point : shapely geometry
        Location to measure from.
    gdf : geopandas.GeoDataFrame
        Candidates; must have an active geometry column and an 'Airport ID' column.

    Returns
    -------
    The 'Airport ID' value of the nearest row. Distances are plain planar
    distances in the units of the coordinates (no projection is applied here).

    Side effect: ``gdf['Dist']`` is overwritten with the distances to ``point``
    on every call, as before.
    NOTE(review): after a loop over many points that column only reflects the
    last point - consider dropping it once no later cell reads it.
    """
    # A single vectorised GeoSeries.distance call replaces the Python-level
    # row-by-row apply, which made each lookup scan every airport in pure Python.
    gdf['Dist'] = gdf.geometry.distance(point)
    nearest_pos = gdf['Dist'].argmin()
    return gdf['Airport ID'].iloc[nearest_pos]

In [22]:
# ID of the closest airport for every city; each city triggers one pass over all airports.
latlongs['near_airport'] = [
    min_dist(city_point, airports)
    for city_point in latlongs['geometry']
]

KeyboardInterrupt: 

## Hacemos una prueba para el caso de Arg. 

Intentamos calcular la distancia euclídea entre los puntos (medida en grados de latitud/longitud, no en kilómetros). Habría que revisar lo que mandó Jero sobre urbanpy, que parece tener una manera de calcular distancias en tiempo de viaje en automóvil.

In [39]:
# Trial run on a single country: keep only the rows under 'Argentina' in the first index level.
trial_country = 'Argentina'
arg = latlongs.loc[trial_country]

In [44]:
# Attach the full airport record to each city via its nearest-airport ID.
# validate='many_to_one' makes pandas check that 'Airport ID' is unique in airports;
# indicator=True adds the '_merge' column telling whether a match was found.
arg = arg.merge(
    airports,
    how='left',
    left_on='near_airport',
    right_on='Airport ID',
    indicator=True,
    validate='many_to_one',
)

In [53]:
# Distance between each city point (geometry_x) and its matched airport point (geometry_y).
arg['distance_to_air'] = [
    city_point.distance(airport_point)
    for city_point, airport_point in zip(arg['geometry_x'], arg['geometry_y'])
]

In [54]:
# Preview the first five merged rows.
arg.head(n=5)

Unnamed: 0,lat,long,geometry_x,near_airport,index,Airport ID,Name,City,Country,IATA,...,DST,Tz database time zone,Type,Source,int,geometry_y,Dist,dist,_merge,distance_to_air
0,-38.723,-62.279,POINT (-62.27900 -38.72300),2501,2384,2501,Comandante Espora Airport,Bahia Blanca,Argentina,BHI,...,N,America/Buenos_Aires,airport,OurAirports,0.0,POINT (-62.16930 -38.72500),48.79,0.11,both,0.11
1,-34.652,-58.547,POINT (-58.54700 -34.65200),2449,2334,2449,El Palomar Airport,El Palomar,Argentina,EPA,...,N,America/Buenos_Aires,airport,OurAirports,0.0,POINT (-58.61260 -34.60990),45.547,5.51,both,0.078
2,-28.455,-65.767,POINT (-65.76700 -28.45500),2455,2340,2455,Catamarca Airport,Catamarca,Argentina,CTC,...,N,America/Catamarca,airport,OurAirports,0.0,POINT (-65.75170 -28.59560),38.235,10.706,both,0.141
3,-34.608,-58.437,POINT (-58.43709 -34.60757),10365,8922,10365,Plaza de los Virreyes Station,Buenos Aires,Argentina,\N,...,U,\N,station,User,0.0,POINT (-58.46164 -34.64306),45.619,5.587,both,0.043
4,-45.846,-67.517,POINT (-67.51700 -45.84600),2487,2371,2487,General E. Mosconi Airport,Comodoro Rivadavia,Argentina,CRD,...,N,America/Catamarca,airport,OurAirports,0.0,POINT (-67.46550 -45.78530),55.205,8.762,both,0.08


## Velocidad de descarga de internet

In [None]:
# Per-city table read from the local M-Lab CSV export.
MLAB_CITIES_CSV = 'files/mlab-cities.csv'
mlab = pd.read_csv(MLAB_CITIES_CSV)

In [None]:
# Index by (Country, City), the same key layout used for latlongs.
mlab = mlab.set_index(['Country', 'City'])

In [None]:
# Discard every column after the first one.
trailing_columns = mlab.columns[1:]
mlab = mlab.drop(columns=trailing_columns)

In [None]:
# Preview the first five rows.
mlab.head(n=5)