In [None]:
import folium
import pandas as pd
import random

import numpy as np

In [None]:
# Path of the GeoJSON file used for the map overlays in this notebook.
brasil_edge = 'br_muns.geojson'

# Base map centred on latitude -10, longitude -55 at zoom level 4.
m = folium.Map(zoom_start=4, location=[-10.00, -55.00])


In [None]:
# Build a layer named 'geojson' from the GeoJSON file and attach it to the map.
geojson_layer = folium.GeoJson(brasil_edge, name='geojson')
geojson_layer.add_to(m)

In [None]:
# Render the map, including the GeoJSON layer attached above, as the cell output.
m

In [None]:
# Load a random sample of the packages dataset instead of the whole file.

# Count the lines of the CSV (header included); `with` closes the file handle.
with open('file2.csv') as csv_file:
    num_lines = sum(1 for _ in csv_file)

# Data rows available for sampling (every line except the header).
num_rows = max(num_lines - 1, 0)

# Sample size: 1/45 of the file (~2.2%), never more than the rows available.
# Clamping keeps random.sample() from failing on files shorter than 45 lines.
size = min(int(num_lines / 45), num_rows)

# Row indices to skip. Index 0 is excluded so the header is always kept.
# Skipping `num_rows - size` rows leaves exactly `size` data rows to read.
# A dedicated seeded generator makes the sample repeatable across runs
# without touching the global `random` state.
skip_idx = random.Random(42).sample(range(1, num_lines), num_rows - size)

# Read the header plus the rows that were not skipped.
packages = pd.read_csv('file2.csv', skiprows=skip_idx)

In [None]:
# Report the (rows, columns) shape of the sampled dataset.
dataset_shape = packages.shape
print('Dataset dimensions:', dataset_shape)

In [None]:
# List the column names, then show five randomly chosen rows.
column_names = packages.columns.tolist()
print('Columns:\n\n', column_names)
packages.sample(n=5)

In [None]:
# Columns that are parsed into pandas datetimes below.
datetime_columns = [
    'deadline_time',
    'first_delivery_attempt_time',
    'crossdocking_arrival_time',
    'transfer_dispatch_time',
    'transfer_receival_time',
    'last_mile_allocation_start_time',
    'last_mile_driver_pickup_time',
]

# Every timestamp column is parsed with the same explicit format.
timestamp_format = '%Y-%m-%d %H:%M:%S'
for time_column in datetime_columns:
    parsed = pd.to_datetime(packages[time_column], format=timestamp_format)
    packages[time_column] = parsed

# Summarise the frame so the converted dtypes can be checked.
packages.info()

In [None]:
# Columns treated as categorical in this summary.
categorical_cols = ['agreed_slo', 'final_city', 'mesoregion', 'status']

# For each column print its name, the number of distinct values and the values.
print('Categories per column:\n')
for cat_name in categorical_cols:
    cat_values = packages[cat_name]
    print(cat_name)
    print('# of Categories: ', cat_values.nunique())
    print('Categories: ', cat_values.unique(), '\n')

In [None]:
# Identifier columns whose number of distinct values is reported.
id_cols = ['package_id', 'company_id']

print('IDs per column:\n')
for id_name in id_cols:
    unique_ids = packages[id_name].nunique()
    print(id_name)
    print('Amount of unique ID: ', unique_ids)

# Total number of rows, for comparison with the unique counts above.
print('Over ', len(packages))

In [None]:
# value_counts() orders labels by descending frequency, so the first 30 index
# entries are the 30 most frequent mesoregions.
mesoregion_counts = packages['mesoregion'].value_counts()
most_demanded_mesoregions = mesoregion_counts.index[:30].tolist()
most_demanded_mesoregions

In [None]:
# Package counts restricted to the most demanded mesoregions.
# isin() is the vectorized membership test: it avoids calling a Python lambda
# per row and always yields a boolean mask, even for an empty frame.
packages.loc[packages['mesoregion'].isin(most_demanded_mesoregions), 'mesoregion'].value_counts()

In [None]:
# Rows of `packages` that belong to one of the most demanded mesoregions.
# isin() replaces the per-row lambda with a vectorized membership test.
packages[packages['mesoregion'].isin(most_demanded_mesoregions)]

In [None]:
# Package count per mesoregion, limited to the most demanded mesoregions.
# isin() replaces the per-row lambda with a vectorized membership test.
state_data = packages.loc[
    packages['mesoregion'].isin(most_demanded_mesoregions), 'mesoregion'
].value_counts()

In [None]:
# Show the GeoJSON path string stored in brasil_edge.
brasil_edge

In [None]:
# Show the per-mesoregion package counts computed for the most demanded mesoregions.
state_data

In [None]:
# Load the city reference table.
brasil_data = pd.read_csv('brazil_cities.csv')

# Non-null count per column. `count` must be called: the bare attribute only
# displays the bound method's repr instead of the counts.
brasil_data.count()

In [None]:
# Rows whose city name already appeared earlier in the table (first occurrence excluded).
repeated_city = brasil_data.duplicated(subset=['city'])
brasil_data.loc[repeated_city]

In [None]:
# Order the reference table alphabetically by city name (sorts brasil_data in place).
brasil_data.sort_values(by="city", inplace=True)

# True for every row whose city name occurs more than once; keep=False marks
# all occurrences, not only the repeats.
bool_series = brasil_data["city"].duplicated(keep=False)

# Keep only the rows whose city name is unique in the table.
new_brasil_data = brasil_data.loc[~bool_series]

# Summarise and display the de-duplicated table.
new_brasil_data.info()
new_brasil_data

In [None]:
# Inspect the unique-name cities whose name begins with 'B'.
starts_with_b = new_brasil_data['city'].str.startswith('B')
new_brasil_data.loc[starts_with_b]

In [None]:
# Turn the value_counts() Series into a two-column frame:
#   'city'       - the label that was in the Series index
#   'mesoregion' - the count that was the Series value
# Naming both columns explicitly keeps the result independent of how the
# installed pandas version names the value_counts() index and values
# (older releases yield 'index'/'mesoregion', pandas >= 2.0 yields
# 'mesoregion'/'count').
df_new = state_data.rename_axis('city').reset_index(name='mesoregion')

df_new

In [None]:
# Rename a column called 'index', if present, to 'city'; rename() ignores
# labels that do not exist.
df_new = df_new.rename({"index": "city"}, axis="columns")

In [None]:
# Order the rows alphabetically by the 'city' column, then display the frame.
df_new = df_new.sort_values(by="city")

df_new

In [None]:
# Attach coordinates to every df_new entry whose 'city' label is found in the
# reference table, producing the columns city / mesoregion / lat / lng.
#
# One inner merge replaces the former row-by-row pd.concat loop: it keeps only
# matching labels, preserves the order of df_new, yields a fresh 0..n-1 index
# and keeps lat/lng numeric. The zero-filled 'lat'/'lng' placeholder columns
# that used to be written to df_new were never read, so they are not created.

# One coordinate row per city name; if a name were repeated, the last row wins,
# as in the former loop.
city_coords = new_brasil_data[['city', 'lat', 'lng']].drop_duplicates(
    subset='city', keep='last'
)

df_cities = df_new[['city', 'mesoregion']].merge(city_coords, on='city', how='inner')

df_cities

In [None]:
from folium import plugins 

import branca

In [None]:
# Coordinate pairs (lat, lng) of the matched cities, as a plain array.
# NOTE(review): only coordinates are passed, so the package counts do not
# weight the heat map - confirm this is intended.
coordinate_columns = ['lat', 'lng']
stationArr = df_cities[coordinate_columns].values

# Add the heat map layer to the existing map and display it.
heat_layer = plugins.HeatMap(stationArr, radius=25)
heat_layer.add_to(m)
m

In [None]:
# Start from a fresh base map so the layers added to the previous `m` are not carried over.
m = folium.Map(location=[-10.00, -55.00], zoom_start=4)

# Choropleth coloured by the 'mesoregion' column of df_cities, keyed by 'city'.
folium.Choropleth(
    geo_data=brasil_edge,
    data=df_cities,
    columns=['city', 'mesoregion'],
    # key_on names the GeoJSON property that `data` is joined on; without it
    # the values in `data` are not bound to any feature. The property used
    # here is the same one the other choropleth in this notebook uses -
    # TODO confirm it matches the GeoJSON file.
    key_on='feature.properties.nome',
    fill_color='YlGnBu',
    fill_opacity=0.2,
    line_opacity=1,
    highlight=True,
    smooth_factor=0,
    legend_name='Packages',
).add_to(m)

m

In [None]:



# Choropleth of df_cities on a light base map, with a layer switcher.
# 'Mapbox Bright' is no longer available as a built-in folium tileset, so a
# built-in light tileset is used instead.
world_choropelth = folium.Map(location=[-10.00, -55.00], tiles='cartodbpositron', zoom_start=4)

# folium.Choropleth replaces the deprecated Map.choropleth() method and takes
# the same arguments.
folium.Choropleth(
    geo_data=brasil_edge,
    data=df_cities,
    columns=['city', 'mesoregion'],
    # GeoJSON property the 'city' column of df_cities is joined on.
    key_on='feature.properties.nome',
    fill_color='YlOrRd',
    fill_opacity=0.2,
    line_opacity=1,
    highlight=True,
    smooth_factor=0,
    legend_name='Packages Delivered',
).add_to(world_choropelth)

# Layer switcher so the overlay can be toggled.
folium.LayerControl().add_to(world_choropelth)

# display map
world_choropelth