In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import altair as alt

In [2]:
# Read sheet 'Table 15' of the migrant-stock workbook, skipping the first
# 15 rows of the sheet before the header row.
df_dados = pd.read_excel(
    'UN_MigrantStockByOriginAndDestination_2015.xlsx',
    skiprows=15,
    sheet_name='Table 15',
)

In [3]:
# df_dados.drop([i for i in df_dados.columns if 'Unnamed' in i], axis=1)

Quando a coluna `Unnamed: 4` for NaN, vamos remover a linha.

In [4]:
# Sanity check: keep the rows where 'Unnamed: 4' is not null, then count the
# nulls left in that column. The result is 0 by construction.
# NOTE(review): this only displays a number; df_dados itself is not filtered
# here -- confirm whether the rows were meant to be dropped.
df_dados[~df_dados['Unnamed: 4'].isnull()]['Unnamed: 4'].isna().sum()

0

In [5]:
# Give the auto-named 'Unnamed: 1' column a meaningful label.
df_dados = df_dados.rename(
    {'Unnamed: 1': 'Destination Country'},
    axis='columns',
)

In [6]:
# Drop every column whose label contains 'Unn' (the remaining auto-named
# columns), then drop the 'Total' column.
df_dados = df_dados.drop(
    columns=[label for label in df_dados.columns if 'Unn' in label]
)
df_dados = df_dados.drop(columns=['Total'])


In [7]:
# WIDE TO LONG
# Wide to long: every column other than 'Destination Country' becomes an
# ('Origin Country', 'Number People') pair on its own row.
df_dados = df_dados.melt(
    id_vars=['Destination Country'],
    value_name='Number People',
    var_name='Origin Country',
)

In [8]:
# REMOVE ALL LINES WHICH CONTAINS NAN VALUES ON ANY COLUMN
# Remove every row that contains a NaN in any column. The name is rebound to
# the filtered frame instead of mutating it in place.
df_dados = df_dados.dropna(axis='index', how='any')

In [9]:
# Keep only the rows whose destination is Brazil (boolean-mask form).
df_dados_brazil = df_dados.loc[df_dados['Destination Country'] == 'Brazil']
# The same filter written with DataFrame.query; this assignment overwrites
# the one above.
df_dados_brazil = df_dados.query('`Destination Country` == "Brazil"')

In [10]:
# Display the Brazil rows ordered from the largest to the smallest count.
df_dados_brazil.sort_values(by='Number People', ascending=False)

Unnamed: 0,Destination Country,Origin Country,Number People
43944,Brazil,Portugal,67488.0
28839,Brazil,Japan,23787.0
42884,Brazil,Paraguay,20978.0
7109,Brazil,Bolivia (Plurinational State of),17860.0
28309,Brazil,Italy,15161.0
...,...,...,...
33344,Brazil,Malawi,0.0
32284,Brazil,Liechtenstein,0.0
49509,Brazil,Sierra Leone,0.0
9494,Brazil,Burundi,0.0


In [11]:
# Bar chart of the 10 origin countries with the largest 'Number People'
# among the rows whose destination is Brazil, bars sorted by descending y.
(
    alt.Chart(df_dados_brazil.nlargest(10, 'Number People'))
    .mark_bar()
    .encode(
        y=alt.Y('Number People', title='Quantidade Imigrantes'),
        x=alt.X(
            'Origin Country',
            sort='-y',
            axis=alt.Axis(labelAngle=45),
            title='País de Origem',
        ),
        tooltip=['Origin Country', 'Number People'],
    )
    .properties(title='Portugal é o país que mais imigrou para o Brasil em 2015')
    .interactive()
)

In [12]:
from vega_datasets import data

In [13]:
# Load the cars dataset through the `data` loader imported from vega_datasets.
df_cars = data.cars()

In [14]:
# ESTÁ NO FORMATO TIDY, pois não há colunas redundantes (duas ou mais colunas com a mesma informação)

In [15]:
# Preview the first rows of the cars table.
df_cars.head()

Unnamed: 0,Name,Miles_per_Gallon,Cylinders,Displacement,Horsepower,Weight_in_lbs,Acceleration,Year,Origin
0,chevrolet chevelle malibu,18.0,8,307.0,130.0,3504,12.0,1970-01-01,USA
1,buick skylark 320,15.0,8,350.0,165.0,3693,11.5,1970-01-01,USA
2,plymouth satellite,18.0,8,318.0,150.0,3436,11.0,1970-01-01,USA
3,amc rebel sst,16.0,8,304.0,150.0,3433,12.0,1970-01-01,USA
4,ford torino,17.0,8,302.0,140.0,3449,10.5,1970-01-01,USA


In [16]:
# Convert fuel economy from miles per US gallon to kilometres per litre.
# 1 mile = 1.609344 km and 1 US gallon = 3.785411784 L, so
# km/L = mpg * 1.609344 / 3.785411784 (about 0.4251 * mpg).
# A rounded factor such as 1.8 / 3.78 would overstate every value by ~12%.
KM_PER_MILE = 1.609344
LITERS_PER_US_GALLON = 3.785411784
df_cars['Km_per_Liter'] = df_cars['Miles_per_Gallon'] * (KM_PER_MILE / LITERS_PER_US_GALLON)

In [17]:
# Scatter plot of horsepower against miles per gallon, colored by origin.
(
    alt.Chart(df_cars)
    .mark_point(size=50, opacity=0.6, filled=True)
    .encode(
        color=alt.Color('Origin'),
        x=alt.X('Horsepower', title='HP'),
        y=alt.Y('Miles_per_Gallon', title='Milhas por Galão'),
        tooltip=['Origin', 'Horsepower', 'Miles_per_Gallon'],
    )
    .interactive()
)

In [18]:
# Horizontal bars: mean miles per gallon for each origin, one color per origin.
(
    alt.Chart(df_cars)
    .mark_bar()
    .encode(
        y=alt.Y('Origin'),
        x=alt.X('mean(Miles_per_Gallon)'),
        color=alt.Color('Origin'),
    )
    .interactive()
)

In [19]:
# Histogram of Km_per_Liter: values are binned (at most 33 bins) on x and the
# number of rows per bin is shown on y and in the tooltip.
# Reference: 1 gallon = 3.785411784 liters.
(
    alt.Chart(df_cars)
    .mark_bar()
    .encode(
        tooltip=['count()'],
        y=alt.Y('count()'),
        x=alt.X('Km_per_Liter', bin=alt.Bin(maxbins=33)),
    )
    .interactive()
)

In [20]:
# One filled point per row: Year (temporal) on x, Km_per_Liter (quantitative)
# on y, colored by origin, in a 500-wide view.
points = (
    alt.Chart(df_cars)
    .mark_point(filled=True)
    .encode(
        color=alt.Color('Origin'),
        y=alt.Y('Km_per_Liter:Q', title="Km por Litro"),
        x=alt.X('Year:T'),
    )
    .properties(width=500)
)

In [21]:
# One line per origin: mean Km_per_Liter for each Year.
lines = (
    alt.Chart(df_cars)
    .mark_line()
    .encode(
        color=alt.Color('Origin'),
        y=alt.Y('mean(Km_per_Liter)'),
        x=alt.X('Year:T'),
    )
    .interactive()
)

In [22]:
# Show the two charts side by side (horizontal concatenation).
alt.hconcat(points, lines)

In [23]:
# Interval selection object; it is referenced by the `base` chart defined in a
# later cell, both in its color condition and in its properties.
interval = alt.selection_interval()

In [24]:
# Chart template without an x channel; callers supply it with .encode(x=...).
# NOTE(review): attaching the selection through .properties(selection=...)
# may depend on the installed Altair version -- confirm (other releases
# document add_selection / add_params for this).
base = (
    alt.Chart(df_cars)
    .mark_point(size=100, filled=True)
    .encode(
        tooltip=alt.Tooltip('Name'),
        # Points inside the interval take the color of their Origin; points
        # outside it are drawn in gray.
        color=alt.condition(interval, 'Origin', alt.value('gray')),
        y=alt.Y('Km_per_Liter'),
    )
    .properties(selection=interval)
)

In [25]:
# Render the base chart with Acceleration on the x axis.
base.encode(x = 'Acceleration')

In [26]:
# Two copies of the base chart side by side, differing only in the x field;
# both are built from the same `base`, which carries the `interval` selection.
alt.hconcat(
    base.encode(x=alt.X('Acceleration')),
    base.encode(x=alt.X('Horsepower')),
)