In [197]:
import pandas as pd
import numpy as np
# `display` and `HTML` come from the public IPython.display module; importing
# them from IPython.core.display is a deprecated path.
from IPython.display import display, HTML
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import matplotlib.dates as mdates
import matplotlib.ticker as ticker

# Notebook-wide configuration.
warnings.filterwarnings('ignore')  # silences every warning category for the session
np.random.seed(42)  # fixed seed for NumPy's legacy global random generator
pd.set_option("display.max_rows", None)  # displayed frames are never row-truncated
pd.options.display.float_format = "{:.2f}".format  # floats rendered with two decimals
# Inject CSS that sets the `.container` element to 90% of the page width.
display(HTML("<style>.container { width:90% !important; }</style>"))

In [204]:
# Read the wine-export workbook; the preview below shows one row per country
# and one column per year/measure pair (e.g. 2008_KG, 2008_USD).
caminho_planilha = './ExportacaoVinhos.xlsx'
df = pd.read_excel(caminho_planilha)
df.head()

Unnamed: 0,Pais,2008_KG,2008_USD,2009_KG,2009_USD,2010_KG,2010_USD,2011_KG,2011_USD,2012_KG,...,2018_KG,2018_USD,2019_KG,2019_USD,2020_KG,2020_USD,2021_KG,2021_USD,2022_KG,2022_USD
0,Afeganistão,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,11,46,0,0
1,África do Sul,0,0,0,0,0,0,0,0,0,...,0,0,26,95,4,21,0,0,0,0
2,"Alemanha, República Democrática",265742,429970,225086,393482,27715,138666,36070,144150,8189,...,10794,45382,3660,25467,6261,32605,2698,6741,7630,45367
3,Angola,25721,71083,54786,84235,33557,189891,13889,69001,2833,...,477,709,345,1065,0,0,0,0,4068,4761
4,Anguilla,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [205]:
# Reshape from wide (one column per "<year>_<measure>") to long: one row per
# country and year, with the measures (KG, USD) as separate columns.
df_melted = pd.melt(df, id_vars=['Pais'], var_name='Ano e Valor', value_name='Quantidade')
# Split labels such as "2008_KG" into the year and the measure name.
df_melted[['Ano', 'Valor']] = df_melted['Ano e Valor'].str.split('_', expand=True)
df_exportacao = (
    df_melted
    .pivot(index=['Pais', 'Ano'], columns='Valor', values='Quantidade')
    .reset_index()
)
# Drop the leftover "Valor" label that pivot puts on the columns axis.
df_exportacao.columns.name = None
# The former rename({'Litros': 'Litros', 'USD': 'USD'}) mapped names to
# themselves and has been removed; the columns here are Pais, Ano, KG and USD.
df_exportacao.tail()

Unnamed: 0,Pais,Ano,KG,USD
1915,Áustria,2018,0,0
1916,Áustria,2019,0,0
1917,Áustria,2020,0,0
1918,Áustria,2021,0,0
1919,Áustria,2022,6,212


In [209]:
# Parse the four-digit year labels into timestamps (each year is shown as
# January 1st of that year in the previews).
df_exportacao = df_exportacao.assign(
    Ano=pd.to_datetime(df_exportacao['Ano'], format='%Y')
)


In [210]:
# Tag every row with the same exporting country.
df_exportacao = df_exportacao.assign(Origem='Brasil')
df_exportacao.head()

Unnamed: 0,Destino,Ano,Litros,USD,Origem
0,Afeganistão,2008-01-01,0,0,Brasil
1,Afeganistão,2009-01-01,0,0,Brasil
2,Afeganistão,2010-01-01,0,0,Brasil
3,Afeganistão,2011-01-01,0,0,Brasil
4,Afeganistão,2012-01-01,0,0,Brasil


In [211]:
# Rename the country column to "Destino" and the KG column to "Litros".
# NOTE(review): the source column is labelled KG; relabelling it as litres
# presumably assumes 1 kg of wine ~ 1 litre - confirm.
df_exportacao = df_exportacao.rename(columns={'Pais': 'Destino', 'KG': 'Litros'})
df_exportacao.head()

Unnamed: 0,Destino,Ano,Litros,USD,Origem
0,Afeganistão,2008-01-01,0,0,Brasil
1,Afeganistão,2009-01-01,0,0,Brasil
2,Afeganistão,2010-01-01,0,0,Brasil
3,Afeganistão,2011-01-01,0,0,Brasil
4,Afeganistão,2012-01-01,0,0,Brasil


In [212]:
# Dimensions (rows, columns) of the long-format export table.
df_exportacao.shape

(1920, 5)

In [213]:
# Summary statistics for the table's columns.
df_exportacao.describe()

Unnamed: 0,Ano,Litros,USD
count,1920,1920.0,1920.0
mean,2015-01-01 03:12:00,45824.18,58668.91
min,2008-01-01 00:00:00,0.0,0.0
25%,2011-01-01 00:00:00,0.0,0.0
50%,2015-01-01 00:00:00,0.0,0.0
75%,2019-01-01 00:00:00,1006.25,4000.5
max,2022-01-01 00:00:00,21912914.0,14795694.0
std,,598815.48,498786.26


In [214]:
# Column dtypes, non-null counts and memory usage.
df_exportacao.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1920 entries, 0 to 1919
Data columns (total 5 columns):
 #   Column   Non-Null Count  Dtype         
---  ------   --------------  -----         
 0   Destino  1920 non-null   object        
 1   Ano      1920 non-null   datetime64[ns]
 2   Litros   1920 non-null   int64         
 3   USD      1920 non-null   int64         
 4   Origem   1920 non-null   object        
dtypes: datetime64[ns](1), int64(2), object(2)
memory usage: 75.1+ KB


In [215]:
# Index the table by destination and remove the 'Brasil' destination rows
# (presumably because Brasil is the exporter itself - confirm).
# Like the in-place version, drop() raises KeyError if 'Brasil' is absent.
df_exportacao = (
    df_exportacao
    .set_index('Destino')
    .drop(index=['Brasil'])
)
df_exportacao.head()

Unnamed: 0_level_0,Ano,Litros,USD,Origem
Destino,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Afeganistão,2008-01-01,0,0,Brasil
Afeganistão,2009-01-01,0,0,Brasil
Afeganistão,2010-01-01,0,0,Brasil
Afeganistão,2011-01-01,0,0,Brasil
Afeganistão,2012-01-01,0,0,Brasil


In [216]:
# Total exported per destination over the whole period.
# The columns are selected explicitly: the first positional argument of
# GroupBy.sum() is `numeric_only`, not a column selector, so sum('Litros')
# only produced the numeric totals because a non-empty string is truthy.
total_kg = (
    df_exportacao
    .groupby('Destino')[['Litros', 'USD']]
    .sum()
    .sort_values(by='Litros', ascending=False)
)
# Destinations whose summed volume is zero.
paises_remover = total_kg[total_kg['Litros'] == 0].index
paises_remover

Index(['Iraque', 'Costa do Marfim', 'Jamaica', 'Tanzânia', 'Ilhas Virgens',
       'República Dominicana', 'Tunísia', 'Líbano', 'Senegal', 'Anguilla',
       'Porto Rico'],
      dtype='object', name='Destino')

In [217]:
# Remove the zero-volume destinations; labels that are not present in the
# index are skipped instead of raising.
df_exportacao = df_exportacao.drop(index=paises_remover, errors='ignore')

In [219]:
# Number of distinct labels left in the index.
df_exportacao.index.nunique()

116

## Nos últimos 15 anos (2008–2022), o Brasil exportou vinho para 116 países.

In [220]:
# Totals per destination, largest volume first.
# The summed columns are selected before aggregating: GroupBy.sum() takes
# `numeric_only` as its first positional argument, so passing a list of column
# names there does not select columns (it merely evaluates as truthy).
df_total_por_pais = (
    df_exportacao
    .groupby('Destino')[['Litros', 'USD']]
    .sum()
    .sort_values('Litros', ascending=False)
)
# Index labels of the ten destinations with the largest total volume.
top_10_consumidores = df_total_por_pais.head(10).index
top_10_consumidores

Index(['Rússia', 'Paraguai', 'Estados Unidos', 'China', 'Espanha', 'Haiti',
       'Reino Unido', 'Países Baixos', 'Japão',
       'Alemanha, República Democrática'],
      dtype='object', name='Destino')

In [221]:
# df_total_por_pais is sorted by Litros in descending order where it is built,
# so these are the ten destinations with the largest total volume.
df_total_por_pais.head(10)


Unnamed: 0_level_0,Litros,USD
Destino,Unnamed: 1_level_1,Unnamed: 2_level_1
Rússia,39029799,25504484
Paraguai,29214770,38719031
Estados Unidos,3563355,9684567
China,2509458,4746525
Espanha,1993000,3808552
Haiti,1791603,2327208
Reino Unido,1239551,4711464
Países Baixos,1236154,3791611
Japão,1181692,2377716
"Alemanha, República Democrática",909051,2546394


#### Os 10 países que mais importam vinhos brasileiros (em volume) são: Rússia, Paraguai, Estados Unidos, China, Espanha, Haiti, Reino Unido, Países Baixos, Japão e Alemanha.

In [223]:
# Move the index back into a regular column and put the columns in
# presentation order.
ordem_colunas = ['Ano', 'Destino', 'Origem', 'Litros', 'USD']
df_exportacao = df_exportacao.reset_index()[ordem_colunas]
df_exportacao.head()

Unnamed: 0,Ano,Destino,Origem,Litros,USD
0,2008-01-01,Afeganistão,Brasil,0,0
1,2009-01-01,Afeganistão,Brasil,0,0
2,2010-01-01,Afeganistão,Brasil,0,0
3,2011-01-01,Afeganistão,Brasil,0,0
4,2012-01-01,Afeganistão,Brasil,0,0


In [231]:
# Totals per (origin, destination) pair, largest volume first, as a flat table.
# Litros and USD are selected explicitly before summing: the first positional
# argument of GroupBy.sum() is `numeric_only`, not a list of columns.
df_total_por_pais = (
    df_exportacao
    .groupby(['Origem', 'Destino'])[['Litros', 'USD']]
    .sum()
    .sort_values('Litros', ascending=False)
    .reset_index()
)
df_total_por_pais.head()

Unnamed: 0,Origem,Destino,Litros,USD
0,Brasil,Rússia,39029799,25504484
1,Brasil,Paraguai,29214770,38719031
2,Brasil,Estados Unidos,3563355,9684567
3,Brasil,China,2509458,4746525
4,Brasil,Espanha,1993000,3808552


In [233]:
# Write the per-destination totals to a new workbook, without the index column.
caminho_saida = './ExportacaoVinhosTotais.xlsx'
df_total_por_pais.to_excel(caminho_saida, index=False)

In [58]:
# Optional interactive exploration with D-Tale, left disabled.
# To use it, uncomment the two statements below (requires the `dtale` package).
# import dtale

# Open the analysis in the browser:
# dtale.show(df_exportacao).open_browser()