# Trabalhando com datas

## Importando a Biblioteca

In [1]:
import pandas as pd

## Fazendo a leitura dos arquivos

In [2]:
# Load one sales spreadsheet per city from the local "datasets" folder.
# Forward slashes are used because they resolve on Windows, Linux and macOS
# alike, whereas a backslash-separated relative path only works on Windows.
df1 = pd.read_excel("./datasets/Aracaju.xlsx")
df2 = pd.read_excel("./datasets/Fortaleza.xlsx")
df3 = pd.read_excel("./datasets/Natal.xlsx")
df4 = pd.read_excel("./datasets/Recife.xlsx")
df5 = pd.read_excel("./datasets/Salvador.xlsx")

## Agrupando os arquivos

In [3]:
# Stack the five per-city frames vertically into a single DataFrame.
# Each frame keeps its own row labels, so index values repeat across cities.
df = pd.concat(objs=(df1, df2, df3, df4, df5), axis=0)

## Criando a coluna Receita

In [7]:
# Revenue per row: element-wise product of the Vendas and Qtde columns.
df['Receita'] = df['Vendas'] * df['Qtde']

## Transformando a coluna de data em tipo inteiro

In [4]:
# Convert the date column to integers (nanoseconds since the Unix epoch).
# The column is first pinned to nanosecond resolution: the integer cast
# returns values in the column's own unit, while the later pd.to_datetime call
# interprets bare integers as nanoseconds. A column stored at another
# resolution (possible on newer pandas versions -- TODO confirm for this
# environment) would otherwise round-trip to wrong dates.
df['Data'] = df['Data'].astype('datetime64[ns]').astype('int64')
# Show the resulting column dtypes to confirm the conversion.
df.dtypes

Cidade     object
Data        int64
Vendas    float64
LojaID      int64
Qtde        int64
dtype: object

## Transformando a coluna de data de volta em tipo datetime

In [5]:
# Parse the Data column back into datetimes (plain integers are read as
# nanoseconds since the epoch by default), then display the column dtypes.
df['Data'] = df['Data'].pipe(pd.to_datetime)
df.dtypes

Cidade            object
Data      datetime64[ns]
Vendas           float64
LojaID             int64
Qtde               int64
dtype: object

## Agrupando a receita por ano

In [8]:
# Total revenue per calendar year: group the Receita values by the year
# extracted from the Data column and sum each group.
df['Receita'].groupby(df['Data'].dt.year).sum()

Data
2018    118091.27
2019    227348.64
Name: Receita, dtype: float64

## Criando nova coluna com o ano

In [12]:
# Store the calendar year of each row's date in its own column.
df['Ano'] = df['Data'].dt.year
# Preview 8 random rows. The draw is seeded so the displayed sample is the
# same on every Restart & Run All instead of changing on each execution.
df.sample(8, random_state=42)

Unnamed: 0,Cidade,Data,Vendas,LojaID,Qtde,Receita,Ano
6,Salvador,2019-01-01,36.3,1034,2,72.6,2019
38,Recife,2019-01-01,104.53,983,4,418.12,2019
164,Salvador,2019-01-02,6.59,1035,1,6.59,2019
108,Fortaleza,2019-03-02,152.89,981,4,611.56,2019
0,Natal,2018-08-27,606.0,853,2,1212.0,2018
3,Recife,2019-02-10,160.54,981,3,481.62,2019
150,Salvador,2019-01-02,11.13,1035,2,22.26,2019
29,Recife,2019-01-01,10.18,980,8,81.44,2019


## Criando coluna com mês e dia

In [17]:
# Split the date into separate month and day-of-month columns.
df['Mes'] = df['Data'].dt.month
df['Dia'] = df['Data'].dt.day
# Preview 5 random rows. The draw is seeded so the displayed sample is the
# same on every Restart & Run All instead of changing on each execution.
df.sample(5, random_state=42)

Unnamed: 0,Cidade,Data,Vendas,LojaID,Qtde,Receita,Ano,Mes,Dia
50,Recife,2019-01-01,7.24,983,4,28.96,2019,1,1
62,Aracaju,2018-01-01,28.66,1520,3,85.98,2018,1,1
14,Recife,2019-01-12,102.91,982,3,308.73,2019,1,12
117,Aracaju,2018-01-01,170.97,1521,4,683.88,2018,1,1
124,Recife,2019-03-02,47.98,983,7,335.86,2019,3,2


## Retornando a data mais antiga

In [18]:
# Earliest date present in the Data column.
df['Data'].min()

Timestamp('2018-01-01 00:00:00')

## Calculando a diferença de dias em relação à data mais antiga

In [20]:
# Elapsed time between each row's date and the earliest date in the dataset
# (the subtraction yields a timedelta column).
df['Diferença_dias'] = df['Data'] - df['Data'].min()
# Preview 5 random rows. The draw is seeded so the displayed sample is the
# same on every Restart & Run All instead of changing on each execution.
df.sample(5, random_state=42)

Unnamed: 0,Cidade,Data,Vendas,LojaID,Qtde,Receita,Ano,Mes,Dia,Diferença_dias
156,Natal,2019-01-02,36.14,1037,3,108.42,2019,1,2,366 days
19,Salvador,2019-01-01,167.12,1035,1,167.12,2019,1,1,365 days
76,Natal,2019-02-20,364.0,853,2,728.0,2019,2,20,415 days
69,Salvador,2019-01-01,14.79,1035,1,14.79,2019,1,1,365 days
211,Salvador,2019-01-02,141.05,1035,3,423.15,2019,1,2,366 days


## Criando a coluna Trimestre

In [24]:
# Calendar quarter (1-4) of each row's date.
df['Trimestre'] = df['Data'].dt.quarter
# Preview 5 random rows. The draw is seeded so the displayed sample is the
# same on every Restart & Run All instead of changing on each execution.
df.sample(5, random_state=42)

Unnamed: 0,Cidade,Data,Vendas,LojaID,Qtde,Receita,Ano,Mes,Dia,Diferença_dias,Trimestre
145,Salvador,2019-01-02,84.62,1034,2,169.24,2019,1,2,366 days,1
63,Aracaju,2018-01-01,39.87,1522,5,199.35,2018,1,1,0 days,1
16,Aracaju,2018-01-01,37.68,1522,10,376.8,2018,1,1,0 days,1
29,Salvador,2019-01-01,9.27,1034,1,9.27,2019,1,1,365 days,1
113,Salvador,2019-03-02,18.9,1034,3,56.7,2019,3,2,425 days,1


## Filtrando as vendas de 2019 no mês de março

In [33]:
# Boolean masks selecting rows from year 2019 and from month 3 (March).
filtro_ano = df['Ano'].eq(2019)
filtro_mes = df['Mes'].eq(3)
# Keep only rows satisfying both masks and show the first five of them.
df[filtro_ano & filtro_mes].head()

Unnamed: 0,Cidade,Data,Vendas,LojaID,Qtde,Receita,Ano,Mes,Dia,Diferença_dias,Trimestre
108,Fortaleza,2019-03-02,152.89,981,4,611.56,2019,3,2,425 days,1
109,Fortaleza,2019-03-02,18.9,982,6,113.4,2019,3,2,425 days,1
110,Fortaleza,2019-03-02,51.98,983,6,311.88,2019,3,2,425 days,1
111,Fortaleza,2019-03-02,8.0,981,3,24.0,2019,3,2,425 days,1
112,Fortaleza,2019-03-02,133.59,982,1,133.59,2019,3,2,425 days,1
