# Working with Dates and Time

In [7]:
# Load the per-city sales workbooks (MS Excel) and combine them into one frame
import pandas as pd

# One workbook per city; unpacking the map keeps each individual frame
# available under its own name (df1 .. df5).
df1, df2, df3, df4, df5 = map(
    pd.read_excel,
    [
        "./datasets/Aracaju.xlsx",
        "./datasets/Fortaleza.xlsx",
        "./datasets/Natal.xlsx",
        "./datasets/Recife.xlsx",
        "./datasets/Salvador.xlsx",
    ],
)

# Stack the five frames and give the columns upper-case English names.
# NOTE: concat keeps each workbook's own row labels, so labels repeat across cities.
df = pd.concat([df1, df2, df3, df4, df5]).rename(
    columns={
        "Cidade": "CITY",
        "Data": "DATE",
        "Vendas": "SALES",
        "LojaID": "STORE_ID",
        "Qtde": "QUANTITY",
    }
)

# INCOME is SALES multiplied by QUANTITY, row by row.
df["INCOME"] = df["SALES"] * df["QUANTITY"]
df.head()

Unnamed: 0,CITY,DATE,SALES,STORE_ID,QUANTITY,INCOME
0,Aracaju,2018-01-01,142.0,1520,1,142.0
1,Aracaju,2018-01-01,14.21,1522,6,85.26
2,Aracaju,2018-01-01,71.55,1520,1,71.55
3,Aracaju,2018-01-01,3.01,1521,7,21.07
4,Aracaju,2018-01-01,24.51,1522,8,196.08


In [8]:
# Convert the DATE column from datetime to a 64-bit integer.
# The resulting integers are nanoseconds since the Unix epoch
# (2018-01-01 becomes 1514764800000000000).
df["DATE"] = df["DATE"].astype("int64")
print(df.dtypes)
df.head()

CITY         object
DATE          int64
SALES       float64
STORE_ID      int64
QUANTITY      int64
INCOME      float64
dtype: object


Unnamed: 0,CITY,DATE,SALES,STORE_ID,QUANTITY,INCOME
0,Aracaju,1514764800000000000,142.0,1520,1,142.0
1,Aracaju,1514764800000000000,14.21,1522,6,85.26
2,Aracaju,1514764800000000000,71.55,1520,1,71.55
3,Aracaju,1514764800000000000,3.01,1521,7,21.07
4,Aracaju,1514764800000000000,24.51,1522,8,196.08


In [9]:
# Convert the integer DATE column back to datetime.
# pd.to_datetime interprets integers as nanoseconds since the Unix epoch by
# default, which matches the values produced by astype("int64").
df["DATE"] = pd.to_datetime(df["DATE"])
print(df.dtypes)
df.head()

CITY                object
DATE        datetime64[ns]
SALES              float64
STORE_ID             int64
QUANTITY             int64
INCOME             float64
dtype: object


Unnamed: 0,CITY,DATE,SALES,STORE_ID,QUANTITY,INCOME
0,Aracaju,2018-01-01,142.0,1520,1,142.0
1,Aracaju,2018-01-01,14.21,1522,6,85.26
2,Aracaju,2018-01-01,71.55,1520,1,71.55
3,Aracaju,2018-01-01,3.01,1521,7,21.07
4,Aracaju,2018-01-01,24.51,1522,8,196.08


In [10]:
# Total INCOME per calendar year, grouping on the year part of DATE
df["INCOME"].groupby(df["DATE"].dt.year).sum()

DATE
2018    118176.53
2019    228246.45
Name: INCOME, dtype: float64

In [15]:
# Split DATE into separate YEAR, MONTH and DAY columns via the .dt accessor
df["YEAR"] = df["DATE"].dt.year
df["MONTH"] = df["DATE"].dt.month
df["DAY"] = df["DATE"].dt.day
# Show a random handful of rows to check the new columns
df.sample(5)

Unnamed: 0,CITY,DATE,SALES,STORE_ID,QUANTITY,INCOME,YEAR,MONTH,DAY
5,Fortaleza,2019-01-01,37.3,1002,5,186.5,2019,1,1
132,Salvador,2019-03-02,19.61,1036,2,39.22,2019,3,2
4,Fortaleza,2019-01-01,14.25,1004,6,85.5,2019,1,1
107,Salvador,2019-01-01,14.7,1035,1,14.7,2019,1,1
52,Salvador,2019-01-01,43.77,1036,2,87.54,2019,1,1


In [16]:
# Get the oldest date in the dataframe
df["DATE"].min()

Timestamp('2018-01-01 00:00:00')

In [17]:
# Elapsed time of each row's DATE since the oldest DATE in the frame.
# Subtracting two datetimes yields a timedelta (displayed as e.g. "425 days").
df["DAYS_DIFF"] = df["DATE"].sub(df["DATE"].min())
df.sample(5)

Unnamed: 0,CITY,DATE,SALES,STORE_ID,QUANTITY,INCOME,YEAR,MONTH,DAY,DAYS_DIFF
109,Salvador,2019-03-02,124.37,1035,1,124.37,2019,3,2,425 days
163,Salvador,2019-01-02,17.66,1036,3,52.98,2019,1,2,366 days
62,Natal,2018-02-10,793.0,854,4,3172.0,2018,2,10,40 days
82,Fortaleza,2019-01-01,14.03,1004,5,70.15,2019,1,1,365 days
110,Salvador,2019-03-02,185.93,1036,3,557.79,2019,3,2,425 days


In [34]:
# Add the calendar quarter (1-4) of each DATE as a QUARTER column
df["QUARTER"] = df["DATE"].dt.quarter
df.sample(5)

Unnamed: 0,CITY,DATE,SALES,STORE_ID,QUANTITY,INCOME,YEAR,MONTH,DAY,DAYS_DIFF,QUARTER
205,Salvador,2019-01-02,196.09,1037,1,196.09,2019,1,2,366 days,1
48,Natal,2018-09-20,676.0,852,4,2704.0,2018,9,20,262 days,3
119,Recife,2019-03-02,152.3,982,7,1066.1,2019,3,2,425 days,1
94,Salvador,2019-01-01,33.24,1037,1,33.24,2019,1,1,365 days,1
65,Natal,2019-04-05,292.0,852,2,584.0,2019,4,5,459 days,2


In [38]:
# Filter sales for a given year and month (here: March 2019).
# Build a boolean mask from the year and month parts of DATE, then select the
# matching rows with .loc.
datetime_filter = (df["DATE"].dt.year == 2019) & (df["DATE"].dt.month == 3)
sales_march_2019 = df.loc[datetime_filter]

# Show up to 20 random rows. DataFrame.sample raises ValueError when asked for
# more rows than exist (without replacement), so cap the sample size at the
# number of matching rows.
sales_march_2019.sample(min(20, len(sales_march_2019)))

Unnamed: 0,CITY,DATE,SALES,STORE_ID,QUANTITY,INCOME,YEAR,MONTH,DAY,DAYS_DIFF,QUARTER
108,Fortaleza,2019-03-02,152.89,981,4,611.56,2019,3,2,425 days,1
117,Fortaleza,2019-03-02,15.42,983,8,123.36,2019,3,2,425 days,1
135,Salvador,2019-03-02,9.95,1037,3,29.85,2019,3,2,425 days,1
110,Recife,2019-03-02,51.98,983,6,311.88,2019,3,2,425 days,1
124,Salvador,2019-03-02,44.82,1034,3,134.46,2019,3,2,425 days,1
126,Fortaleza,2019-03-02,41.87,980,8,334.96,2019,3,2,425 days,1
123,Recife,2019-03-02,20.22,981,6,121.32,2019,3,2,425 days,1
115,Recife,2019-03-02,12.23,981,3,36.69,2019,3,2,425 days,1
138,Fortaleza,2019-03-02,150.38,983,6,902.28,2019,3,2,425 days,1
119,Fortaleza,2019-03-02,152.3,982,7,1066.1,2019,3,2,425 days,1
