In [1]:
import csv
with open('transactions.csv') as file:
    reader = csv.reader(file, delimiter=',')
    for row in reader:
        transaction_date = row[0]
        break
print(transaction_date)

28/5/2019:15:39:41 +0000


<B>DATETIME</B>

In [2]:
import datetime as dt

In [3]:
# Formatação
formatted_date = dt.datetime.strptime(transaction_date[:-6], '%d/%m/%Y:%H:%M:%S')
print(formatted_date)

2019-05-28 15:39:41


In [4]:
# Colocando Timezone
import pytz
formatted_date_tz = formatted_date.replace(tzinfo=pytz.UTC)
my_timezone = pytz.timezone('America/Sao_Paulo')
local_date = formatted_date_tz.astimezone(my_timezone)
print(local_date.strftime('%d/%m/%Y %H:%M:%S %Z'))

28/05/2019 12:39:41 -03


In [5]:
# Arredonda por hora
rounded_date = local_date.replace(minute=0, second=0, microsecond=0)
print(rounded_date)

2019-05-28 12:00:00-03:00


In [6]:
# Comparando datas
hoje = dt.datetime.now()
print(hoje)

2020-05-21 02:28:04.230032


In [7]:
semana_passada = dt.datetime.now() - dt.timedelta(days=7)
print(semana_passada)

2020-05-14 02:28:22.773856


In [8]:
# Criando faixas de datas
from dateutil import rrule

list(rrule.rrule(rrule.WEEKLY, dtstart=hoje, count=4))

[datetime.datetime(2020, 5, 21, 2, 28, 4),
 datetime.datetime(2020, 5, 28, 2, 28, 4),
 datetime.datetime(2020, 6, 4, 2, 28, 4),
 datetime.datetime(2020, 6, 11, 2, 28, 4)]

<B>PENDULUM</B>

In [9]:
print(transaction_date)

28/5/2019:15:39:41 +0000


In [10]:
import pendulum as pl

In [11]:
# Formatação
trans_date = pl.from_format(transaction_date[:-6], 'DD/MM/YYYY:H:m:s')
print(trans_date)

2019-05-28T15:39:41+00:00


In [12]:
# Timezone
local = trans_date.in_timezone('America/Sao_Paulo')
print(local)

2019-05-28T12:39:41-03:00


In [13]:
# Arredonda por hora
rounded = local.start_of('hour')
print(rounded)

2019-05-28T12:00:00-03:00


In [14]:
# Comparando datas 
monday = local.start_of('week')
print(monday)

2019-05-27T00:00:00-03:00


In [15]:
next_monday = monday.add(weeks=1)
print(next_monday)

2019-06-03T00:00:00-03:00


In [17]:
# Faixas de datas
start_day = pl.datetime(2020, 1, 1)
end_day   = pl.datetime(2020, 1, 15)
period    = pl.period(start_day, end_day)
for week in period.range('days'):
    print(week)

2020-01-01T00:00:00+00:00
2020-01-02T00:00:00+00:00
2020-01-03T00:00:00+00:00
2020-01-04T00:00:00+00:00
2020-01-05T00:00:00+00:00
2020-01-06T00:00:00+00:00
2020-01-07T00:00:00+00:00
2020-01-08T00:00:00+00:00
2020-01-09T00:00:00+00:00
2020-01-10T00:00:00+00:00
2020-01-11T00:00:00+00:00
2020-01-12T00:00:00+00:00
2020-01-13T00:00:00+00:00
2020-01-14T00:00:00+00:00
2020-01-15T00:00:00+00:00


In [18]:
print(period.days)

14


<B>PANDAS</B>

In [19]:
import pandas as pd
import warnings
warnings.simplefilter(action='ignore')

df = pd.read_csv('transactions.csv')
df.columns = ['transaction_date', 'transaction_operation']

In [20]:
df.head()

Unnamed: 0,transaction_date,transaction_operation
0,1/12/2019:21:9:43 +0000,update
1,17/4/2019:2:8:43 +0000,insert
2,14/4/2019:5:51:49 +0000,select
3,17/3/2019:23:28:7 +0000,insert
4,19/3/2019:7:56:27 +0000,delete


In [21]:
df.dtypes

transaction_date         object
transaction_operation    object
dtype: object

In [22]:
# Formatação
df['transaction_date'] = pd.to_datetime(df['transaction_date'], 
                                        format='%d/%m/%Y:%H:%M:%S +0000',
                                        utc=True)

In [23]:
df.dtypes

transaction_date         datetime64[ns, UTC]
transaction_operation                 object
dtype: object

In [24]:
df.set_index('transaction_date', inplace=True)

In [25]:
# Timezone
df.index = df.index.tz_convert('America/Sao_Paulo')

In [26]:
df.head()

Unnamed: 0_level_0,transaction_operation
transaction_date,Unnamed: 1_level_1
2019-12-01 18:09:43-03:00,update
2019-04-16 23:08:43-03:00,insert
2019-04-14 02:51:49-03:00,select
2019-03-17 20:28:07-03:00,insert
2019-03-19 04:56:27-03:00,delete


In [27]:
# Arredondamento
df.index = df.index.floor('1H')
df.head()

Unnamed: 0_level_0,transaction_operation
transaction_date,Unnamed: 1_level_1
2019-12-01 18:00:00-03:00,update
2019-04-16 23:00:00-03:00,insert
2019-04-14 02:00:00-03:00,select
2019-03-17 20:00:00-03:00,insert
2019-03-19 04:00:00-03:00,delete


In [28]:
# Comparando datas
df['inicio_semana'] = df.index.to_period('W').start_time
df.head()

Unnamed: 0_level_0,transaction_operation,inicio_semana
transaction_date,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-12-01 18:00:00-03:00,update,2019-11-25
2019-04-16 23:00:00-03:00,insert,2019-04-15
2019-04-14 02:00:00-03:00,select,2019-04-08
2019-03-17 20:00:00-03:00,insert,2019-03-11
2019-03-19 04:00:00-03:00,delete,2019-03-18


In [29]:
df['inicio_prox_semana'] = df['inicio_semana'] + pd.DateOffset(weeks=1)
df.head()

Unnamed: 0_level_0,transaction_operation,inicio_semana,inicio_prox_semana
transaction_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2019-12-01 18:00:00-03:00,update,2019-11-25,2019-12-02
2019-04-16 23:00:00-03:00,insert,2019-04-15,2019-04-22
2019-04-14 02:00:00-03:00,select,2019-04-08,2019-04-15
2019-03-17 20:00:00-03:00,insert,2019-03-11,2019-03-18
2019-03-19 04:00:00-03:00,delete,2019-03-18,2019-03-25


In [30]:
# Faixas de datas
pd.date_range(dt.datetime.now(), periods=10, freq='D')

DatetimeIndex(['2020-05-21 02:48:40.236149', '2020-05-22 02:48:40.236149',
               '2020-05-23 02:48:40.236149', '2020-05-24 02:48:40.236149',
               '2020-05-25 02:48:40.236149', '2020-05-26 02:48:40.236149',
               '2020-05-27 02:48:40.236149', '2020-05-28 02:48:40.236149',
               '2020-05-29 02:48:40.236149', '2020-05-30 02:48:40.236149'],
              dtype='datetime64[ns]', freq='D')

In [31]:
pd.date_range(dt.datetime.now(), dt.datetime(2020, 6, 30), freq='W')

DatetimeIndex(['2020-05-24 02:49:03.762162', '2020-05-31 02:49:03.762162',
               '2020-06-07 02:49:03.762162', '2020-06-14 02:49:03.762162',
               '2020-06-21 02:49:03.762162', '2020-06-28 02:49:03.762162'],
              dtype='datetime64[ns]', freq='W-SUN')