Librería datetime (Manipulación de fechas y horas) --- 17:36
===

* 17:36 min | Última modificación: Octubre 5, 2021 | [YouTube](https://youtu.be/dwVcyYSYPK8)

In [1]:
from datetime import datetime, date, timedelta
import time

In [2]:
#
# today() function
# ===============================================
# display() shows the object itself (its repr)
# print() prints a string formatted as a date
#
display(date.today())
print(date.today())

datetime.date(2021, 9, 4)

2021-09-04


In [3]:
#
# Date in ISO format (YYYY-MM-DD)
#
date.today().isoformat()

'2021-09-04'

In [4]:
#
# strftime() method
# ===============================================
#
# Format codes:
#   %d -- day of the month: 01, 02, ...
#   %m -- month: 01, ..., 12
#   %Y -- year with century: 0001, ..., 9999
#   %y -- year without century: 00, ..., 99
#
display(
    date.today().strftime('%y/%m/%d'),
    date.today().strftime('%Y-%m-%d'),
    date.today().strftime('%Y %m %d')
)

'21/09/04'

'2021-09-04'

'2021 09 04'

In [5]:
#
# now() function
# =============================================================================
# now() is the current local date and time; utcnow() is the current UTC date
# and time. Both are shown first as objects (display) and then as strings
# (print).
#
# NOTE(review): datetime.utcnow() is reported as deprecated starting with
# Python 3.12 -- confirm against the Python version in use.
#

display(
    datetime.now(),
    datetime.utcnow()
)

print(
    datetime.now(),
    datetime.utcnow(),
    sep='\n',
)

datetime.datetime(2021, 9, 4, 3, 53, 53, 84278)

datetime.datetime(2021, 9, 4, 3, 53, 53, 84285)

2021-09-04 03:53:53.087603
2021-09-04 03:53:53.087611


In [6]:
#
# isoformat() method
# The argument is the separator placed between the date and the time parts.
#
datetime.now().isoformat(' ')

'2021-09-04 03:53:53.093118'

In [7]:
#
# Python equivalents of SQL functions
# =============================================================================
# Sample data: a list of single-column rows, each holding a timestamp string
# formatted as 'YYYY-MM-DD HH:MM:SS'.
#

datetime_val = [
    ['1970-01-01 00:00:00'],
    ['1987-03-05 12:30:15'],
    ['1999-12-31 09:00:00'],
    ['2000-06-04 15:45:30'],
    ['2000-06-04 15:45:30'], 
]

datetime_val

[['1970-01-01 00:00:00'],
 ['1987-03-05 12:30:15'],
 ['1999-12-31 09:00:00'],
 ['2000-06-04 15:45:30'],
 ['2000-06-04 15:45:30']]

In [8]:
#
# Date-extraction functions
# =============================================================================
# The functions are named after their SQL counterparts. Each one receives a
# string formatted as 'YYYY-MM-DD HH:MM:SS' and raises ValueError (from
# datetime.strptime) when the string does not match that format.
#
# NOTE(review): the returned values follow Python's strftime() directives and
# may differ from a particular SQL dialect (e.g. WEEKDAY uses %w, where
# Sunday is 0, and WEEKOFYEAR uses %W, where the days before the first Monday
# of the year belong to week 00) -- confirm against the target database.
#
_DATETIME_FORMAT = '%Y-%m-%d %H:%M:%S'


def _parse_datetime(s):
    """Parse a 'YYYY-MM-DD HH:MM:SS' string into a datetime object."""
    return datetime.strptime(s, _DATETIME_FORMAT)


def YEAR(s):
    """Return the year with century as a string, e.g. '1987'."""
    return _parse_datetime(s).strftime('%Y')


def MONTH(s):
    """Return the month as an integer in the range 1..12."""
    return _parse_datetime(s).month


def MONTHNAME(s):
    """Return the full month name, e.g. 'March'."""
    return _parse_datetime(s).strftime('%B')


def MONTHNAME_ABBR(s):
    """Return the abbreviated month name, e.g. 'Mar'."""
    return _parse_datetime(s).strftime('%b')


def DAYOFMONTH(s):
    """Return the zero-padded day of the month as a string, e.g. '05'."""
    return _parse_datetime(s).strftime('%d')


def DAYNAME(s):
    """Return the full weekday name, e.g. 'Thursday'."""
    return _parse_datetime(s).strftime('%A')


def DAYNAME_ABBR(s):
    """Return the abbreviated weekday name, e.g. 'Thu'."""
    return _parse_datetime(s).strftime('%a')


def WEEKDAY(s):
    """Return the weekday as a string digit, '0' (Sunday) to '6' (Saturday)."""
    return _parse_datetime(s).strftime('%w')


def DAYOFYEAR(s):
    """Return the zero-padded day of the year as a string, '001' to '366'."""
    # The directive is the lowercase %j; the uppercase %J is not a valid
    # strftime code and does not produce the day of the year.
    return _parse_datetime(s).strftime('%j')


def WEEKOFYEAR(s):
    """Return the week of the year (Monday first) as a string, '00' to '53'."""
    return _parse_datetime(s).strftime('%W')

In [9]:
# For every sample timestamp, build a row with the original string followed
# by its year, month number, month name and abbreviated month name.
[
    [
        row[0],
        YEAR(row[0]),
        MONTH(row[0]),
        MONTHNAME(row[0]),
        MONTHNAME_ABBR(row[0]),
    ]
    for row in datetime_val
]

[['1970-01-01 00:00:00', '1970', 1, 'January', 'Jan'],
 ['1987-03-05 12:30:15', '1987', 3, 'March', 'Mar'],
 ['1999-12-31 09:00:00', '1999', 12, 'December', 'Dec'],
 ['2000-06-04 15:45:30', '2000', 6, 'June', 'Jun'],
 ['2000-06-04 15:45:30', '2000', 6, 'June', 'Jun']]

In [10]:
# For every sample timestamp, build a row with the original string followed
# by its day of the month, weekday name, abbreviated weekday name and
# weekday number.
[
    [
        row[0],
        DAYOFMONTH(row[0]),
        DAYNAME(row[0]),
        DAYNAME_ABBR(row[0]),
        WEEKDAY(row[0]),
    ]
    for row in datetime_val
]

[['1970-01-01 00:00:00', '01', 'Thursday', 'Thu', '4'],
 ['1987-03-05 12:30:15', '05', 'Thursday', 'Thu', '4'],
 ['1999-12-31 09:00:00', '31', 'Friday', 'Fri', '5'],
 ['2000-06-04 15:45:30', '04', 'Sunday', 'Sun', '0'],
 ['2000-06-04 15:45:30', '04', 'Sunday', 'Sun', '0']]

In [11]:
#
# Time-extraction functions
# =============================================================================
# The functions are named after their SQL counterparts. Each one receives a
# 'YYYY-MM-DD HH:MM:SS' string and returns a zero-padded two-digit string.
#
def HOUR(s):
    """Return the hour on a 24-hour clock, '00' to '23'."""
    parsed = datetime.strptime(s, '%Y-%m-%d %H:%M:%S')
    return format(parsed, '%H')

def MINUTE(s):
    """Return the minute, '00' to '59'."""
    parsed = datetime.strptime(s, '%Y-%m-%d %H:%M:%S')
    return format(parsed, '%M')

def SECOND(s):
    """Return the second, '00' to '59'."""
    parsed = datetime.strptime(s, '%Y-%m-%d %H:%M:%S')
    return format(parsed, '%S')

#
# Extract the components of the time of day for every sample timestamp
#
[
    [
        row[0],
        HOUR(row[0]),
        MINUTE(row[0]),
        SECOND(row[0]),
    ]
    for row in datetime_val
]

[['1970-01-01 00:00:00', '00', '00', '00'],
 ['1987-03-05 12:30:15', '12', '30', '15'],
 ['1999-12-31 09:00:00', '09', '00', '00'],
 ['2000-06-04 15:45:30', '15', '45', '30'],
 ['2000-06-04 15:45:30', '15', '45', '30']]

In [12]:
#
# Example of a format change
# =============================================================================
# Each timestamp is parsed from 'YYYY-MM-DD HH:MM:SS' and written back as
# 'DD/MM/YY'.
#
[
    [
        row[0],
        datetime.strptime(row[0], "%Y-%m-%d %H:%M:%S").strftime("%d/%m/%y"),
    ]
    for row in datetime_val
]

[['1970-01-01 00:00:00', '01/01/70'],
 ['1987-03-05 12:30:15', '05/03/87'],
 ['1999-12-31 09:00:00', '31/12/99'],
 ['2000-06-04 15:45:30', '04/06/00'],
 ['2000-06-04 15:45:30', '04/06/00']]

In [13]:
#
# Computations with dates
# =============================================================================
# Subtracting two datetime objects yields a timedelta: here, the time elapsed
# between now and each sample timestamp.
#
interval = [
    datetime.now() - datetime.strptime(row[0], "%Y-%m-%d %H:%M:%S")
    for row in datetime_val
]

# Show every interval both as an object and as a string.
for td in interval:
    display(td)
    print(td)
    print()

datetime.timedelta(18874, 14033, 156683)

18874 days, 3:53:53.156683



datetime.timedelta(12601, 55418, 156724)

12601 days, 15:23:38.156724



datetime.timedelta(7917, 68033, 156752)

7917 days, 18:53:53.156752



datetime.timedelta(7761, 43703, 156778)

7761 days, 12:08:23.156778



datetime.timedelta(7761, 43703, 156836)

7761 days, 12:08:23.156836



In [14]:
#
# Adding an increment
# =============================================================================
# A timedelta can be added to a datetime; here, five days from now.
#
print(datetime.now())
print(datetime.now() + timedelta(days=5))

2021-09-04 03:53:53.171253
2021-09-09 03:53:53.171405


In [15]:
#
# Add 60 hours to each date
#
[[(datetime.strptime(row[0], '%Y-%m-%d %H:%M:%S') +
  timedelta(hours=60)).isoformat(' ')] for row in datetime_val]

[['1970-01-03 12:00:00'],
 ['1987-03-08 00:30:15'],
 ['2000-01-02 21:00:00'],
 ['2000-06-07 03:45:30'],
 ['2000-06-07 03:45:30']]

In [16]:
#
# Compute the age (time elapsed from each sample timestamp until now)
#
ages = [
    datetime.now() - datetime.strptime(row[0], "%Y-%m-%d %H:%M:%S")
    for row in datetime_val
]
for age in ages:
    print(age)

18874 days, 3:53:53.186915
12601 days, 15:23:38.186963
7917 days, 18:53:53.186992
7761 days, 12:08:23.187013
7761 days, 12:08:23.187030


**Caso práctico**

El archivo 'chicago_crime.csv' contiene la información sobre reportes de hechos delictivos por fecha para la ciudad de Chicago. Responda las siguientes preguntas:

* ¿Cuáles son los meses del año con más crímenes?

* ¿Cuáles son las ubicaciones más frecuentes por cada mes del año 2016?

In [26]:
# Download the example datasets into /tmp/ using the wget shell command
# (the leading '!' runs the line in the system shell from the notebook).
base_url = "https://raw.githubusercontent.com/jdvelasq/datalabs/master/datasets/" 
filenames = [
    "chicago_crime.csv", 
    "cta_daily_station_totals.csv", 
    "cta_daily_summary_totals.csv",
]

for filename in filenames:
    !wget --quiet {base_url + filename} -P /tmp/

In [28]:
# Preview the first lines of the downloaded crime file.
!head /tmp/chicago_crime.csv

Date,Block,Primary Type,Description,Location Description,Arrest,Domestic,District
05/23/2016 05:35:00 PM,024XX W DIVISION ST,ASSAULT,SIMPLE,STREET,false,true,14
03/26/2016 08:20:00 PM,019XX W HOWARD ST,BURGLARY,FORCIBLE ENTRY,SMALL RETAIL STORE,false,false,24
04/25/2016 03:05:00 PM,001XX W 79TH ST,THEFT,RETAIL THEFT,DEPARTMENT STORE,true,false,6
04/26/2016 05:30:00 PM,010XX N PINE AVE,BATTERY,SIMPLE,SIDEWALK,false,false,15
06/19/2016 01:15:00 AM,027XX W AUGUSTA BLVD,BATTERY,AGGRAVATED: HANDGUN,SIDEWALK,false,false,12
05/28/2016 08:00:00 PM,070XX S ASHLAND AVE,BATTERY,DOMESTIC BATTERY SIMPLE,GAS STATION,false,true,7
07/03/2016 03:43:00 PM,0000X N STATE ST,THEFT,RETAIL THEFT,OTHER,false,false,1
06/11/2016 06:55:00 PM,044XX W MAYPOLE AVE,PUBLIC PEACE VIOLATION,RECKLESS CONDUCT,STREET,true,false,11
10/04/2016 10:20:00 AM,016XX W 63RD ST,BATTERY,SIMPLE,STREET,true,false,7


In [36]:
#
# head extracts the first line of the file
# tr turns each ',' into '\n' so that one column is printed per line
# nl numbers every line of the output
# 
!head -n 1 /tmp/chicago_crime.csv | tr ',' '\n' | nl

     1	Date
     2	Block
     3	Primary Type
     4	Description
     5	Location Description
     6	Arrest
     7	Domestic
     8	District


In [44]:
import csv
from collections import Counter, defaultdict
from datetime import datetime


def load_crime_data(filename="/tmp/chicago_crime.csv"):
    """Return the records of the crime file, keeping only the fields of interest.

    Args:
        filename: Path of the CSV file to read. Defaults to
            "/tmp/chicago_crime.csv".

    Returns:
        A list with one tuple of strings per data row, in file order:
        (Date, Primary Type, Location Description, Arrest), i.e. columns
        0, 2, 4 and 5 of the file. The list is empty when the file has no
        data rows.

    Raises:
        OSError: If the file cannot be opened.
        IndexError: If a non-blank row has fewer than six columns.
    """

    # The context manager closes the file even if parsing fails; newline=""
    # is the mode the csv module asks for when reading files.
    with open(filename, "r", newline="") as csvfile:

        reader = csv.reader(csvfile)

        # Skip the header through the reader (so a quoted multi-line header
        # is handled correctly); the default avoids StopIteration on an
        # empty file.
        next(reader, None)

        #
        # The following fields are kept:
        #
        #   0: Date,
        #   2: Primary Type,
        #   4: Location Description
        #   5: Arrest
        #
        # Blank lines are returned by csv.reader as empty lists and are
        # skipped.
        #
        return [(row[0], row[2], row[4], row[5]) for row in reader if row]


def compute_crimes_by_month(crime_data):
    """Count the records that fall in each calendar month.

    Args:
        crime_data: Iterable of records whose first element is a date string
            such as '05/23/2016 05:35:00 PM'.

    Returns:
        A Counter mapping the month number (1-12) to the number of records
        in that month, regardless of the year.
    """
    # Parse every date string lazily and keep only its month number; the
    # Counter consumes the generator and tallies the months.
    months = (
        datetime.strptime(record[0], "%m/%d/%Y %I:%M:%S %p").month
        for record in crime_data
    )
    return Counter(months)


def compute_locations_by_month(crime_data):
    """Find the most frequent locations for every month of 2016.

    Args:
        crime_data: Iterable of records whose first element is a date string
            such as '05/23/2016 05:35:00 PM' and whose third element is the
            location description.

    Returns:
        A dict mapping the month number to a list of up to five
        (location, count) pairs, most frequent first. Only records dated in
        2016 are considered; months without records are absent.
    """
    # One location tally per month, created on first use.
    tallies = defaultdict(Counter)

    for record in crime_data:
        when = datetime.strptime(record[0], "%m/%d/%Y %I:%M:%S %p")

        # Records outside 2016 do not take part in the ranking.
        if when.year != 2016:
            continue

        tallies[when.month][record[2]] += 1

    return {month: tally.most_common(5) for month, tally in tallies.items()}


def create_report(crimes_by_month, locations_by_month):
    """Print the three months with the most crimes and the locations per month.

    Args:
        crimes_by_month: Counter mapping a month to its number of crimes.
        locations_by_month: Mapping from a month to its ranked locations;
            one line is printed per entry, in the mapping's own order.
    """
    print("Crimes by Month:")
    top_months = crimes_by_month.most_common(3)
    # end="\n\n" emits the blank line that separates the two sections.
    print(top_months, end="\n\n")
    print("Locations by Month:")
    for month in locations_by_month:
        print(month, locations_by_month[month])


# Run the pipeline: load the records, aggregate them by month and print the
# report.
crime_data = load_crime_data()
crimes_by_month = compute_crimes_by_month(crime_data)
locations_by_month = compute_locations_by_month(crime_data)
create_report(crimes_by_month, locations_by_month)

Crimes by Month:
[(1, 1948), (2, 1862), (7, 1257)]

Locations by Month:
5 [('STREET', 241), ('RESIDENCE', 175), ('APARTMENT', 128), ('SIDEWALK', 111), ('OTHER', 41)]
3 [('STREET', 240), ('RESIDENCE', 190), ('APARTMENT', 139), ('SIDEWALK', 99), ('OTHER', 52)]
4 [('STREET', 213), ('RESIDENCE', 171), ('APARTMENT', 152), ('SIDEWALK', 96), ('OTHER', 40)]
6 [('STREET', 245), ('RESIDENCE', 164), ('APARTMENT', 159), ('SIDEWALK', 123), ('PARKING LOT/GARAGE(NON.RESID.)', 44)]
7 [('STREET', 309), ('RESIDENCE', 177), ('APARTMENT', 166), ('SIDEWALK', 125), ('OTHER', 47)]
10 [('STREET', 248), ('RESIDENCE', 206), ('APARTMENT', 122), ('SIDEWALK', 92), ('OTHER', 62)]
12 [('STREET', 207), ('RESIDENCE', 158), ('APARTMENT', 136), ('OTHER', 47), ('SIDEWALK', 46)]
1 [('STREET', 196), ('RESIDENCE', 160), ('APARTMENT', 153), ('SIDEWALK', 72), ('PARKING LOT/GARAGE(NON.RESID.)', 43)]
9 [('STREET', 279), ('RESIDENCE', 183), ('APARTMENT', 144), ('SIDEWALK', 121), ('OTHER', 39)]
11 [('STREET', 236), ('RESIDENCE', 

In [47]:
def load_crimes_by_district(filename="/tmp/chicago_crime.csv"):
    """Load the crime file and group its records by district.

    Args:
        filename: Path of the CSV file to read; its header must contain a
            'District' column. Defaults to "/tmp/chicago_crime.csv".

    Returns:
        A defaultdict(list) mapping each district (as the string found in
        the file) to the list of its records. Every record is a dict keyed
        by column name with the 'District' field removed.

    Raises:
        OSError: If the file cannot be opened.
        KeyError: If the file has no 'District' column.
    """

    #
    # Stores the crimes of every district as a list
    #
    crimes_by_district = defaultdict(list)

    #
    # The context manager closes the file even if reading fails; newline=""
    # is the mode the csv module asks for when reading files.
    #
    with open(filename, "r", newline="") as csvfile:

        for row in csv.DictReader(csvfile):

            #
            # Removes the District field from the record and keeps its
            # value as the grouping key
            #
            district = row.pop('District')

            #
            # Stores the remaining fields of the record under its district
            #
            crimes_by_district[district].append(row)

    return crimes_by_district

def count_crimes_by_district_and_year(crimes_by_district):
    """Count, per district and year, the crimes that ended in an arrest.

    Args:
        crimes_by_district: Mapping from a district to a list of records;
            each record is a mapping with an 'Arrest' field ('true' when an
            arrest was made) and a 'Date' field such as
            '05/23/2016 05:35:00 PM'.

    Returns:
        A defaultdict(Counter) mapping each district to a Counter of
        {year: number of crimes with arrest}. Districts without any arrest
        do not appear in the result.
    """
    # An empty counter is created the first time a district records an arrest.
    arrests = defaultdict(Counter)

    for district in crimes_by_district:
        for record in crimes_by_district[district]:

            # Only records flagged with an arrest are counted.
            if record['Arrest'] != 'true':
                continue

            parsed = datetime.strptime(record['Date'], '%m/%d/%Y %I:%M:%S %p')
            arrests[district][parsed.year] += 1

    return arrests


# Group the records by district, count the arrests per district and year,
# and show the resulting structure.
crimes_by_district = load_crimes_by_district()

crimes_by_district_and_year = count_crimes_by_district_and_year(crimes_by_district)

crimes_by_district_and_year

defaultdict(collections.Counter,
            {'14': Counter({2016: 59, 2017: 8}),
             '24': Counter({2017: 10, 2016: 51}),
             '6': Counter({2016: 157, 2017: 32}),
             '15': Counter({2016: 154, 2017: 16}),
             '12': Counter({2016: 72, 2017: 9}),
             '7': Counter({2016: 181, 2017: 27}),
             '1': Counter({2016: 124, 2017: 15}),
             '11': Counter({2016: 275, 2017: 53}),
             '18': Counter({2016: 92, 2017: 17}),
             '22': Counter({2016: 78, 2017: 12}),
             '5': Counter({2017: 30, 2016: 149}),
             '16': Counter({2016: 66, 2017: 9}),
             '9': Counter({2016: 116, 2017: 17}),
             '8': Counter({2016: 124, 2017: 26}),
             '3': Counter({2016: 98, 2017: 18}),
             '2': Counter({2016: 84, 2017: 15}),
             '19': Counter({2016: 88, 2017: 11}),
             '10': Counter({2016: 144, 2017: 20}),
             '4': Counter({2016: 134, 2017: 15}),
             '17': 