In [1]:
import datetime
import pandas as pd
import os

In [2]:
os.chdir('../../')
print(os.listdir('.'))

['.DS_Store', 'solutions', 'code snippets', 'README.md', 'notes', '.gitignore', 'exercises', '.git', 'using_apis']


In [3]:
os.chdir('exercises/03 dates_and_times')
print(os.listdir('.'))

['datetime.md', '.ipynb_checkpoints', 'processing_datetimes.ipynb', 'NY_rodent_inspections_sample_small.csv']


In [4]:
csv_name = 'NY_rodent_inspections_sample_small.csv'
rodent_dataframe = pd.read_csv(csv_name)
rodent_dataframe.head()

Unnamed: 0,INSPECTION_TYPE,JOB_TICKET_OR_WORK_ORDER_ID,JOB_ID,JOB_PROGRESS,BBL,BORO_CODE,BLOCK,LOT,HOUSE_NUMBER,STREET_NAME,ZIP_CODE,X_COORD,Y_COORD,LATITUDE,LONGITUDE,BOROUGH,INSPECTION_DATE,RESULT,APPROVED_DATE,LOCATION
0,BAIT,1,PO12965,3,1011470035,1,1147,35,104,WEST 76 STREET,10023,990505,223527,40.780204,-73.977414,Manhattan,10/14/2009 12:00:27 PM,Bait applied,10/14/2009 03:01:46 PM,"(40.7802039792471, -73.9774144709456)"
1,BAIT,2,PO12966,3,1011470034,1,1147,34,102,WEST 76 STREET,10023,990516,223521,40.780188,-73.977375,Manhattan,10/14/2009 12:51:21 PM,Bait applied,10/14/2009 03:02:30 PM,"(40.7801875030438, -73.977374757787)"
2,BAIT,30,PO16966,3,2043370027,2,4337,27,620,THWAITES PLACE,10467,1020110,252216,40.858877,-73.870364,Bronx,11/09/2009 12:59:55 PM,Bait applied,11/10/2009 02:54:52 PM,"(40.8588765781972, -73.8703636422023)"
3,BAIT,31,PO13665,3,2037670077,2,3767,77,1227,WHITEPLAINS ROAD,10472,1022441,242180,40.831321,-73.861994,Bronx,11/09/2009 11:10:16 AM,Bait applied,11/10/2009 02:56:42 PM,"(40.8313209626148, -73.861994089899)"
4,BAIT,38,PO11291,3,1011690057,1,1169,57,2199,BROADWAY,10024,989641,224567,40.783059,-73.980533,Manhattan,11/10/2009 08:40:42 AM,Bait applied,11/17/2009 11:39:11 AM,"(40.7830590725833, -73.9805333640688)"


In [5]:
# convert strings to datetime objects that we can compute with.  It takes two arguments, a string to convert and a format descriptor of the date to be converted'

original_format = '%m/%d/%Y %H:%M:%S %p'
for inspection_date_strings in rodent_dataframe['INSPECTION_DATE']:
    inspection_datetime = datetime.datetime.strptime(inspection_date_strings, original_format)

In [6]:
for approval_date_strings in rodent_dataframe['APPROVED_DATE']:
    approval_datetime = datetime.datetime.strptime(approval_date_strings, original_format)

In [7]:
# one containing the weekday name when the inspection took place (monday, tuesday, wednesday,...).

# print('the day-of-week is:',  today_date.weekday())
# print('the weekday is', a_datetime.strftime("%w"), '(weekday as a number 0-6, 0 is Sunday)')
# row['inspection_datetime'].strftime("%A")

In [8]:
# one containing the name of the month the inspection took place in.

# print('the month is:', today_date.month)
# print('the month is', a_datetime.strftime("%B"), '(month name, full version)')
# #(lambda row: row['inspection_datetime'].strftime("%B")

In [9]:
# one containing the iso-week in which the inspection took place. the calender bit returns a 3-tuple.

# (isoyear, isoweek, isoweekday) = today_date.isocalendar()
# print('the iso year:', isoyear, ', isoweek:', isoweek, ', and isoweekday:', isoweekday)

# define a new function to pass in row as argument:

def format_iso_week(datetime_placeholder):
    the_date = datetime_placeholder.date()
    iso_year, iso_week, iso_weekday = the_date.isocalendar()
    iso_week_string = 'Y:' + str(iso_year) + "," + 'W:' + str(iso_week)
    return iso_week_string

In [10]:
# one column containing how long did it take for an inspection to be approved (in appropriate time units).
# if you subtract datetimes, you get a timedelta object for the interval between the dates

# time_diff = christmas_start - now_datetime 
# print('christmas is only', time_diff, 'away')

# # it looks prettier to only count the days (note the rounding up!)
# date_diff = christmas_start.date() - a_datetime.date() # sets the clock time on both to 00:00:00.000000
# print('christmas is only', date_diff.days, 'days away')

In [11]:
# making the new columns

# Objects passed to the function are Series objects whose index is either the DataFrame’s index (axis=0) 
# or the DataFrame’s columns (axis=1). By default (result_type=None), the final return type is inferred 
# from the return type of the applied function. Otherwise, it depends on the result_type argument.

rodent_dataframe['inspection_datetime']   = rodent_dataframe.apply(lambda row: datetime.datetime.strptime(row['INSPECTION_DATE'], original_format), axis=1)

rodent_dataframe['approval_datetime']     = rodent_dataframe.apply(lambda row: datetime.datetime.strptime(row['APPROVED_DATE'],   original_format), axis=1)



rodent_dataframe['1. inspection_weekday']    = rodent_dataframe.apply(lambda row: row['inspection_datetime'].strftime("%A"), axis=1)

rodent_dataframe['2. inspection_month']      = rodent_dataframe.apply(lambda row: row['inspection_datetime'].strftime("%B"), axis=1)

rodent_dataframe['3. isoweek']               = rodent_dataframe.apply(lambda row: format_iso_week(row['inspection_datetime']), axis=1)

rodent_dataframe['4. days_taken_for_approval'] = rodent_dataframe.apply(lambda row: (row['approval_datetime'].date() - row['inspection_datetime'].date()), axis=1)


In [12]:
rodent_dataframe.head()

Unnamed: 0,INSPECTION_TYPE,JOB_TICKET_OR_WORK_ORDER_ID,JOB_ID,JOB_PROGRESS,BBL,BORO_CODE,BLOCK,LOT,HOUSE_NUMBER,STREET_NAME,...,INSPECTION_DATE,RESULT,APPROVED_DATE,LOCATION,inspection_datetime,approval_datetime,1. inspection_weekday,2. inspection_month,3. isoweek,4. days_taken_for_approval
0,BAIT,1,PO12965,3,1011470035,1,1147,35,104,WEST 76 STREET,...,10/14/2009 12:00:27 PM,Bait applied,10/14/2009 03:01:46 PM,"(40.7802039792471, -73.9774144709456)",2009-10-14 12:00:27,2009-10-14 03:01:46,Wednesday,October,"Y:2009,W:42",0 days
1,BAIT,2,PO12966,3,1011470034,1,1147,34,102,WEST 76 STREET,...,10/14/2009 12:51:21 PM,Bait applied,10/14/2009 03:02:30 PM,"(40.7801875030438, -73.977374757787)",2009-10-14 12:51:21,2009-10-14 03:02:30,Wednesday,October,"Y:2009,W:42",0 days
2,BAIT,30,PO16966,3,2043370027,2,4337,27,620,THWAITES PLACE,...,11/09/2009 12:59:55 PM,Bait applied,11/10/2009 02:54:52 PM,"(40.8588765781972, -73.8703636422023)",2009-11-09 12:59:55,2009-11-10 02:54:52,Monday,November,"Y:2009,W:46",1 days
3,BAIT,31,PO13665,3,2037670077,2,3767,77,1227,WHITEPLAINS ROAD,...,11/09/2009 11:10:16 AM,Bait applied,11/10/2009 02:56:42 PM,"(40.8313209626148, -73.861994089899)",2009-11-09 11:10:16,2009-11-10 02:56:42,Monday,November,"Y:2009,W:46",1 days
4,BAIT,38,PO11291,3,1011690057,1,1169,57,2199,BROADWAY,...,11/10/2009 08:40:42 AM,Bait applied,11/17/2009 11:39:11 AM,"(40.7830590725833, -73.9805333640688)",2009-11-10 08:40:42,2009-11-17 11:39:11,Tuesday,November,"Y:2009,W:46",7 days
