#### Feature Engineering ####
The process of `feature engineering` includes following steps:

- Brainstorming or Testing features;
- Deciding what features to create;
- Creating features;
- Checking how the features work with your model;
- Improving your features if needed;
- Go back to brainstorming/creating more features until the work is done.

In [57]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import requests
import json
import holidays as hd
import calendar
from datetime import datetime, date
from pprint import pprint

In [58]:
cycle_usage = pd.read_csv("cycleusage_cleansed.csv")
cycle_usage.tail()

Unnamed: 0,StartStation Id,Start Date,EndStation Id,End Date,Duration,StartStation Id Used,EndStation Id Used,Frequency,StartStation Address,StartStation latitude,...,EndStation Address,EndStation latitude,EndStation longitude,EndStation capacity,distance (geodesic),Daily Weather,Hourly Weather,Humidity,Windspeed,Apparent Temperature (Avg)
470185,154,29/07/2015 07:37,223,29/07/2015 07:54,1020,302205,39262,177,"Waterloo Station 3, Waterloo",51.503792,...,"Rodney Road , Walworth",51.491485,-0.090221,24,2.082942,partly-cloudy-day,"[{'time': 1438124400, 'summary': 'Clear', 'ico...",0.69,2.9,
470186,154,20/06/2015 17:59,457,20/06/2015 18:16,1020,302205,62313,114,"Waterloo Station 3, Waterloo",51.503792,...,"Castlehaven Road, Camden Town",51.542138,-0.145393,29,4.828189,partly-cloudy-day,"[{'time': 1434754800, 'summary': 'Partly Cloud...",0.79,1.05,
470187,154,22/07/2015 00:13,483,22/07/2015 00:30,1020,302205,38078,11,"Waterloo Station 3, Waterloo",51.503792,...,"Albert Gardens, Stepney",51.51328,-0.047784,33,4.637327,partly-cloudy-day,"[{'time': 1437519600, 'summary': 'Partly Cloud...",0.7,2.61,
470188,154,26/09/2015 13:05,47,26/09/2015 13:30,1500,302205,56112,17,"Waterloo Station 3, Waterloo",51.503792,...,"Warwick Avenue Station, Maida Vale",51.523345,-0.183846,19,5.388971,partly-cloudy-day,"[{'time': 1443222000, 'summary': 'Clear', 'ico...",0.76,0.86,
470189,14,27/04/2016 12:01,174,27/04/2016 12:18,1020,348832,65266,912,"Belgrove Street , King's Cross",51.529944,...,"Strand, Strand",51.512529,-0.115163,36,2.02443,partly-cloudy-day,"[{'time': 1461711600, 'summary': 'Clear', 'ico...",0.67,2.54,


In [59]:
rm_columns = {
    #"StartStation Id",
    #"Start Date",
    "StartStation Address",
   # "StartStation capacity",
    #"EndStation Id",
    "End Date",
    "EndStation Address",
   # "EndStation capacity",
   # "Duration",
   # "Frequency",
    "Humidity",
    "Windspeed",
    "Apparent Temperature (Avg)",
    "StartStation Id Used",
    "EndStation Id Used",
    "StartStation latitude",
    "StartStation longitude",
    "EndStation latitude",
    "EndStation longitude",
    "Hourly Weather",
   # "distance (geodesic)"
   # "Daily Weather"
}

cycle_usage.drop(columns=rm_columns, inplace=True)
cycle_usage.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 470190 entries, 0 to 470189
Data columns (total 9 columns):
StartStation Id          470190 non-null int64
Start Date               470190 non-null object
EndStation Id            470190 non-null int64
Duration                 470190 non-null int64
Frequency                470190 non-null int64
StartStation capacity    470190 non-null int64
EndStation capacity      470190 non-null int64
distance (geodesic)      470190 non-null float64
Daily Weather            470190 non-null object
dtypes: float64(1), int64(6), object(2)
memory usage: 32.3+ MB


In [60]:
# Check for empty values and empty strings
np.where(pd.isnull(cycle_usage))
np.where(cycle_usage.applymap(lambda x: x == ''))

(array([], dtype=int64), array([], dtype=int64))

#### Darksky note:#####
> Our system is presently very simple: it finds the “worst” weather condition that will happen during the day (4AM to 4AM), and uses the icon for it. The only case where a daily icon will show a *-night value is partly-cloudy-night, and this is done to match the daily summary text. We already have intentions to change this behavior, because it is confusing. 
In the meantime, you can assume that if partly-cloudy-night is the worst weather condition that was found, that it was clear during the day. So you can just treat partly-cloudy-night as an alias for clear-day. 

In [61]:
print(cycle_usage.iloc[452861]["Start Date"][:10], cycle_usage.iloc[452861]["Start Date"][-5:])

12/12/2017 08:45


In [62]:
cycle_usage.groupby(by="Daily Weather").count()
cycle_usage["Daily Weather"].loc[cycle_usage["Daily Weather"]=="partly-cloudy-night"] = "clear-day"

In [63]:
# Inconsistent dates e.g. 00:00 01:25:00
cycle_usage.iloc[452861]
for index, p in cycle_usage.iterrows():
    if (len(p["Start Date"]) == 19):
        print(index, p["Start Date"])
        cycle_usage.iloc[index] = p["Start Date"][:16]
    elif (len(p["Start Date"]) > 19):
        print("anomaly", index, p["Start Date"])
        cycle_usage.iloc[index] = p["Start Date"][:10] + " " +p["Start Date"][-5:]

cycle_usage.dropna(inplace=True)
cycle_usage.count()


67 15/12/2015 11:42:00
294 18/12/2015 21:23:00
298 25/12/2015 17:19:00
301 03/01/2016 19:37:00
566 19/12/2015 12:09:00
722 16/12/2015 08:21:00
726 22/12/2015 09:09:00
738 25/12/2015 13:02:00
755 01/01/2016 12:53:00
804 05/01/2016 23:13:00
847 06/01/2016 18:12:00
1199 21/12/2015 08:34:00
1220 19/12/2015 13:07:00
1392 07/01/2016 17:45:00
1412 16/12/2015 21:45:00
1554 25/12/2015 21:44:00
1588 20/12/2015 14:40:00
1707 31/12/2015 21:49:00
1910 09/01/2016 12:59:00
2068 06/01/2016 17:43:00
2288 18/12/2015 01:05:00
2343 20/12/2015 02:25:00
2368 25/12/2015 13:41:00
2533 29/12/2015 12:56:00
2580 01/01/2016 12:41:00
2790 16/12/2015 08:22:00
2909 05/01/2016 20:49:00
2955 14/12/2015 16:35:00
2968 25/12/2015 17:52:00
3087 28/12/2015 13:42:00
3092 04/01/2016 08:36:00
3215 25/12/2015 15:14:00
3328 25/12/2015 11:14:00
3546 19/12/2015 06:56:00
3601 25/12/2015 13:26:00
3639 18/12/2015 13:40:00
3692 25/12/2015 12:28:00
3694 16/12/2015 20:59:00
3790 04/01/2016 18:31:00
4046 30/12/2015 09:40:00
4441 26/12/2

46367 16/12/2015 14:48:00
46416 31/12/2015 16:12:00
46649 18/12/2015 18:12:00
46884 19/12/2015 14:56:00
46989 15/12/2015 09:51:00
47128 24/12/2015 21:58:00
47321 28/12/2015 05:39:00
47330 16/12/2015 23:14:00
47345 09/01/2016 14:39:00
47664 06/01/2016 08:48:00
47736 17/12/2015 17:56:00
47742 13/12/2015 14:33:00
47776 17/12/2015 17:59:00
47782 14/12/2015 18:53:00
47794 23/12/2015 08:43:00
47831 13/12/2015 20:09:00
48207 23/12/2015 16:38:00
48365 25/12/2015 02:03:00
48602 01/01/2016 15:16:00
48610 30/12/2015 18:30:00
48669 20/12/2015 22:10:00
48916 21/12/2015 21:17:00
49007 29/12/2015 15:49:00
49032 21/12/2015 01:26:00
49069 09/01/2016 16:15:00
49105 14/12/2015 15:18:00
49120 05/01/2016 18:16:00
49121 25/12/2015 23:23:00
49393 20/12/2015 00:59:00
49416 13/12/2015 14:36:00
49477 09/01/2016 10:45:00
49545 04/01/2016 16:35:00
49761 15/12/2015 11:06:00
49805 19/12/2015 08:43:00
49883 16/12/2015 18:19:00
49953 25/12/2015 15:31:00
49980 04/01/2016 17:13:00
50044 23/12/2015 08:18:00
50054 19/12/

88253 21/12/2015 22:37:00
88528 30/12/2015 14:56:00
88629 14/12/2015 19:50:00
88907 06/01/2016 21:44:00
89057 25/12/2015 11:04:00
89250 17/12/2015 01:02:00
89373 03/01/2016 00:30:00
89640 03/01/2016 17:20:00
89827 06/01/2016 08:59:00
90082 09/01/2016 16:14:00
90126 02/01/2016 17:02:00
90214 04/01/2016 17:11:00
90593 07/01/2016 23:47:00
90827 18/12/2015 11:41:00
90953 17/12/2015 10:54:00
91061 22/12/2015 15:05:00
91103 25/12/2015 13:29:00
91227 17/12/2015 02:23:00
91398 19/12/2015 18:50:00
91407 09/01/2016 22:44:00
91851 23/12/2015 09:57:00
92031 02/01/2016 13:20:00
92037 13/12/2015 03:48:00
92287 31/12/2015 15:57:00
92301 19/12/2015 12:02:00
92393 29/12/2015 14:05:00
92415 05/01/2016 13:05:00
92723 13/12/2015 15:32:00
92817 18/12/2015 08:30:00
93019 14/12/2015 08:37:00
93179 18/12/2015 16:35:00
93284 07/01/2016 18:12:00
93306 25/12/2015 08:54:00
93469 18/12/2015 00:39:00
93924 01/01/2016 06:39:00
94248 03/01/2016 10:27:00
94387 25/12/2015 10:58:00
94484 31/12/2015 22:09:00
94637 01/01/

132485 25/12/2015 12:00:00
132578 16/12/2015 18:09:00
132604 18/12/2015 16:04:00
132623 29/12/2015 17:52:00
132683 22/12/2015 09:17:00
132698 20/12/2015 15:57:00
132855 25/12/2015 22:35:00
132909 26/12/2015 15:14:00
132928 30/12/2015 22:06:00
133039 26/12/2015 16:53:00
133185 21/12/2015 18:04:00
133258 28/12/2015 14:36:00
133540 05/01/2016 14:16:00
133578 20/12/2015 22:45:00
133620 17/12/2015 18:42:00
133836 25/12/2015 20:21:00
134002 06/01/2016 18:18:00
134212 18/12/2015 05:57:00
134231 09/01/2016 10:44:00
134293 08/01/2016 07:58:00
134465 16/12/2015 19:45:00
134520 14/12/2015 15:19:00
134654 31/12/2015 15:08:00
134957 14/12/2015 09:34:00
135039 21/12/2015 11:56:00
135049 23/12/2015 20:11:00
135195 02/01/2016 14:03:00
135321 13/12/2015 01:25:00
135423 21/12/2015 07:14:00
135460 18/12/2015 18:40:00
135467 19/12/2015 15:47:00
135551 09/01/2016 12:56:00
135568 22/12/2015 19:05:00
135644 13/12/2015 22:31:00
136023 07/01/2016 21:57:00
136027 25/12/2015 17:05:00
136053 28/12/2015 12:20:00
1

167802 15/12/2015 08:40:00
167992 06/01/2016 19:18:00
168096 05/01/2016 15:37:00
168117 05/01/2016 17:40:00
168258 22/12/2015 21:05:00
168439 07/01/2016 19:21:00
168524 27/12/2015 21:07:00
168739 06/01/2016 09:07:00
168783 14/12/2015 16:39:00
168954 30/12/2015 16:36:00
169002 14/12/2015 20:21:00
169080 25/12/2015 13:05:00
169121 09/01/2016 14:44:00
169255 25/12/2015 11:52:00
169257 16/12/2015 17:40:00
169417 27/12/2015 15:36:00
169495 24/12/2015 18:57:00
169557 30/12/2015 23:30:00
169828 04/01/2016 19:06:00
169845 13/12/2015 20:06:00
170013 16/12/2015 09:39:00
170275 25/12/2015 12:24:00
170388 22/12/2015 19:40:00
170484 16/12/2015 17:10:00
170817 14/12/2015 19:07:00
170903 01/01/2016 13:03:00
170928 18/12/2015 08:44:00
171295 19/12/2015 10:13:00
171425 14/12/2015 18:18:00
171440 04/01/2016 20:32:00
171446 15/12/2015 09:26:00
171480 29/12/2015 18:48:00
171596 19/12/2015 14:52:00
171650 14/12/2015 10:26:00
171745 20/12/2015 02:13:00
172032 08/01/2016 16:55:00
172110 31/12/2015 23:23:00
1

206005 14/12/2015 17:15:00
206108 05/01/2016 08:55:00
206127 16/12/2015 09:03:00
206523 20/12/2015 14:16:00
206589 18/12/2015 08:25:00
206796 25/12/2015 16:11:00
207399 31/12/2015 11:47:00
207545 15/12/2015 10:17:00
207781 17/12/2015 19:11:00
208094 23/12/2015 09:31:00
208256 07/01/2016 20:48:00
208276 02/01/2016 11:43:00
208442 08/01/2016 19:47:00
208649 22/12/2015 17:33:00
208714 29/12/2015 15:46:00
208875 01/01/2016 01:22:00
208936 06/01/2016 06:45:00
208989 19/12/2015 23:37:00
209226 09/01/2016 15:21:00
209331 17/12/2015 20:05:00
209341 17/12/2015 08:49:00
209445 20/12/2015 01:22:00
209452 14/12/2015 08:03:00
209794 06/01/2016 16:46:00
209943 15/12/2015 12:33:00
210057 15/12/2015 18:59:00
210121 06/01/2016 13:50:00
210155 01/01/2016 14:03:00
210161 30/12/2015 18:46:00
210284 30/12/2015 00:26:00
210508 23/12/2015 04:02:00
210575 21/12/2015 00:08:00
210593 25/12/2015 17:38:00
210770 15/12/2015 08:12:00
210987 20/12/2015 14:16:00
211076 16/12/2015 10:07:00
211153 23/12/2015 17:37:00
2

249474 06/01/2016 14:02:00
249475 31/12/2015 14:50:00
249503 28/12/2015 12:11:00
249581 18/12/2015 10:23:00
249644 19/12/2015 10:07:00
249712 14/12/2015 16:16:00
249813 07/01/2016 15:52:00
249823 13/12/2015 14:15:00
249837 23/12/2015 08:58:00
250105 20/12/2015 23:21:00
250346 05/01/2016 18:05:00
250692 25/12/2015 09:43:00
250713 03/01/2016 20:20:00
250745 28/12/2015 12:43:00
251257 25/12/2015 19:20:00
251286 17/12/2015 00:57:00
251580 04/01/2016 13:13:00
251639 14/12/2015 08:42:00
251647 25/12/2015 02:29:00
251723 23/12/2015 08:45:00
252260 25/12/2015 16:34:00
252279 25/12/2015 08:21:00
252439 16/12/2015 10:24:00
252460 29/12/2015 12:45:00
252743 08/01/2016 16:45:00
252870 25/12/2015 08:39:00
253160 25/12/2015 12:11:00
253525 22/12/2015 14:20:00
253583 25/12/2015 20:07:00
253670 18/12/2015 00:02:00
253766 25/12/2015 09:13:00
254066 02/01/2016 11:20:00
254109 18/12/2015 11:39:00
254121 13/12/2015 11:01:00
254318 03/01/2016 00:02:00
254467 09/01/2016 14:54:00
254515 02/01/2016 12:34:00
2

289291 29/12/2015 23:02:00
289308 24/12/2015 18:10:00
289314 25/12/2015 14:03:00
289343 20/12/2015 13:23:00
289386 19/12/2015 19:11:00
289438 22/12/2015 21:53:00
289507 25/12/2015 12:14:00
289577 19/12/2015 21:15:00
289585 30/12/2015 08:37:00
289606 16/12/2015 08:20:00
289910 15/12/2015 08:55:00
289950 07/01/2016 16:14:00
290241 13/12/2015 16:23:00
290257 28/12/2015 13:14:00
290405 26/12/2015 13:49:00
290415 20/12/2015 20:05:00
290538 31/12/2015 06:54:00
290737 01/01/2016 23:24:00
290924 20/12/2015 14:06:00
291144 18/12/2015 12:57:00
291150 04/01/2016 19:32:00
291182 16/12/2015 14:13:00
291227 25/12/2015 13:13:00
291234 19/12/2015 15:01:00
291256 16/12/2015 16:13:00
291432 17/12/2015 23:50:00
291489 26/12/2015 11:47:00
291624 04/01/2016 12:17:00
291698 14/12/2015 08:54:00
291703 29/12/2015 09:58:00
291887 17/12/2015 09:50:00
291974 25/12/2015 19:58:00
292086 03/01/2016 09:44:00
292426 01/01/2016 15:06:00
292530 17/12/2015 06:40:00
292569 05/01/2016 19:07:00
292649 04/01/2016 14:20:00
2

328271 25/12/2015 14:57:00
328333 07/01/2016 22:13:00
328442 06/01/2016 21:21:00
328961 25/12/2015 08:36:00
329097 19/12/2015 10:55:00
329145 09/01/2016 18:04:00
329203 06/01/2016 08:14:00
329292 04/01/2016 18:04:00
329441 08/01/2016 14:35:00
329480 26/12/2015 15:38:00
329490 19/12/2015 20:29:00
329596 14/12/2015 19:11:00
329646 14/12/2015 17:16:00
329843 18/12/2015 08:49:00
329974 25/12/2015 14:33:00
330119 18/12/2015 00:32:00
330175 06/01/2016 19:12:00
330215 29/12/2015 13:58:00
330439 27/12/2015 05:19:00
330744 20/12/2015 04:32:00
330895 25/12/2015 12:46:00
330975 13/12/2015 15:42:00
331231 25/12/2015 23:39:00
331285 21/12/2015 22:12:00
331359 18/12/2015 14:26:00
331398 16/12/2015 08:26:00
331445 18/12/2015 08:43:00
331472 15/12/2015 18:24:00
331635 20/12/2015 14:58:00
331654 06/01/2016 06:01:00
331690 25/12/2015 14:42:00
331710 04/01/2016 18:12:00
331740 19/12/2015 03:19:00
331800 22/12/2015 11:29:00
331901 30/12/2015 13:18:00
331913 04/01/2016 17:51:00
331963 20/12/2015 12:22:00
3

366285 27/12/2015 01:50:00
366306 02/01/2016 02:58:00
366413 19/12/2015 01:19:00
366439 02/01/2016 21:47:00
367001 18/12/2015 23:35:00
367019 08/01/2016 08:26:00
367280 25/12/2015 14:12:00
367298 05/01/2016 12:35:00
367484 18/12/2015 08:05:00
367510 18/12/2015 00:05:00
367855 21/12/2015 17:53:00
367961 04/01/2016 11:46:00
367972 09/01/2016 04:23:00
368647 05/01/2016 22:13:00
368703 06/01/2016 17:05:00
368857 06/01/2016 12:39:00
368858 21/12/2015 00:25:00
368890 07/01/2016 17:33:00
369052 09/01/2016 07:10:00
369133 26/12/2015 18:54:00
369309 16/12/2015 08:54:00
369384 29/12/2015 14:08:00
369570 05/01/2016 18:42:00
369574 19/12/2015 14:31:00
369582 25/12/2015 17:09:00
369621 07/01/2016 16:07:00
369653 21/12/2015 10:05:00
369711 15/12/2015 09:20:00
370073 26/12/2015 11:39:00
370190 13/12/2015 18:36:00
370397 17/12/2015 10:34:00
370688 04/01/2016 20:32:00
370887 20/12/2015 12:16:00
371098 18/12/2015 01:27:00
371208 18/12/2015 09:10:00
371220 07/01/2016 19:32:00
371253 07/01/2016 18:17:00
3

407269 31/12/2015 21:04:00
407483 20/12/2015 19:34:00
407813 05/01/2016 16:41:00
407838 31/12/2015 12:45:00
408040 06/01/2016 13:34:00
408137 05/01/2016 17:37:00
408520 13/12/2015 22:27:00
408616 25/12/2015 07:55:00
408686 16/12/2015 01:07:00
408723 21/12/2015 15:49:00
408770 25/12/2015 14:01:00
409604 14/12/2015 07:59:00
409659 19/12/2015 10:01:00
409756 03/01/2016 10:28:00
409805 04/01/2016 17:19:00
409910 05/01/2016 12:47:00
410125 25/12/2015 12:29:00
410179 28/12/2015 12:37:00
410588 07/01/2016 19:37:00
410593 14/12/2015 17:08:00
410623 08/01/2016 22:08:00
410644 05/01/2016 17:06:00
410889 18/12/2015 06:05:00
410907 05/01/2016 14:40:00
410950 27/12/2015 21:27:00
411108 06/01/2016 13:25:00
411340 31/12/2015 17:25:00
411399 22/12/2015 16:33:00
411776 02/01/2016 00:07:00
412051 16/12/2015 03:51:00
412382 06/01/2016 19:23:00
412474 25/12/2015 09:38:00
412531 21/12/2015 23:34:00
412538 14/12/2015 15:25:00
412644 15/12/2015 09:18:00
412653 16/12/2015 23:10:00
412725 07/01/2016 18:39:00
4

448890 19/12/2015 03:10:00
448951 14/12/2015 09:10:00
449050 06/01/2016 12:08:00
449084 28/12/2015 10:34:00
449253 25/12/2015 05:45:00
449255 05/01/2016 08:42:00
449263 17/12/2015 16:22:00
449560 29/12/2015 17:15:00
449841 25/12/2015 12:27:00
450000 25/12/2015 07:17:00
450072 05/01/2016 13:32:00
450163 20/12/2015 19:18:00
450213 06/01/2016 11:31:00
450236 02/01/2016 13:34:00
450257 09/01/2016 01:48:00
450389 04/01/2016 11:11:00
450398 16/12/2015 07:52:00
450411 04/01/2016 16:29:00
450458 13/12/2015 22:57:00
450666 13/12/2015 10:31:00
450723 07/01/2016 15:35:00
450889 07/01/2016 19:50:00
451068 03/01/2016 16:00:00
451248 16/12/2015 07:59:00
451460 17/12/2015 22:03:00
451573 07/01/2016 17:51:00
451974 21/12/2015 19:04:00
452129 19/12/2015 13:16:00
452151 07/01/2016 22:24:00
452252 07/01/2016 19:28:00
452285 05/01/2016 07:26:00
452288 05/01/2016 10:34:00
452299 18/12/2015 23:20:00
452806 25/12/2015 18:54:00
anomaly 452861 12/12/2017 2017 08:45
453223 31/12/2015 00:58:00
453227 26/12/2015 

StartStation Id          470190
Start Date               470190
EndStation Id            470190
Duration                 470190
Frequency                470190
StartStation capacity    470190
EndStation capacity      470190
distance (geodesic)      470190
Daily Weather            470190
dtype: int64

Adding weekdays (Monday, Tuesday...)

In [64]:
#Add weekdays
cycle_usage["Start Date"] =  pd.to_datetime(cycle_usage["Start Date"], format='%d/%m/%Y %H:%M')
cycle_usage['Weekday'] = cycle_usage.apply(lambda row: calendar.day_name[row["Start Date"].weekday()],axis=1)

In [65]:
cycle_usage.head()

Unnamed: 0,StartStation Id,Start Date,EndStation Id,Duration,Frequency,StartStation capacity,EndStation capacity,distance (geodesic),Daily Weather,Weekday
0,836,2018-09-06 18:03:00,749,2100,1,24,37,3.55816,partly-cloudy-day,Thursday
1,831,2018-07-24 20:17:00,638,1320,5,25,28,3.93634,clear-day,Tuesday
2,831,2018-06-06 09:33:00,264,1380,1,25,20,6.70453,partly-cloudy-day,Wednesday
3,831,2018-04-18 08:44:00,372,1680,2,25,24,5.45865,clear-day,Wednesday
4,831,2018-08-15 08:45:00,41,1740,1,25,24,6.42797,partly-cloudy-day,Wednesday



`Meteorologische Jahreszeiten` <br>
Nördliche Hemisphäre <br>
Frühling: 1. März bis 31. Mai <br>
Sommer: 1. Juni bis 31. August <br>
Herbst: 1. September bis 30. November <br>
Winter: 1. Dezember bis 28. Februar <br>

In [66]:
#Add seasons
def seasons(p):
    """Get meteorological season"""
    year = int(str(p["Start Date"])[:4])
    date_m = p["Start Date"]
    if date_m >= datetime(year, 3, 1, 0,0,0) and date_m <= datetime(year, 5, 31, 23,59,59):
        return "Spring"
    elif date_m >= datetime(year, 6, 1, 0,0,0) and date_m <= datetime(year, 8, 31, 23,59,59):
        return "Summer"
    elif date_m >= datetime(year, 9, 1, 0,0,0) and date_m <= datetime(year, 11, 30, 23,59,59):
        return "Autumn"
    elif date_m >= datetime(year, 12, 1, 0,0,0) or date_m < datetime(year, 3, 1, 23,59,59):
        return "Winter"
        
cycle_usage['Season'] = cycle_usage.apply(lambda row: seasons(row),axis=1)
cycle_usage.head()

Unnamed: 0,StartStation Id,Start Date,EndStation Id,Duration,Frequency,StartStation capacity,EndStation capacity,distance (geodesic),Daily Weather,Weekday,Season
0,836,2018-09-06 18:03:00,749,2100,1,24,37,3.55816,partly-cloudy-day,Thursday,Autumn
1,831,2018-07-24 20:17:00,638,1320,5,25,28,3.93634,clear-day,Tuesday,Summer
2,831,2018-06-06 09:33:00,264,1380,1,25,20,6.70453,partly-cloudy-day,Wednesday,Summer
3,831,2018-04-18 08:44:00,372,1680,2,25,24,5.45865,clear-day,Wednesday,Spring
4,831,2018-08-15 08:45:00,41,1740,1,25,24,6.42797,partly-cloudy-day,Wednesday,Summer


 ##### Day & Night cycle ####
 After 6 pm night, after 6 am day → “Day” & “Night”

In [67]:
#Day night state
def daynight(p):
    """Returns day or night depending on clock"""
    timeclock = p["Start Date"].hour
    if timeclock > 6 and timeclock < 18:
        return "day"
    else:
        return "night"       
cycle_usage["Day & Night"] = cycle_usage.apply(lambda row: daynight(row), axis=1)

###### Holidays ######
Check if that day is a specific holiday?

In [69]:
#Consider holidays (e.g. Good Friday in UK)
def holiday(p):
    """ Checks if holiday """
    uk_holidays = hd.UK()
    if (p["Start Date"].date() in uk_holidays):
        return True
    else:
        return False
    
for date2, name in sorted(hd.UK(state='London', years=[2015,2016,2017], observed=False).items()):
    print(date2, name)
    
cycle_usage["Holiday"] = cycle_usage.apply(lambda row: holiday(row), axis=1)            
cycle_usage.head()

2015-01-01 New Year's Day
2015-01-02 New Year Holiday [Scotland]
2015-03-17 St. Patrick's Day [Northern Ireland]
2015-04-03 Good Friday
2015-04-06 Easter Monday [England, Wales, Northern Ireland]
2015-05-04 May Day
2015-05-25 Spring Bank Holiday
2015-07-12 Battle of the Boyne [Northern Ireland]
2015-08-03 Summer Bank Holiday [Scotland]
2015-08-31 Late Summer Bank Holiday [England, Wales, Northern Ireland]
2015-11-30 St. Andrew's Day [Scotland]
2015-12-25 Christmas Day
2015-12-26 Boxing Day
2016-01-01 New Year's Day
2016-01-02 New Year Holiday [Scotland]
2016-03-17 St. Patrick's Day [Northern Ireland]
2016-03-25 Good Friday
2016-03-28 Easter Monday [England, Wales, Northern Ireland]
2016-05-02 May Day
2016-05-30 Spring Bank Holiday
2016-07-12 Battle of the Boyne [Northern Ireland]
2016-08-01 Summer Bank Holiday [Scotland]
2016-08-29 Late Summer Bank Holiday [England, Wales, Northern Ireland]
2016-11-30 St. Andrew's Day [Scotland]
2016-12-25 Christmas Day
2016-12-26 Boxing Day
2017-01-01

Unnamed: 0,StartStation Id,Start Date,EndStation Id,Duration,Frequency,StartStation capacity,EndStation capacity,distance (geodesic),Daily Weather,Weekday,Season,Day & Night,Holiday
0,836,2018-09-06 18:03:00,749,2100,1,24,37,3.55816,partly-cloudy-day,Thursday,Autumn,night,False
1,831,2018-07-24 20:17:00,638,1320,5,25,28,3.93634,clear-day,Tuesday,Summer,night,False
2,831,2018-06-06 09:33:00,264,1380,1,25,20,6.70453,partly-cloudy-day,Wednesday,Summer,day,False
3,831,2018-04-18 08:44:00,372,1680,2,25,24,5.45865,clear-day,Wednesday,Spring,day,False
4,831,2018-08-15 08:45:00,41,1740,1,25,24,6.42797,partly-cloudy-day,Wednesday,Summer,day,False


In [70]:
# Months
def months_names(p):
    months = {
        1: "January",
        2: "February",
        3: "March",
        4: "April",
        5: "May",
        6: "June",
        7: "July",
        8: "August",
        9: "September",
        10: "October",
        11: "November",
        12: "December"
    }
    return months.get(p["Start Date"].month, "not defined")

cycle_usage["Month"] = cycle_usage.apply(lambda row: months_names(row), axis=1)

In [71]:
cycle_usage.count()

StartStation Id          470190
Start Date               470190
EndStation Id            470190
Duration                 470190
Frequency                470190
StartStation capacity    470190
EndStation capacity      470190
distance (geodesic)      470190
Daily Weather            470190
Weekday                  470190
Season                   470190
Day & Night              470190
Holiday                  470190
Month                    470190
dtype: int64

In [72]:
rm_columns = {
    #"StartStation Id",
    #"Start Date",
    "StartStation Address",
    "StartStation capacity",
    #"EndStation Id",
    "End Date",
    "EndStation Address",
    "EndStation capacity",
    "Duration",
   # "Frequency",
    "Humidity",
    "Windspeed",
    "Apparent Temperature (Avg)",
    "StartStation Id Used",
    "EndStation Id Used",
    "StartStation latitude",
    "StartStation longitude",
    "EndStation latitude",
    "EndStation longitude",
    "Hourly Weather",
    "distance (geodesic)"
   # "Daily Weather"
}

cycle_usage.drop(columns=rm_columns, inplace=True, errors="ignore")
cycle_usage.head()

Unnamed: 0,StartStation Id,Start Date,EndStation Id,Frequency,Daily Weather,Weekday,Season,Day & Night,Holiday,Month
0,836,2018-09-06 18:03:00,749,1,partly-cloudy-day,Thursday,Autumn,night,False,September
1,831,2018-07-24 20:17:00,638,5,clear-day,Tuesday,Summer,night,False,July
2,831,2018-06-06 09:33:00,264,1,partly-cloudy-day,Wednesday,Summer,day,False,June
3,831,2018-04-18 08:44:00,372,2,clear-day,Wednesday,Spring,day,False,April
4,831,2018-08-15 08:45:00,41,1,partly-cloudy-day,Wednesday,Summer,day,False,August


In [76]:
cycle_usage.to_csv("features.csv", header=True, index=None)