# Imports

In [24]:
# Imports

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Reading in the Data

In [25]:
# Load the daily bike-sharing records and preview the first rows

DIR = "../data/"
DATA = "bike-sharing-daily.csv"
day_bike_rentals = pd.read_csv(f"{DIR}{DATA}")
day_bike_rentals.head()

Unnamed: 0,instant,dteday,season,yr,mnth,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
0,1,2011-01-01,1,0,1,0,6,0,2,0.344167,0.363625,0.805833,0.160446,331,654,985
1,2,2011-01-02,1,0,1,0,0,0,2,0.363478,0.353739,0.696087,0.248539,131,670,801
2,3,2011-01-03,1,0,1,0,1,1,1,0.196364,0.189405,0.437273,0.248309,120,1229,1349
3,4,2011-01-04,1,0,1,0,2,1,1,0.2,0.212122,0.590435,0.160296,108,1454,1562
4,5,2011-01-05,1,0,1,0,3,1,1,0.226957,0.22927,0.436957,0.1869,82,1518,1600


# Data Cleaning

## Categorical Features

In [26]:
# Human-readable labels for the integer-coded categorical columns.
# The position of a label in its list determines the code it stands for:
# weekdays, holidays and working_day are mapped starting at code 0,
# season, weathersit and months starting at code 1.

weekdays = 'SUN MON TUE WED THU FRI SAT'.split()
holidays = [
  'NO HOLIDAY',
  'HOLIDAY',
]
working_day = [
  'NO WORKING DAY',
  'WORKING DAY',
]
season = [
  'WINTER',
  'SPRING',
  'SUMMER',
  'FALL',
]
weathersit = [
  'GOOD',
  'MISTY',
  'RAIN/SNOW/STORM',
]
months = 'JAN FEB MAR APR MAY JUN JUL AUG SEP OCT NOV DEC'.split()

In [27]:
# Function to create a dictionary mapping index of list to the respective label

def create_dictionary(label_list, start_at_zero=True):
  """Build a ``{code: label}`` dictionary from an ordered list of labels.

  Parameters
  ----------
  label_list : iterable
      Labels in code order; the first label gets the first code.
  start_at_zero : bool, default True
      When truthy the codes are 0, 1, 2, ...; otherwise 1, 2, 3, ...

  Returns
  -------
  dict
      Maps each integer code to its label. Empty input yields ``{}``.
  """
  # The two cases differ only in the first code, so let enumerate handle the
  # offset instead of duplicating the loop.
  first_code = 0 if start_at_zero else 1
  return dict(enumerate(label_list, start=first_code))

In [28]:
# Mapping the indices to actual labels

# One entry per coded column: (column name, label list, codes start at zero?)
label_specs = [
  ('weekday', weekdays, True),
  ('holiday', holidays, True),
  ('workingday', working_day, True),
  ('season', season, False),
  ('weathersit', weathersit, False),
  ('mnth', months, False),
]

for column, labels, zero_based in label_specs:
  # Series.map turns every value that is missing from the dictionary into NaN,
  # so mapping a column that already holds labels would wipe it out. Only
  # columns that are still numeric are mapped, which makes this cell safe to
  # re-run.
  if pd.api.types.is_numeric_dtype(day_bike_rentals[column]):
    day_bike_rentals[column] = day_bike_rentals[column].map(
      create_dictionary(labels, start_at_zero=zero_based)
    )

## Numerical Features

In [29]:
# Denormalizing the temperature

# Function that denormalizes temperature
def inverse_min_max(row, tmin, tmax):
  """Undo min-max scaling of ``row``.

  Inverts ``(t - tmin) / (tmax - tmin)``: a scaled value of 0 comes back as
  ``tmin`` and a scaled value of 1 comes back as ``tmax``.

  Parameters
  ----------
  row : number
      The scaled value to convert back.
  tmin, tmax : number
      Lower and upper bound of the original scale.

  Returns
  -------
  number
      ``row`` expressed on the original ``[tmin, tmax]`` scale.
  """
  value_range = tmax - tmin
  return row * value_range + tmin

# NOTE: both columns are overwritten in place, so re-running this cell on its
# own rescales values that were already converted.

# temp: t_min=-8, t_max=+39
# inverse_min_max is plain arithmetic, so it is applied to the whole Series at
# once instead of element by element through .apply.
day_bike_rentals['temp'] = inverse_min_max(day_bike_rentals['temp'], -8, 39)

# atemp: t_min=-16, t_max=+50
day_bike_rentals['atemp'] = inverse_min_max(day_bike_rentals['atemp'], -16, 50)

In [30]:
# Denormalize wind speed: the stored values are divided by 67 (max), so
# multiply by 67 to get the original scale back.
# NOTE: the column is overwritten in place; re-running this cell on its own
# multiplies by 67 a second time.

# Vectorized multiplication on the whole column (no per-element lambda needed)
day_bike_rentals['windspeed'] = day_bike_rentals['windspeed'] * 67

In [31]:
# Denormalize humidity: the stored values are divided by 100 (max), so
# multiply by 100 to get the original scale back.
# NOTE: the column is overwritten in place; re-running this cell on its own
# multiplies by 100 a second time.

# Vectorized multiplication on the whole column (no per-element lambda needed)
day_bike_rentals['hum'] = day_bike_rentals['hum'] * 100

## Temporal Features

In [32]:
# Processing time features

# Decode the year flag: 0 -> 2011, 1 -> 2012.
# replace() only touches the listed codes and leaves every other value as it
# is, so an unexpected or missing flag is not silently labelled 2012, and
# re-running the cell keeps already-decoded years intact.
day_bike_rentals['yr'] = day_bike_rentals['yr'].replace({0: 2011, 1: 2012})

# Parse the date column so that date arithmetic is available
day_bike_rentals['dteday'] = pd.to_datetime(day_bike_rentals['dteday'])

# Number of days between each record and the earliest date in the data
day_bike_rentals['days_since_2011'] = (day_bike_rentals['dteday'] - day_bike_rentals['dteday'].min()).dt.days

# Data Processing

In [33]:
# Keep every column except the ones listed in the filter below

bike = day_bike_rentals[
  [
    name
    for name in day_bike_rentals.columns
    if name not in ('instant', 'dteday', 'registered', 'casual', 'atemp')
  ]
]

In [34]:
# Names of the columns whose dtype is 'object' (the label columns)
# NOTE(review): relies on label columns being stored with dtype 'object' --
# confirm on pandas versions that use a dedicated string dtype.

categorical_features = list(
  filter(lambda name: bike[name].dtype == 'object', bike.columns)
)
categorical_features

['season', 'mnth', 'holiday', 'weekday', 'workingday', 'weathersit']

## Dummifying Categorical Features

In [35]:
# Categorical features that need to be dummified
sub_features = [
  'season',
  'holiday',
  'workingday',
  'weathersit',
]
sub_features

['season', 'holiday', 'workingday', 'weathersit']

In [36]:
# Dummifying categorical features and dropping the columns that are no longer
# needed, in one step.
# - drop_first=True leaves out the first level of every feature.
# - dtype=int keeps the indicators as 0/1 integers; pandas >= 2.0 would
#   otherwise return True/False columns.
# - .drop(columns=...) returns a new frame instead of mutating `bike` in
#   place, so a failure cannot leave `bike` half-transformed.
bike = pd.concat(
  [bike, pd.get_dummies(bike[sub_features], drop_first=True, dtype=int)],
  axis=1,
).drop(columns=sub_features + ['yr', 'mnth', 'weekday'])

# Viewing the result
bike.head()

Unnamed: 0,temp,hum,windspeed,cnt,days_since_2011,season_SPRING,season_SUMMER,season_WINTER,holiday_NO HOLIDAY,workingday_WORKING DAY,weathersit_MISTY,weathersit_RAIN/SNOW/STORM
0,8.175849,80.5833,10.749882,985,0,0,0,1,1,0,1,0
1,9.083466,69.6087,16.652113,801,1,0,0,1,1,0,1,0
2,1.229108,43.7273,16.636703,1349,2,0,0,1,1,1,0,0
3,1.4,59.0435,10.739832,1562,3,0,0,1,1,1,0,0
4,2.666979,43.6957,12.5223,1600,4,0,0,1,1,1,0,0
