# Looking at the data

In [39]:
import pandas as pd

# Load the e-scooter demand CSV into a DataFrame.
DEMAND_CSV = "data/eScooterDemand.csv"
data = pd.read_csv(DEMAND_CSV)

# Preview the first rows to check how the columns were parsed.
data.head()

Unnamed: 0,Date,Count,Hour,Temp,Humidity,Wind speed,Visibility,Dew point,Sunshine,Rain,Snow,Season,Public Holiday,HireAvailable
0,01/12/2017,254,0,-5.2,37,2.2,2000,-17.6,0.0,0.0,0.0,Winter,No,Yes
1,01/12/2017,204,1,-5.5,38,0.8,2000,-17.6,0.0,0.0,0.0,Winter,No,Yes
2,01/12/2017,173,2,-6.0,39,1.0,2000,-17.7,0.0,0.0,0.0,Winter,No,Yes
3,01/12/2017,107,3,-6.2,40,0.9,2000,-17.6,0.0,0.0,0.0,Winter,No,Yes
4,01/12/2017,78,4,-6.0,36,2.3,2000,-18.6,0.0,0.0,0.0,Winter,No,Yes


# Convert Yes/No columns to binary (0/1) values

In [40]:
# Encode the two Yes/No columns as integers: "Yes" becomes 1 and every other
# value becomes 0.
for flag_column in ("Public Holiday", "HireAvailable"):
    # Only encode while the column still holds the raw text labels. Without
    # this guard, running the cell a second time would compare the
    # already-encoded integers against "Yes" and silently overwrite the whole
    # column with 0.
    if not pd.api.types.is_numeric_dtype(data[flag_column]):
        # Vectorised comparison; the boolean mask is cast to int64 so the
        # column holds plain 0/1 integers.
        data[flag_column] = data[flag_column].eq("Yes").astype("int64")
data.head()

Unnamed: 0,Date,Count,Hour,Temp,Humidity,Wind speed,Visibility,Dew point,Sunshine,Rain,Snow,Season,Public Holiday,HireAvailable
0,01/12/2017,254,0,-5.2,37,2.2,2000,-17.6,0.0,0.0,0.0,Winter,0,1
1,01/12/2017,204,1,-5.5,38,0.8,2000,-17.6,0.0,0.0,0.0,Winter,0,1
2,01/12/2017,173,2,-6.0,39,1.0,2000,-17.7,0.0,0.0,0.0,Winter,0,1
3,01/12/2017,107,3,-6.2,40,0.9,2000,-17.6,0.0,0.0,0.0,Winter,0,1
4,01/12/2017,78,4,-6.0,36,2.3,2000,-18.6,0.0,0.0,0.0,Winter,0,1


# One-hot encode season data

In [41]:
# Add one 0/1 indicator column per season, then drop the original text column.
# The guard makes the cell safe to re-run: once "Season" has been removed the
# indicator columns already exist, and indexing data["Season"] would raise
# KeyError.
if "Season" in data.columns:
    for season in ("Winter", "Autumn", "Spring", "Summer"):
        # Vectorised equality test; rows whose label differs (or is missing)
        # get 0.
        data[season] = data["Season"].eq(season).astype("int64")
    del data["Season"]
data.head()

Unnamed: 0,Date,Count,Hour,Temp,Humidity,Wind speed,Visibility,Dew point,Sunshine,Rain,Snow,Public Holiday,HireAvailable,Winter,Autumn,Spring,Summer
0,01/12/2017,254,0,-5.2,37,2.2,2000,-17.6,0.0,0.0,0.0,0,1,1,0,0,0
1,01/12/2017,204,1,-5.5,38,0.8,2000,-17.6,0.0,0.0,0.0,0,1,1,0,0,0
2,01/12/2017,173,2,-6.0,39,1.0,2000,-17.7,0.0,0.0,0.0,0,1,1,0,0,0
3,01/12/2017,107,3,-6.2,40,0.9,2000,-17.6,0.0,0.0,0.0,0,1,1,0,0,0
4,01/12/2017,78,4,-6.0,36,2.3,2000,-18.6,0.0,0.0,0.0,0,1,1,0,0,0


# Break date column into day, month and year columns

In [42]:
# Split the "Date" strings on "/" and store the first three pieces as integer
# "day", "month" and "year" columns, then drop the text column.
# The guard makes the cell safe to re-run: after "Date" has been deleted,
# indexing data["Date"] would raise KeyError.
if "Date" in data.columns:
    # Split every date string once and reuse the pieces for all three columns
    # instead of re-splitting the same string for each one.
    date_pieces = data["Date"].str.split("/")
    for position, part_name in enumerate(("day", "month", "year")):
        # .str[position] picks the piece at that position from each split
        # list; the cast turns text such as "01" into the integer 1.
        data[part_name] = date_pieces.str[position].astype("int64")
    del data["Date"]
data.head()

Unnamed: 0,Count,Hour,Temp,Humidity,Wind speed,Visibility,Dew point,Sunshine,Rain,Snow,Public Holiday,HireAvailable,Winter,Autumn,Spring,Summer,day,month,year
0,254,0,-5.2,37,2.2,2000,-17.6,0.0,0.0,0.0,0,1,1,0,0,0,1,12,2017
1,204,1,-5.5,38,0.8,2000,-17.6,0.0,0.0,0.0,0,1,1,0,0,0,1,12,2017
2,173,2,-6.0,39,1.0,2000,-17.7,0.0,0.0,0.0,0,1,1,0,0,0,1,12,2017
3,107,3,-6.2,40,0.9,2000,-17.6,0.0,0.0,0.0,0,1,1,0,0,0,1,12,2017
4,78,4,-6.0,36,2.3,2000,-18.6,0.0,0.0,0.0,0,1,1,0,0,0,1,12,2017


# Normalize the weather columns to the [0, 1] range

In [43]:
from sklearn.preprocessing import minmax_scale

# Min-max scale the listed columns and write the scaled values back in place.
# The column list is named once so the selection on both sides of the
# assignment cannot drift apart.
scaled_columns = [
    "Temp",
    "Humidity",
    "Wind speed",
    "Visibility",
    "Dew point",
    "Sunshine",
    "Rain",
    "Snow",
]
data[scaled_columns] = minmax_scale(data[scaled_columns])
data.head()

Unnamed: 0,Count,Hour,Temp,Humidity,Wind speed,Visibility,Dew point,Sunshine,Rain,Snow,Public Holiday,HireAvailable,Winter,Autumn,Spring,Summer,day,month,year
0,254,0,0.22028,0.377551,0.297297,1.0,0.224913,0.0,0.0,0.0,0,1,1,0,0,0,1,12,2017
1,204,1,0.215035,0.387755,0.108108,1.0,0.224913,0.0,0.0,0.0,0,1,1,0,0,0,1,12,2017
2,173,2,0.206294,0.397959,0.135135,1.0,0.223183,0.0,0.0,0.0,0,1,1,0,0,0,1,12,2017
3,107,3,0.202797,0.408163,0.121622,1.0,0.224913,0.0,0.0,0.0,0,1,1,0,0,0,1,12,2017
4,78,4,0.206294,0.367347,0.310811,1.0,0.207612,0.0,0.0,0.0,0,1,1,0,0,0,1,12,2017
