# 7. Transforming Variables in the Bike Sharing Dataset

Importing the libraries

In [1]:
import pandas as pd
import numpy as np
from scipy import stats

Loading the dataset

In [2]:
# Read the bike-sharing CSV (path is relative to the notebook's working directory).
df = pd.read_csv(filepath_or_buffer='./datasets/bike_sharing.csv')

In [3]:
# Preview the first five rows of the freshly loaded frame.
df.head(n=5)

Unnamed: 0,instant,dteday,season,yr,mnth,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
0,1,01-01-2018,1,0,1,0,6,0,2,14.110847,18.18125,80.5833,10.749882,331,654,985
1,2,02-01-2018,1,0,1,0,0,0,2,14.902598,17.68695,69.6087,16.652113,131,670,801
2,3,03-01-2018,1,0,1,0,1,1,1,8.050924,9.47025,43.7273,16.636703,120,1229,1349
3,4,04-01-2018,1,0,1,0,2,1,1,8.2,10.6061,59.0435,10.739832,108,1454,1562
4,5,05-01-2018,1,0,1,0,3,1,1,9.305237,11.4635,43.6957,12.5223,82,1518,1600


Calculating skewness

In [4]:
# Skewness is only defined for numeric data, but the frame still contains the
# string column `dteday`. Restrict the computation explicitly: without
# numeric_only=True, older pandas warns and silently drops the column, while
# pandas >= 2.0 raises a TypeError and the notebook stops here.
skewness = df.skew(numeric_only=True)
skewness

  skewness = df.skew()


instant       0.000000
season       -0.002571
yr            0.000000
mnth         -0.010478
holiday       5.650014
weekday       0.002745
workingday   -0.790992
weathersit    0.961855
temp         -0.057187
atemp        -0.133709
hum          -0.067476
windspeed     0.676314
casual        1.266328
registered    0.041211
cnt          -0.049581
dtype: float64

Identifying variables with high skewness (absolute skewness greater than 0.5)

In [5]:
# Keep the labels of every column whose skewness magnitude exceeds 0.5,
# regardless of whether the tail is on the left or the right.
skewed_vars = skewness.loc[skewness.abs().gt(0.5)].index
print(f"Skewed Variables: {skewed_vars}")

Skewed Variables: Index(['holiday', 'workingday', 'weathersit', 'windspeed', 'casual'], dtype='object')


# Applying transformations

Log Transformation

In [6]:
# The logarithm needs strictly positive input, so only skewed columns whose
# minimum is above zero get a companion `<name>_log` column; the rest are skipped.
for col in (name for name in skewed_vars if df[name].min() > 0):
    df[f'{col}_log'] = np.log(df[col])

Square Root Transformation

In [7]:
# The square root accepts zero but not negative values, so every skewed
# column with a non-negative minimum gets a companion `<name>_sqrt` column.
for col in skewed_vars:
    values = df[col]
    if not values.min() >= 0:
        continue
    df[f'{col}_sqrt'] = np.sqrt(values)

Box-Cox Transformation

In [8]:
# Box-Cox is applied only to skewed columns whose minimum is above zero.
# stats.boxcox returns the transformed data together with the fitted lambda;
# only the transformed data (element 0) is stored, as `<name>_boxcox`.
for col in skewed_vars:
    if df[col].min() > 0:
        df[f'{col}_boxcox'] = stats.boxcox(df[col])[0]

Updated Dataframe

In [9]:
# Preview the first five rows, now including the derived *_log, *_sqrt and *_boxcox columns.
df.head(n=5)

Unnamed: 0,instant,dteday,season,yr,mnth,holiday,weekday,workingday,weathersit,temp,...,windspeed_log,casual_log,holiday_sqrt,workingday_sqrt,weathersit_sqrt,windspeed_sqrt,casual_sqrt,weathersit_boxcox,windspeed_boxcox,casual_boxcox
0,1,01-01-2018,1,0,1,0,6,0,2,14.110847,...,2.374895,5.802118,0.0,0.0,1.414214,3.278701,18.193405,0.342496,4.050311,16.39782
1,2,02-01-2018,1,0,1,0,0,0,2,14.902598,...,2.812537,4.875197,0.0,0.0,1.414214,4.0807,11.445523,0.342496,5.338332,11.472437
2,3,03-01-2018,1,0,1,0,1,1,1,8.050924,...,2.811611,4.787492,0.0,1.0,1.0,4.078811,10.954451,0.0,5.335352,11.076035
3,4,04-01-2018,1,0,1,0,2,1,1,8.2,...,2.373959,4.682131,0.0,1.0,1.0,3.277168,10.392305,0.0,4.047801,10.61397
4,5,05-01-2018,1,0,1,0,3,1,1,9.305237,...,2.527511,4.406719,0.0,1.0,1.0,3.538686,9.055385,0.0,4.473128,9.475537
