In [21]:
import pandas as pd
import numpy as np
import matplotlib as plt
import random
from sklearn.preprocessing import OneHotEncoder
from sklearn.neighbors import KNeighborsClassifier

In [15]:
# Location of the Seoul bike-sharing CSV. The path is relative, so it is
# resolved against the working directory the notebook kernel was started in.
path = 'SeoulBikeData.csv'
# Load the whole file into memory as the raw, unmodified frame.
df = pd.read_csv(path)

In [9]:
# Inspect the dtype of every column. Date, Seasons and Holiday are stored as
# `object` (strings): they are categorical values, not numeric features.
df.dtypes

Date                         object
Rented Bike Count             int64
Hour                          int64
Temperature(C)              float64
Humidity(%)                   int64
Wind speed (m/s)            float64
Visibility (10m)              int64
Dew point temperature(C)    float64
Solar Radiation (MJ/m2)     float64
Rainfall(mm)                float64
Snowfall (cm)               float64
Seasons                      object
Holiday                      object
Functioning Day               int64
dtype: object

In [14]:
# Target: the rented-bike count. Features: every remaining column.
y = df['Rented Bike Count']
X = df.drop(columns=['Rented Bike Count'])
# Largest observed count, shown as the cell output.
y.max()

9511

In [22]:
# List the distinct labels found in the Seasons column.
X['Seasons'].unique()

array(['Winter', 'Spring', 'Summer', 'Autumn'], dtype=object)

In [32]:
# Start the pre-processed feature frame from a copy so that X itself stays unchanged.
X_final = X.copy()

In [38]:
# Pre-processing the data
def OneHotEnconder_specificColumns( X_final : pd.DataFrame, name : str ) -> pd.DataFrame:
    """Replace one categorical column of ``X_final`` with one-hot indicator columns.

    A fresh ``OneHotEncoder`` is fitted on the column ``name`` of the frame that
    was passed in. The resulting indicator columns are appended on the right and
    the original column is dropped. The input frame itself is not modified.

    Parameters
    ----------
    X_final : pandas.DataFrame
        Frame that contains the column to encode.
    name : str
        Label of the categorical column to encode.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the remaining columns of ``X_final`` followed by the
        indicator columns produced by the encoder.

    Raises
    ------
    KeyError
        If ``name`` is not a column of ``X_final`` (for example when the same
        column is encoded a second time).

    Notes
    -----
    The fitted encoder is discarded when the function returns, so it cannot be
    reused to transform another data set with the same category mapping.
    """
    one_hot_encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False).set_output(transform='pandas')
    # Encode the column of the frame that was passed in. Reading it from the
    # notebook-level `X` instead made the function depend on hidden global
    # state and produced wrong (or misaligned) rows for any other frame.
    encoded_columns = one_hot_encoder.fit_transform(X_final[[name]])
    # Column-wise concat, then drop the now-redundant categorical column.
    return pd.concat([X_final, encoded_columns], axis=1).drop(columns=[name])

In [39]:
# Replace the 'Seasons' column with one indicator column per season.
# NOTE: this cell is not idempotent. Running it a second time raises KeyError
# because 'Seasons' has already been dropped from X_final.
X_final = OneHotEnconder_specificColumns(X_final, 'Seasons')
X_final

Unnamed: 0,Date,Hour,Temperature(C),Humidity(%),Wind speed (m/s),Visibility (10m),Dew point temperature(C),Solar Radiation (MJ/m2),Rainfall(mm),Snowfall (cm),Holiday,Functioning Day,Seasons_Autumn,Seasons_Spring,Seasons_Summer,Seasons_Winter
0,01/12/2017,0,-5.2,37,2.2,2000,-17.6,0.0,0.0,0.0,No Holiday,1,0.0,0.0,0.0,1.0
1,01/12/2017,1,-5.5,38,0.8,2000,-17.6,0.0,0.0,0.0,No Holiday,1,0.0,0.0,0.0,1.0
2,01/12/2017,2,-6.0,39,1.0,2000,-17.7,0.0,0.0,0.0,No Holiday,1,0.0,0.0,0.0,1.0
3,01/12/2017,3,-6.2,40,0.9,2000,-17.6,0.0,0.0,0.0,No Holiday,1,0.0,0.0,0.0,1.0
4,01/12/2017,4,-6.0,36,2.3,2000,-18.6,0.0,0.0,0.0,No Holiday,1,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8755,30/11/2018,19,4.2,34,2.6,1894,-10.3,0.0,0.0,0.0,No Holiday,1,1.0,0.0,0.0,0.0
8756,30/11/2018,20,3.4,37,2.3,2000,-9.9,0.0,0.0,0.0,No Holiday,1,1.0,0.0,0.0,0.0
8757,30/11/2018,21,2.6,39,0.3,1968,-9.9,0.0,0.0,0.0,No Holiday,1,1.0,0.0,0.0,0.0
8758,30/11/2018,22,2.1,41,1.0,1859,-9.8,0.0,0.0,0.0,No Holiday,1,1.0,0.0,0.0,0.0


In [40]:
# Replace the 'Holiday' column with one indicator column per holiday label.
# NOTE: this cell is not idempotent. Running it a second time raises KeyError
# because 'Holiday' has already been dropped from X_final.
X_final = OneHotEnconder_specificColumns(X_final, 'Holiday')
X_final

Unnamed: 0,Date,Hour,Temperature(C),Humidity(%),Wind speed (m/s),Visibility (10m),Dew point temperature(C),Solar Radiation (MJ/m2),Rainfall(mm),Snowfall (cm),Functioning Day,Seasons_Autumn,Seasons_Spring,Seasons_Summer,Seasons_Winter,Holiday_Holiday,Holiday_No Holiday
0,01/12/2017,0,-5.2,37,2.2,2000,-17.6,0.0,0.0,0.0,1,0.0,0.0,0.0,1.0,0.0,1.0
1,01/12/2017,1,-5.5,38,0.8,2000,-17.6,0.0,0.0,0.0,1,0.0,0.0,0.0,1.0,0.0,1.0
2,01/12/2017,2,-6.0,39,1.0,2000,-17.7,0.0,0.0,0.0,1,0.0,0.0,0.0,1.0,0.0,1.0
3,01/12/2017,3,-6.2,40,0.9,2000,-17.6,0.0,0.0,0.0,1,0.0,0.0,0.0,1.0,0.0,1.0
4,01/12/2017,4,-6.0,36,2.3,2000,-18.6,0.0,0.0,0.0,1,0.0,0.0,0.0,1.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8755,30/11/2018,19,4.2,34,2.6,1894,-10.3,0.0,0.0,0.0,1,1.0,0.0,0.0,0.0,0.0,1.0
8756,30/11/2018,20,3.4,37,2.3,2000,-9.9,0.0,0.0,0.0,1,1.0,0.0,0.0,0.0,0.0,1.0
8757,30/11/2018,21,2.6,39,0.3,1968,-9.9,0.0,0.0,0.0,1,1.0,0.0,0.0,0.0,0.0,1.0
8758,30/11/2018,22,2.1,41,1.0,1859,-9.8,0.0,0.0,0.0,1,1.0,0.0,0.0,0.0,0.0,1.0


In [20]:
def SpecifyClass( max_item : int, numberOfClasses : int, number : float ) -> int:
    """Map ``number`` to the index of the equal-width interval it falls into.

    The range up to ``max_item`` is cut into ``numberOfClasses`` intervals whose
    upper bounds are ``i * max_item / numberOfClasses`` for ``i = 1..numberOfClasses``.
    The function returns the zero-based index of the first interval whose upper
    bound is greater than or equal to ``number``. Anything at or below the first
    bound therefore lands in class 0.

    Parameters
    ----------
    max_item : int
        Upper end of the value range. The bounds are only ascending when this
        is positive; the function does not check it.
    numberOfClasses : int
        Number of intervals; must be at least 1.
    number : float
        Value to classify.

    Returns
    -------
    int
        Class index in ``0 .. numberOfClasses - 1``. Values above every bound
        (``number > max_item``, or a bound that rounded just below ``max_item``)
        are clamped into the last class rather than returning ``None``.

    Raises
    ------
    ValueError
        If ``numberOfClasses`` is smaller than 1, since no class exists then.
    """
    if numberOfClasses < 1:
        raise ValueError("numberOfClasses must be at least 1")
    # Walk the upper bounds in ascending order; no intermediate list is needed.
    for class_number in range(numberOfClasses):
        upper_bound = (class_number + 1) * max_item / numberOfClasses
        if number <= upper_bound:
            return class_number
    # Nothing matched: the value lies above the top bound, so use the last class.
    return numberOfClasses - 1

def TransformToClasses( array, numberOfClasses : int ) -> "np.ndarray | None":
    """Convert numeric values into equal-width class indices.

    The largest value of ``array`` defines the upper end of the range, and each
    item is mapped to a class index by ``SpecifyClass``.

    Parameters
    ----------
    array : iterable of numbers
        Values to discretise. One-shot iterables such as generators are accepted.
    numberOfClasses : int
        Number of classes; only values from 3 to 10 inclusive are accepted.

    Returns
    -------
    numpy.ndarray or None
        One class index per input item, in input order. An empty input gives an
        empty integer array. ``None`` is returned (after printing a message)
        when ``numberOfClasses`` is outside the accepted range.
    """
    if( numberOfClasses <= 2 or numberOfClasses >= 11 ):
        print("Not in the exercise boundaries")
        return None
    # Materialise once: the values are scanned twice (maximum, then
    # classification), which would silently exhaust a generator.
    items = list(array)
    if not items:
        # max() of an empty sequence raises; there is simply nothing to classify.
        return np.array([], dtype=int)
    max_item = max(items)
    return np.array([SpecifyClass(max_item, numberOfClasses, item) for item in items])

## Testing TransformToClasses
# With a maximum of 7 and 7 classes the upper bounds are 1, 2, ..., 7,
# so every value v is expected to land in class v - 1.
array = [1, 2, 3, 5, 6, 7]
arrayClass = TransformToClasses(array, numberOfClasses=7)
print(arrayClass)
# Fail loudly on a regression instead of relying on reading the printed output.
np.testing.assert_array_equal(arrayClass, [0, 1, 2, 4, 5, 6])

[0 1 2 4 5 6]
