In [101]:
import pandas as pd
import numpy as np
from pathlib import Path


In [53]:
badminton_play_data = pd.read_csv(Path("badminton_dataset.csv"), index_col=0)

In [63]:
badminton_play_data.describe()
badminton_play_data.dtypes

Temperature    object
Humidity       object
Wind           object
dtype: object

In [55]:
badminton_play_labels = badminton_play_data[["Play_Badminton"]]
badminton_play_data = badminton_play_data.copy()
del badminton_play_data["Play_Badminton"]

In [56]:
# All the values in this case are represented using string values. As soon as, ML libraries can hardly process
# text string data, it will be better to transform it to numerical data. In cases where there is a enum of values, then we can use Encoder

from sklearn.preprocessing import OrdinalEncoder
from sklearn.pipeline import Pipeline

pipeline = Pipeline([("ordinalEncoder", OrdinalEncoder())])
badminton_play_data_transformed = pipeline.fit_transform(badminton_play_data)
badminton_play_labels_transformed = pipeline.fit_transform(badminton_play_labels)



In [79]:
# Select data for the training and for the testing. f.e. 9 to 1.
data_size = badminton_play_data.shape[0]
training_data_chunks = int(data_size/10)

training_data = badminton_play_data_transformed[:training_data_chunks*9]
training_data_labels = badminton_play_labels_transformed[:training_data_chunks*9]

testing_data = badminton_play_data_transformed[training_data.shape[0]:]
testing_data_labels = badminton_play_labels_transformed[training_data.shape[0]:]

In [93]:
# MSE analyzer
from sklearn.metrics import mean_squared_error

def get_rmse(labels, predictions):
    return mean_squared_error(labels,predictions)

In [97]:
# trying different models
# Linear Regression
from sklearn.linear_model import LinearRegression

model=LinearRegression()
model.fit(training_data,training_data_labels)

print(get_rmse(testing_data_labels,model.predict(testing_data)))
# 0.16499945445826736

0.16499945445826736


In [103]:
# Random Forest Regressor
from sklearn.ensemble import RandomForestRegressor

modelRandomForest=RandomForestRegressor()
modelRandomForest.fit(training_data,np.ravel(training_data_labels))

print(get_rmse(testing_data_labels,modelRandomForest.predict(testing_data)))
# 0.24049698456790125

0.25757345219198796


In [106]:
# Decision Tree Regressore
from sklearn.tree import DecisionTreeRegressor


modelDecisionTree=DecisionTreeRegressor()
modelDecisionTree.fit(training_data,np.ravel(training_data_labels))

print(get_rmse(testing_data_labels,modelDecisionTree.predict(testing_data)))
# 0.3888888888888889

0.3888888888888889


In [108]:
# Lasso

from sklearn.linear_model import Lasso

modelLasso=Lasso()
modelLasso.fit(training_data,training_data_labels)

print(get_rmse(testing_data_labels,modelLasso.predict(testing_data)))
# 0.3347050754458161

0.3347050754458161
