> Apply Naive Bayes to predict if users are likely to play golf based on input features (outlook, temperature, humidity, wind)

In [1]:
import pickle
import warnings

import numpy as np
import pandas as pd
from sklearn.naive_bayes import BernoulliNB
from sklearn.preprocessing import LabelEncoder
from sklearn.utils.validation import column_or_1d

warnings.filterwarnings('ignore')

In [2]:
# Read the play-golf observations; the first spreadsheet column becomes the row index.
df = pd.read_excel(io='datasets/playgolf_data.xlsx', index_col=0)
df

Unnamed: 0_level_0,Outlook,Temperature,Humidity,Wind,Play Golf
Day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,Sunny,Hot,High,Weak,No
2,Sunny,Hot,High,Strong,No
3,Overcast,Hot,High,Weak,Yes
4,Rain,Mild,High,Weak,Yes
5,Rain,Cool,Normal,Weak,Yes
6,Rain,Cool,Normal,Strong,No
7,Overcast,Cool,Normal,Strong,Yes
8,Sunny,Mild,High,Weak,No
9,Sunny,Cool,Normal,Weak,Yes
10,Rain,Mild,Normal,Weak,Yes


In [3]:
# Summarise the loaded frame: index range, column names, non-null counts and dtypes.
df.info()

<class 'pandas.DataFrame'>
RangeIndex: 14 entries, 1 to 14
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype
---  ------       --------------  -----
 0   Outlook      14 non-null     str  
 1   Temperature  14 non-null     str  
 2   Humidity     14 non-null     str  
 3   Wind         14 non-null     str  
 4   Play Golf    14 non-null     str  
dtypes: str(5)
memory usage: 692.0 bytes


In [4]:
# Separate predictors from the label. The label is deliberately kept as a
# one-column DataFrame (list selector), not a Series.
features = df.drop(columns=['Play Golf'])
target = df.loc[:, ['Play Golf']]

In [5]:
# One-hot encode the predictor columns into per-category indicator columns.
features = pd.get_dummies(data=features)
features

Unnamed: 0_level_0,Outlook_Overcast,Outlook_Rain,Outlook_Sunny,Temperature_Cool,Temperature_Hot,Temperature_Mild,Humidity_High,Humidity_Normal,Wind_Strong,Wind_Weak
Day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1,False,False,True,False,True,False,True,False,False,True
2,False,False,True,False,True,False,True,False,True,False
3,True,False,False,False,True,False,True,False,False,True
4,False,True,False,False,False,True,True,False,False,True
5,False,True,False,True,False,False,False,True,False,True
6,False,True,False,True,False,False,False,True,True,False
7,True,False,False,True,False,False,False,True,True,False
8,False,False,True,False,False,True,True,False,False,True
9,False,False,True,True,False,False,False,True,False,True
10,False,True,False,False,False,True,False,True,False,True


In [6]:
# Encode the string labels as integers. LabelEncoder sorts its classes, so
# "No" -> 0 and "Yes" -> 1, and `le.classes_` keeps the original strings for decoding.
# Encoding straight from the source column (rather than from `target` itself)
# keeps this cell idempotent: re-running it no longer refits the encoder on
# already-encoded integers, which would discard the original class names.
le = LabelEncoder()
target = le.fit_transform(df['Play Golf'])

In [7]:
# Fit a Bernoulli Naive Bayes classifier on the indicator features.
# `fit` returns the estimator itself, so construction and fitting can be chained.
model1 = BernoulliNB().fit(features, target)
model1

0,1,2
,"alpha  alpha: float or array-like of shape (n_features,), default=1.0 Additive (Laplace/Lidstone) smoothing parameter (set alpha=0 and force_alpha=True, for no smoothing).",1.0
,"force_alpha  force_alpha: bool, default=True If False and alpha is less than 1e-10, it will set alpha to 1e-10. If True, alpha will remain unchanged. This may cause numerical errors if alpha is too close to 0. .. versionadded:: 1.2 .. versionchanged:: 1.4  The default value of `force_alpha` changed to `True`.",True
,"binarize  binarize: float or None, default=0.0 Threshold for binarizing (mapping to booleans) of sample features. If None, input is presumed to already consist of binary vectors.",0.0
,"fit_prior  fit_prior: bool, default=True Whether to learn class prior probabilities or not. If false, a uniform prior will be used.",True
,"class_prior  class_prior: array-like of shape (n_classes,), default=None Prior probabilities of the classes. If specified, the priors are not adjusted according to the data.",


In [8]:
# The score is computed on the same rows the model was fitted on, so this is a
# training accuracy rather than an estimate of performance on unseen days.
accuracy_pct = model1.score(features, target) * 100
print("The prediction accuracy is :", accuracy_pct, "%")

The prediction accuracy is : 92.85714285714286 %


In [9]:
# Class labels known to the fitted classifier. These are the integer codes the
# model was trained on, not the original label strings.
class_name = model1.classes_
class_name

array([0, 1])

In [10]:
# Persist the fitted classifier so it can be reloaded later without retraining.
# `pickle` is imported in the notebook's import cell alongside the other dependencies.
# NOTE(review): the file name says "play_gold" (presumably "play_golf"); it is left
# unchanged here because other code may already refer to this path.
pkl_filename = "exercise_3__play_gold_prediction__predict_model.pkl"
with open(pkl_filename, 'wb') as model_file:
    pickle.dump(model1, model_file)

In [11]:
# Restore the classifier from the pickle file named by `pkl_filename`.
# pickle.load can execute arbitrary code, so only load files from a trusted source.
with open(pkl_filename, mode='rb') as model_file:
    playgolf_model = pickle.load(model_file)

In [12]:
# Two hand-built days in the model's one-hot layout. Labelling the columns with the
# names the model recorded at fit time makes the layout explicit and lets
# scikit-learn verify that names and order match, instead of trusting bare
# positional lists whose mismatch warning would be hidden by the warnings filter.
X_test = pd.DataFrame(
    [
        [1, 0, 0, 1, 0, 0, 1, 0, 1, 0],  # Overcast, Cool, High humidity, Strong wind
        [0, 0, 1, 1, 0, 0, 1, 0, 0, 1],  # Sunny, Cool, High humidity, Weak wind
    ],
    columns=playgolf_model.feature_names_in_,
)
# Predictions are the integer class codes (see `model1.classes_`).
y_pred = playgolf_model.predict(X_test)
y_pred

array([1, 0])