In [9]:
#################### Binary Label ##########################
# We will be using a small handmade dataset.

from sklearn import preprocessing
# Import Gaussian Naive Bayes model
from sklearn.naive_bayes import GaussianNB

# Toy dataset, written row-wise: one (weather, temperature, play decision)
# tuple per observation, so each record can be read at a glance.
observations = [
    ("Sunny", "Hot", "No"),
    ("Sunny", "Hot", "No"),
    ("Overcast", "Hot", "Yes"),
    ("Rainy", "Mild", "Yes"),
    ("Rainy", "Cool", "Yes"),
    ("Rainy", "Cool", "No"),
    ("Overcast", "Cool", "Yes"),
    ("Sunny", "Mild", "No"),
    ("Sunny", "Cool", "Yes"),
    ("Rainy", "Mild", "Yes"),
    ("Sunny", "Mild", "Yes"),
    ("Overcast", "Mild", "Yes"),
    ("Overcast", "Hot", "Yes"),
    ("Rainy", "Mild", "No"),
]

# Unzip the rows into the three parallel column lists used by the cells below.
weather, temp, play = (list(column) for column in zip(*observations))

In [10]:
# First, we need to convert these string labels into numbers.
# For example: 'Overcast', 'Rainy', 'Sunny' become 0, 1, 2. This is known as label encoding.

# Use one encoder per column. Refitting a single encoder replaces its learned
# classes, which would throw away the weather/temp mappings: they could no
# longer be inverse-transformed, nor used to encode a new sample by name.
weather_le = preprocessing.LabelEncoder()
weather_encoded = weather_le.fit_transform(weather)
print("Weather: ", weather_encoded)

# Similarly, encode the temp and play columns, each with its own encoder.
temp_le = preprocessing.LabelEncoder()
temp_encoded = temp_le.fit_transform(temp)

# `le` keeps its original name and final state (fitted on `play`), so any
# later code that refers to it still sees the same No/Yes mapping as before.
le = preprocessing.LabelEncoder()
label = le.fit_transform(play)
print("Temp: ", temp_encoded)
print("Play: ", label)

Weather:  [2 2 0 1 1 1 0 2 2 1 2 0 0 1]
Temp:  [1 1 1 2 0 0 0 2 0 2 2 2 1 2]
Play:  [0 0 1 1 1 0 1 0 1 1 1 1 1 0]


In [11]:
# Pair up the two encoded columns so that each observation becomes a single
# (weather_code, temp_code) tuple in the feature list.
features = [
    (weather_code, temp_code)
    for weather_code, temp_code in zip(weather_encoded, temp_encoded)
]
print(features)

[(2, 1), (2, 1), (0, 1), (1, 2), (1, 0), (1, 0), (0, 0), (2, 2), (2, 0), (1, 2), (2, 2), (0, 2), (0, 1), (1, 2)]


In [15]:
# Build the Gaussian Naive Bayes classifier and train it on the encoded toy data
# (fit() returns the estimator itself, so construction and training are chained).
model = GaussianNB().fit(features, label)

# Query the trained model with one sample given as [weather_code, temp_code].
query = [[0, 2]]  # 0: Overcast, 2: Mild
predicted = model.predict(query)
print("Predicted value: ", predicted)

Predicted value:  [1]


In [27]:
#################### Multi-class Label ##########################
# This part uses the wine dataset bundled with scikit-learn.
from sklearn.datasets import load_wine

# Load the dataset object
wine = load_wine()

# Summarise the dataset: feature names, class names, and data matrix shape
for caption, value in (
    ("Features: ", wine.feature_names),
    ("\nTarget: ", wine.target_names),
    ("\nShape: ", wine.data.shape),
):
    print(caption, value)

# Peek at the first five rows of the feature matrix
print(wine.data[:5])
# Show the class label of every sample
print(wine.target)

Features:  ['alcohol', 'malic_acid', 'ash', 'alcalinity_of_ash', 'magnesium', 'total_phenols', 'flavanoids', 'nonflavanoid_phenols', 'proanthocyanins', 'color_intensity', 'hue', 'od280/od315_of_diluted_wines', 'proline']

Target:  ['class_0' 'class_1' 'class_2']

Shape:  (178, 13)
[[1.423e+01 1.710e+00 2.430e+00 1.560e+01 1.270e+02 2.800e+00 3.060e+00
  2.800e-01 2.290e+00 5.640e+00 1.040e+00 3.920e+00 1.065e+03]
 [1.320e+01 1.780e+00 2.140e+00 1.120e+01 1.000e+02 2.650e+00 2.760e+00
  2.600e-01 1.280e+00 4.380e+00 1.050e+00 3.400e+00 1.050e+03]
 [1.316e+01 2.360e+00 2.670e+00 1.860e+01 1.010e+02 2.800e+00 3.240e+00
  3.000e-01 2.810e+00 5.680e+00 1.030e+00 3.170e+00 1.185e+03]
 [1.437e+01 1.950e+00 2.500e+00 1.680e+01 1.130e+02 3.850e+00 3.490e+00
  2.400e-01 2.180e+00 7.800e+00 8.600e-01 3.450e+00 1.480e+03]
 [1.324e+01 2.590e+00 2.870e+00 2.100e+01 1.180e+02 2.800e+00 2.690e+00
  3.900e-01 1.820e+00 4.320e+00 1.040e+00 2.930e+00 7.350e+02]]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0

In [30]:
# Hold out part of the wine data so the model can be evaluated on unseen samples.
from sklearn.model_selection import train_test_split

TEST_FRACTION = 0.3  # share of the samples reserved for testing
SPLIT_SEED = 109     # fixed seed so the split is reproducible

X_train, X_test, y_train, y_test = train_test_split(
    wine.data,
    wine.target,
    test_size=TEST_FRACTION,
    random_state=SPLIT_SEED,
)

In [32]:
# GaussianNB was imported at the top of the notebook. Create a fresh classifier
# for the wine data instead of refitting the instance trained on the toy weather
# example, so this cell does not depend on whatever an earlier cell left in `model`.
model = GaussianNB()

# Train the model using the training set
model.fit(X_train, y_train)
# Predict the response for the test dataset
y_pred = model.predict(X_test)

In [33]:
# With predictions in hand, compare them against the actual test labels.
from sklearn import metrics

# Accuracy: the fraction of test samples whose predicted class matches the actual class
accuracy = metrics.accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.9074074074074074
