In [3]:
#import all necessary libraries.
import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

#Read the CSV data file into a DataFrame using pandas' read_csv.
#NOTE: this is an absolute, machine-specific location; adjust csv_path when running elsewhere.
csv_path = r'C:\Users\MyPC\Downloads\PlayTennis.csv'
play_tennis = pd.read_csv(csv_path)

#The data describes the weather: outlook, temperature, humidity and wind conditions.
#The last column (Play Tennis) is the target variable that suggests the possibility of playing tennis.
#Show the first rows as a quick sanity check of what was loaded.
play_tennis.head()

Unnamed: 0,Outlook,Temperature,Humidity,Wind,Play Tennis
0,Sunny,Hot,High,Weak,No
1,Sunny,Hot,High,Strong,No
2,Overcast,Hot,High,Weak,Yes
3,Rain,Mild,High,Weak,Yes
4,Rain,Cool,Normal,Weak,Yes


In [4]:
#We use the Python library scikit-learn (sklearn) to create a model and make predictions.
#sklearn requires the features to be numerical arrays,
#so we need to convert the categorical information in our data into numbers.
#There are multiple ways of doing this; we will keep it simple and use a LabelEncoder.
#A LabelEncoder converts categorical data into integers ranging from 0 to n-1,
#where n is the number of classes in the variable.
#For example, in the case of Outlook there are 3 classes – Overcast, Rain, Sunny.
#These are represented as 0, 1, 2 in alphabetical order.
#Every column gets its own encoder, stored in `encoders`, so that the label <-> code
#mapping of each column (encoders[column].classes_) is still available after encoding.
#A single shared encoder would only remember the column it was fitted on last.
encoders = {}
for column in ['Outlook', 'Temperature', 'Humidity', 'Wind', 'Play Tennis']:
    encoders[column] = LabelEncoder()
    #The text column is overwritten in place with its integer codes.
    play_tennis[column] = encoders[column].fit_transform(play_tennis[column])

#`number` refers to the encoder fitted on the 'Play Tennis' target column.
number = encoders['Play Tennis']

#Prepare the inputs for the model.
#Name the target column and the feature columns the model will learn from.
target = "Play Tennis"
features = ["Outlook", "Temperature", "Humidity", "Wind"]

#To validate the performance of our model, we split the data into a train and a test part.
#The model is built on the train part and validated on the held-out test part.
#sklearn's train_test_split does this for us: 33% of the rows go to the test part,
#and the fixed random_state makes the split the same on every run.
(features_train, features_test,
 target_train, target_test) = train_test_split(
    play_tennis[features],
    play_tennis[target],
    test_size=0.33,
    random_state=54,
)

#Create and train the model: a k-nearest-neighbours classifier that uses 5 neighbours.
#The Minkowski metric with p=2 is the ordinary Euclidean distance.
#fit() returns the fitted estimator itself, so construction and training are chained.
model = KNeighborsClassifier(
    n_neighbors=5,
    metric='minkowski',
    p=2,
).fit(features_train, target_train)

#Make predictions on the test features and measure the performance of the model.
#accuracy_score gives the fraction of test samples whose predicted class matches
#the true class, i.e. how often the classifier is correct.
pred = model.predict(features_test)
accuracy = accuracy_score(target_test, pred)
#Report the score computed above instead of computing it a second time.
print("Accuracy:", accuracy)
#The accuracy is in this case about 0.8

#Now suppose we want to predict for the following conditions:
#  Outlook = Rain, Temperature = Mild, Humidity = High, Wind = Weak
#With the alphabetical label encoding these become the codes 1, 2, 0, 1
#(Rain = 1 of Overcast/Rain/Sunny; and, assuming the other columns only contain the
# values Cool/Hot/Mild, High/Normal and Strong/Weak: Mild = 2, High = 0, Weak = 1).
#The sample is wrapped in a DataFrame that carries the training column names:
#the model was fitted on a DataFrame, and recent scikit-learn versions warn
#("X does not have valid feature names") when it is queried with a bare list.
sample = pd.DataFrame([[1, 2, 0, 1]], columns=features)
print (model.predict(sample))
#which gives a prediction 1 (Yes)

Accuracy: 0.8
[1]
