In [20]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.preprocessing import LabelEncoder

#PREPROCESSING:Preprocessing ka matlab hai data ko machine learning model ke liye ready banana, 
#jaise missing values ko fill karna, scaling karna, aur categorical data ko numeric data 
#mein convert karna.

#LABELENCODER:LabelEncoder ek tool hai jo categorical data (jaise "Red", "Green", "Blue") 
#ko numbers (jaise 0, 1, 2) mein convert karta hai, taake model usse samajh sake.



data = {
    'Weather': ['sunny', 'sunny', 'overcast', 'rainy', 'rainy', 'rainy', 'overcast', 'sunny', 'sunny', 'rainy', 'sunny', 'overcast', 'overcast', 'rainy'],
    'Temperature': ['hot', 'hot', 'hot', 'mild', 'cool', 'cool', 'cool', 'mild', 'cool', 'mild', 'mild', 'mild', 'hot', 'mild'],
    'Play': ['no', 'no', 'yes', 'yes', 'yes', 'no', 'yes', 'no', 'yes', 'yes', 'yes', 'yes', 'yes', 'no']
}


# Isse aik data frame banega.
df = pd.DataFrame(data)


#Initilaizing our label encoder.
labelEncoder=LabelEncoder()

#fit_transform() method LabelEncoder mein use hota hai. 
#Pehle fit() se data ko samajh kar encoding seekhi jati hai, 
#aur phir transform() se data ko numbers mein convert kiya jata hai. 
#Ye dono kaam ek saath fit_transform() mein ho jate hain.


df['Weather'] = labelEncoder.fit_transform(df['Weather'])
# Is step mn dataframe k weather column ko encode kerke update kia he.
#matlab sunny ko 2 , overcast ko 0 and rainy ko 1 value di he.

df['Temperature'] = labelEncoder.fit_transform(df['Temperature'])
# Similarly is step mn hot ki jgha 1,mild ki jagha 0 and cool ki jagha 2 value 


df['Play'] = labelEncoder.fit_transform(df['Play'])
# Yahan per No ki jagha 0 and Yes ki jagha 1 aya.



# Print the updated DataFrame with numerical encoding
print(df)


X = df[['Weather', 'Temperature']] 
y = df['Play'] 
#Here we have separated input column and output column.


#TRAIN_TEST_SPLIT function ka purpose:
#Jab hum model train karte hain, hamein apna data do parts mein divide karna padta hai:
#Training Data: Model ko training dene ke liye, yaani model ye data use kar ke predict karna seekhta hai.
#Testing Data: Isse model ko test kiya jata hai, yaani model ko check karte hain ke wo kitna sahi predict kar sakta hai.
#Isme:test_size=0.3 ka matlab hai ke 30% data testing ke liye use hoga, aur 70% data training ke liye use hoga.
#random_state=42 ka matlab hai ke split random hai, lekin har dafa run karne par same split milega.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=42)



# Initialize Naive Bayes model (GaussianNB is the Naive Bayes model)
model = GaussianNB()





#Jab hum model.fit(X_train, y_train) use karte hain, 
#toh hum model ko training data X_train aur target y_train ke saath train kar rahe hain.
#Model ab X_train aur y_train ko use karke apna internal logic samajhta hai 
#aur parameters adjust karta hai taake wo training data par best prediction kar sake.
model.fit(X_train, y_train)




#INPUT_DATA
#is line mein hum ek specific input scenario create kar rahe hain, jismein "Weather" aur "Temperature" ka ek set diya gaya hai.
#[[1, 2]]: Yeh list hai jo 'Weather' aur 'Temperature' values ko represent kar rahi hai.
#Yahaan 1 ka matlab hai "overcast" (kyunki humne pehle 'Weather' column ko encode kiya tha aur overcast ko 1 diya tha).
#2 ka matlab hai "mild" (kyunki 'Temperature' mein mild ko 2 encode kiya tha).
#columns=['Weather', 'Temperature']: Yeh columns ka naam define karte hain.
#Outcome:Yeh DataFrame humare input scenario ko model mein predict karne ke liye ready karta hai.
input_data = pd.DataFrame([[1, 2]], columns=['Weather', 'Temperature'])



#PREDICTION
#Yeh line input_data par prediction generate karti hai.
#model.predict(input_data) ka matlab hai ke hum model se ye keh rahe hain ke 
#input_data ke values ko dekhte hue batao ke 'Play' hoga ya nahi (yes/no).
#Model ab predict karega aur encoded format mein ek output dega (0 ya 1).
#Outcome:prediction variable mein output aayega jo ya to 0 (no) ya 1 (yes) ho sakta hai.
prediction = model.predict(input_data)
print("Prediction==>",prediction)



#LEPLAY LABELENCODER
#Is line mein ek naya LabelEncoder instance banaya gaya hai jo Play column ke encoding ke liye hai.
#yeh encoder kaam aayega encoded result ko wapas human-readable form (yes/no) mein convert karne ke liye.
le_play = LabelEncoder()


#Yahaan hum le_play ko fit kar rahe hain "no" aur "yes" values ke encoding ke liye.
#Iska matlab hai ke "no" ko 0 aur "yes" ko 1 map kiya gaya hai.
le_play.fit(['no', 'yes'])


#inverse_transform() ka matlab hai ke model ka encoded prediction (0 ya 1) ko wapas original form mein (yes/no) convert karna.
#prediction mein jo encoded value aayi hai usse inverse_transform ke zariye wapas readable format mein convert kar rahe hain.
predicted_play = le_play.inverse_transform(prediction)


print(f"Prediction for Weather=overcast, Temperature=mild: {predicted_play[0]}")


#Is line mein hum testing data X_test ke liye predictions generate kar rahe hain.
#model.predict(X_test) se model X_test mein har row ke liye prediction karega aur output dega.
#Outcome:y_pred mein har testing instance ka prediction aayega, jise model ne predict kiya hai.
y_pred = model.predict(X_test)



#Confusion matrix ek table hai jo model ki prediction aur actual values ke beech mein comparison ko show karta hai. Yeh matrix humein batata hai:
#True Positives (TP): Jahan model ne sahi tarah yes predict kiya jab actual bhi yes tha.
#True Negatives (TN): Jahan model ne sahi tarah no predict kiya jab actual bhi no tha.
#False Positives (FP): Jahan model ne galat tarah yes predict kiya jab actual no tha.
#False Negatives (FN): Jahan model ne galat tarah no predict kiya jab actual yes tha.
#Confusion matrix se humein yeh idea milta hai ke model kin areas mein galat ho raha hai aur kis type ke errors zyada hain.
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(conf_matrix)



accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

    Weather  Temperature  Play
0         2            1     0
1         2            1     0
2         0            1     1
3         1            2     1
4         1            0     1
5         1            0     0
6         0            0     1
7         2            2     0
8         2            0     1
9         1            2     1
10        2            2     1
11        0            2     1
12        0            1     1
13        1            2     0
Prediction==> [0]
Prediction for Weather=overcast, Temperature=mild: no
Confusion Matrix:
[[1 1]
 [1 4]]
Accuracy: 0.71
