# Classification using a Naïve Bayes Classifier

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report

# Load the dataset; 'Outcome' is the label column, every other column is a feature.
data = pd.read_csv('diabetes.csv')
X = data.drop(columns=['Outcome'])
y = data['Outcome']

# Hold out 20% of the rows for testing (seeded so the split is repeatable).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Build the Gaussian Naïve Bayes model and train it in one step
# (fit() returns the fitted estimator itself).
nb_classifier = GaussianNB().fit(X_train, y_train)

# Predict labels for the held-out rows.
y_pred = nb_classifier.predict(X_test)

# Report overall accuracy followed by the per-class metrics.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print("Classification Report:", classification_report(y_test, y_pred), sep="\n")


Accuracy: 0.7662337662337663
Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.80      0.81        99
           1       0.66      0.71      0.68        55

    accuracy                           0.77       154
   macro avg       0.75      0.75      0.75       154
weighted avg       0.77      0.77      0.77       154



# Regression using a Decision Tree Regressor

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.tree import plot_tree
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Read the Walmart sales CSV into a DataFrame and preview its first five rows.
sales_csv = 'Walmart_sales.csv'
df = pd.read_csv(sales_csv)
df.head(5)

Unnamed: 0,Store,Date,Weekly_Sales,Holiday_Flag,Temperature,Fuel_Price,CPI,Unemployment
0,1,05-02-2010,1643690.9,0,42.31,2.572,211.096358,8.106
1,1,12-02-2010,1641957.44,1,38.51,2.548,211.24217,8.106
2,1,19-02-2010,1611968.17,0,39.93,2.514,211.289143,8.106
3,1,26-02-2010,1409727.59,0,46.63,2.561,211.319643,8.106
4,1,05-03-2010,1554806.68,0,46.5,2.625,211.350143,8.106


In [None]:
# Remove the Date and Holiday_Flag columns before modelling.
# The frame is reassigned instead of mutated with inplace=True, and
# errors="ignore" skips columns that are already gone, so re-running this
# cell leaves the frame unchanged instead of raising KeyError.
df = df.drop(columns=["Date", "Holiday_Flag"], errors="ignore")
df

Unnamed: 0,Store,Weekly_Sales,Temperature,Fuel_Price,CPI,Unemployment
0,1,1643690.90,42.31,2.572,211.096358,8.106
1,1,1641957.44,38.51,2.548,211.242170,8.106
2,1,1611968.17,39.93,2.514,211.289143,8.106
3,1,1409727.59,46.63,2.561,211.319643,8.106
4,1,1554806.68,46.50,2.625,211.350143,8.106
...,...,...,...,...,...,...
6430,45,713173.95,64.88,3.997,192.013558,8.684
6431,45,733455.07,64.89,3.985,192.170412,8.667
6432,45,734464.36,54.47,4.000,192.327265,8.667
6433,45,718125.53,56.47,3.969,192.330854,8.667


In [None]:
# Mark zeros in these columns as missing (NaN).
# The replacement is assigned back to the frame: calling
# df[col].replace(..., inplace=True) operates on a temporary Series, which
# triggers a chained-assignment warning on recent pandas and leaves df
# untouched when Copy-on-Write is enabled.
zero_as_missing = ["Weekly_Sales", "Fuel_Price", "CPI", "Unemployment"]
df[zero_as_missing] = df[zero_as_missing].replace(0, np.nan)

# Preview the first rows after the replacement.
df.head()

Unnamed: 0,Store,Weekly_Sales,Temperature,Fuel_Price,CPI,Unemployment
0,1,1643690.9,42.31,2.572,211.096358,8.106
1,1,1641957.44,38.51,2.548,211.24217,8.106
2,1,1611968.17,39.93,2.514,211.289143,8.106
3,1,1409727.59,46.63,2.561,211.319643,8.106
4,1,1554806.68,46.5,2.625,211.350143,8.106


In [None]:
# CPI is the regression target; every remaining column is used as a feature.
x, y = df.drop("CPI", axis = 1), df.CPI

# Hold out 20% of the rows for testing. random_state pins the shuffle so the
# split, the fitted tree and the predictions are the same on every run.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 42)

In [None]:
# Train a decision tree capped at depth 4 (seeded with random_state=0) on the
# training split; fit() returns the fitted estimator, so it is chained here.
regressor = DecisionTreeRegressor(max_depth = 4, random_state = 0).fit(x_train, y_train)

# Predict the target for the held-out rows and display the resulting array.
y_pred = regressor.predict(x_test)
y_pred

array([190.04235453, 147.31164179, 212.91924216, ..., 163.44443931,
       163.44443931, 163.44443931])

In [None]:
# Draw the fitted regression tree on an explicit 10x8 inch, 500-dpi figure.
fig, ax = plt.subplots(figsize=(10, 8), dpi=500)

# feature_names is passed as a plain list: some scikit-learn releases validate
# this parameter strictly and reject a pandas Index. The trailing semicolon
# suppresses the list of annotation objects that plot_tree returns.
plot_tree(regressor, feature_names=list(x.columns), ax=ax);