<a href="https://colab.research.google.com/github/apriandito/workshop-fmcg-2/blob/main/python/001_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

*Muhammad Apriandito - FMCG Workshop*


---



### **Load Packages and Modules**

In [None]:
# Load Packages
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Load Modules
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn import tree
from sklearn.naive_bayes import GaussianNB 
from sklearn import metrics
from sklearn.metrics import roc_curve, auc

### **Load Data**

In [None]:
# Load Data
# The milk CSV is read directly from its raw GitHub URL.
DATA_URL = "https://raw.githubusercontent.com/apriandito/workshop-fmcg-2/main/data/milk-3.csv"
df = pd.read_csv(DATA_URL)

### **Data Exploration**

In [None]:
# Show Data
# Preview only the first rows instead of rendering the whole frame.
df.head()

In [None]:
# Check Data Information
# DataFrame.info() prints a summary of the frame (columns, non-null counts,
# dtypes) to the cell output; it is called here for that printed summary only.
df.info()

In [None]:
# Visualize Grade Distribution
# Draw on an explicit Axes so the figure can be titled, and finish with
# plt.show() so the cell displays the figure rather than the Axes repr.
fig, ax = plt.subplots()
sns.countplot(x="grade", data=df, ax=ax)
ax.set_title("Grade Distribution")
plt.show()

In [None]:
# Visualize Pairplot
# Pairwise relationships between the columns of df.
sns.pairplot(df)

### **Select Features and Target**

In [None]:
# Select Features
# Column names are kept exactly as written (including the 'temprature' spelling);
# they are used as lookup keys into df.
feature_columns = ['ph', 'temprature', 'taste', 'fat', 'turbidity', 'colour']
feature = df[feature_columns]

In [None]:
# Select Target
# The 'grade' column is the label the classifiers are trained to predict.
target = df.loc[:, 'grade']

In [None]:
# Set Training and Testing Data (70:30)
# random_state pins the shuffle so the same rows land in each split on every
# run; without it the reported metrics change each time the notebook is re-run.
X_train, X_test, y_train, y_test = train_test_split(
    feature,
    target,
    shuffle=True,
    test_size=0.3,
    random_state=42,
)

In [None]:
# Show the Training and Testing Data
# Print the shape of each split, in the order X_train, X_test, y_train, y_test.
for split in (X_train, X_test, y_train, y_test):
    print(split.shape)

### **Modeling**

In [None]:
# Modeling Decision Tree
# random_state is fixed because the tree builder permutes features at each
# split; without a seed, equally good splits can be resolved differently
# between runs, giving a different tree and different predictions.
dtc = tree.DecisionTreeClassifier(random_state=42)
dtc.fit(X_train, y_train)

# Predict to Test Data
y_pred_dtc = dtc.predict(X_test)

In [None]:
# Visualize the fitted Decision Tree
# A larger canvas plus feature/class names makes the nodes readable, and
# plt.show() keeps the cell from echoing the list of annotation objects.
fig, ax = plt.subplots(figsize=(20, 10))
tree.plot_tree(
    dtc,
    feature_names=list(X_train.columns),
    # class_names must follow the classifier's own class order.
    class_names=[str(cls) for cls in dtc.classes_],
    filled=True,
    ax=ax,
)
plt.show()

In [None]:
# Modeling Naive Bayes Classifier
# fit() returns the estimator itself, so construction and training are chained.
gnb = GaussianNB().fit(X_train, y_train)

# Predict to Test Data
y_pred_gnb = gnb.predict(X_test)

### **Evaluation**

In [None]:
# Decision Tree: Accuracy, Precision, Recall, F1 and Cohen's Kappa on the test set
# Precision, recall and F1 use the 'weighted' average across classes.
acc_dtc = metrics.accuracy_score(y_test, y_pred_dtc)
prec_dtc = metrics.precision_score(y_test, y_pred_dtc, average='weighted')
rec_dtc = metrics.recall_score(y_test, y_pred_dtc, average='weighted')
f1_dtc = metrics.f1_score(y_test, y_pred_dtc, average='weighted')
kappa_dtc = metrics.cohen_kappa_score(y_test, y_pred_dtc)

# Report every score with its caption, one per line.
dtc_report = (
    ("Accuracy:", acc_dtc),
    ("Precision:", prec_dtc),
    ("Recall:", rec_dtc),
    ("F1 Score:", f1_dtc),
    ("Cohens Kappa Score:", kappa_dtc),
)
for caption, score in dtc_report:
    print(caption, score)

In [None]:
# Visualize ROC Curve: Decision Tree
# predict_proba columns follow dtc.classes_, so column 1 is the score for
# dtc.classes_[1]; that class is treated as the positive one.
pos_label_dtc = dtc.classes_[1]
y_pred_dtc_proba = dtc.predict_proba(X_test)[:, 1]

# pos_label is passed explicitly: without it roc_curve only accepts labels in
# {0, 1} or {-1, 1} and raises for anything else (e.g. string grades).
fprdtc, tprdtc, _ = metrics.roc_curve(y_test, y_pred_dtc_proba, pos_label=pos_label_dtc)

# Area under exactly the curve plotted below. For 0/1 labels this matches
# roc_auc_score, and it also works where roc_auc_score would reject a 1-D
# score vector (targets with more than two classes).
aucdtc = metrics.auc(fprdtc, tprdtc)

plt.plot(fprdtc, tprdtc, label="Decision Tree, auc=" + str(aucdtc))
plt.title('ROC Curve - Decision Tree')
plt.xlabel('false positive rate')
plt.ylabel('true positive rate')
plt.legend(loc=4)
plt.show()

In [None]:
# Naive Bayes: Accuracy, Precision, Recall, F1 and Cohen's Kappa on the test set
# Precision, recall and F1 use the 'weighted' average across classes.
acc_gnb = metrics.accuracy_score(y_test, y_pred_gnb)
prec_gnb = metrics.precision_score(y_test, y_pred_gnb, average='weighted')
rec_gnb = metrics.recall_score(y_test, y_pred_gnb, average='weighted')
f1_gnb = metrics.f1_score(y_test, y_pred_gnb, average='weighted')
kappa_gnb = metrics.cohen_kappa_score(y_test, y_pred_gnb)

# Report every score with its caption, one per line.
gnb_report = (
    ("Accuracy:", acc_gnb),
    ("Precision:", prec_gnb),
    ("Recall:", rec_gnb),
    ("F1 Score:", f1_gnb),
    ("Cohens Kappa Score:", kappa_gnb),
)
for caption, score in gnb_report:
    print(caption, score)

In [None]:
# ROC Curve: Naive Bayes
# predict_proba columns follow gnb.classes_, so column 1 is the score for
# gnb.classes_[1]; that class is treated as the positive one.
pos_label_gnb = gnb.classes_[1]
y_pred_gnb_proba = gnb.predict_proba(X_test)[:, 1]

# pos_label is passed explicitly: without it roc_curve only accepts labels in
# {0, 1} or {-1, 1} and raises for anything else (e.g. string grades).
fprgnb, tprgnb, _ = metrics.roc_curve(y_test, y_pred_gnb_proba, pos_label=pos_label_gnb)

# Area under exactly the curve plotted below. For 0/1 labels this matches
# roc_auc_score, and it also works where roc_auc_score would reject a 1-D
# score vector (targets with more than two classes).
aucgnb = metrics.auc(fprgnb, tprgnb)

plt.plot(fprgnb, tprgnb, label="Naive Bayes, auc=" + str(aucgnb))
plt.title('ROC Curve - Naive Bayes')
plt.xlabel('false positive rate')
plt.ylabel('true positive rate')
plt.legend(loc=4)
plt.show()