In [26]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.multiclass import OneVsRestClassifier

In [36]:
# Load the iris measurements into a DataFrame.
# NOTE(review): read_csv treats the first line of the file as the header by
# default, and the following cell overwrites the column names. If this CSV has
# no header row, its first record is silently lost — confirm against the file.
data = pd.read_csv(filepath_or_buffer="./dataset/iris_data.csv")

In [37]:
# Give the five columns explicit names: four measurements followed by the label.
# NOTE(review): petal_* is listed before sepal_*; verify that this matches the
# actual column order in iris_data.csv. Features are later selected by position,
# so a mismatch would only mislabel columns, not change the fitted model.
columns = [
    "petal_length",
    "petal_width",
    "sepal_length",
    "sepal_width",
    "class",
]
data.columns = columns

In [38]:
# Every column except the last is a feature; the last column is the class label.
X, y = data.iloc[:, :-1], data.iloc[:, -1]

In [39]:
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [40]:
# Fit the scaler on the training rows only, then apply the same transformation
# to both splits so no information from the test rows leaks into the scaling.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [41]:
# One-vs-rest wrapper around a default logistic regression, fitted on the
# scaled training split. fit() returns the estimator itself, so it can be
# chained; the trailing expression displays the fitted model as before.
model = OneVsRestClassifier(estimator=LogisticRegression()).fit(X_train, y_train)
model

In [42]:
# Predict class labels for the held-out rows.
y_pred = model.predict(X=X_test)

In [43]:
# Report hold-out performance of the fitted classifier.
print(f"Accuracy: {accuracy_score(y_test,y_pred)}")
# Start the matrix on its own line: printed after the label on the same line,
# its first row is shifted right and no longer lines up with the rows below.
print(f"confusion matrix: \n{confusion_matrix(y_test, y_pred)}")
print(f"Classification report: \n{classification_report(y_test, y_pred)}")

Accuracy: 0.8222222222222222
confusion matrix: [[19  6]
 [ 2 18]]
Classification report: 
                 precision    recall  f1-score   support

    Iris-setosa       0.90      0.76      0.83        25
Iris-versicolor       0.75      0.90      0.82        20

       accuracy                           0.82        45
      macro avg       0.83      0.83      0.82        45
   weighted avg       0.84      0.82      0.82        45



### Decision Tree

In [1]:
import numpy as np
import pandas as pd

In [18]:
# Load the weather / play records into a DataFrame.
dataset = pd.read_csv(filepath_or_buffer="./dataset/weather.csv")

In [19]:
# Preview the first five raw rows.
dataset.head(n=5)

Unnamed: 0,Outlook,Temp,Humidity,Windy,Play
0,rainy,hot,high,0,0
1,rainy,hot,high,1,0
2,overcast,hot,high,0,1
3,sunny,mild,high,0,1
4,sunny,cool,normal,0,1


In [25]:
# Frequency of each distinct value in every feature column, shown as a tuple of
# Series in column order.
tuple(
    dataset[feature].value_counts()
    for feature in ("Outlook", "Temp", "Humidity", "Windy")
)

(Outlook
 rainy       5
 sunny       5
 overcast    4
 Name: count, dtype: int64,
 Temp
 mild    6
 hot     4
 cool    4
 Name: count, dtype: int64,
 Humidity
 high      7
 normal    7
 Name: count, dtype: int64,
 Windy
 0    8
 1    6
 Name: count, dtype: int64)

In [34]:
from sklearn.preprocessing import LabelEncoder

In [27]:
# Label-encode the three string-valued feature columns in place.
# Each column gets its own LabelEncoder, kept in `encoders`, so the
# category <-> integer mapping of every column stays available afterwards
# (for example encoders["Outlook"].inverse_transform(...)). Re-fitting a single
# shared encoder would keep only the mapping of the last column it was fitted on.
encoders = {}
for col in ("Outlook", "Temp", "Humidity"):
    encoders[col] = LabelEncoder()
    dataset[col] = encoders[col].fit_transform(dataset[col])

# Backward-compatible alias: as before, `encoder` ends up fitted on Humidity.
encoder = encoders["Humidity"]


In [28]:
# Preview the first five rows again to check the integer-encoded columns.
dataset.head(n=5)

Unnamed: 0,Outlook,Temp,Humidity,Windy,Play
0,1,1,0,0,0
1,1,1,0,1,0
2,0,1,0,0,1
3,2,2,0,0,1
4,2,0,1,0,1


In [30]:
# Features are the first four columns; the target is the last column.
# NOTE(review): the hard-coded 4 assumes the frame has exactly the columns
# Outlook, Temp, Humidity, Windy, Play with no extra leading index column —
# confirm against dataset.columns.
X, y = dataset.iloc[:, :4], dataset.iloc[:, -1]

In [31]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [36]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training rows only and reuse it for the test rows.
# NOTE(review): these features are integer category codes and a 0/1 flag, and
# the model fitted afterwards is a decision tree, which is generally insensitive
# to feature scaling — this step is presumably unnecessary here; confirm.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [32]:
from sklearn.tree import DecisionTreeClassifier

In [37]:
# Fit a decision tree on the training split.
# random_state is fixed because scikit-learn permutes the candidate features at
# every split, so equally good splits are otherwise chosen at random and the
# fitted tree (and the metrics reported below) can change between runs. The
# seed matches the one already used for train_test_split.
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)

In [38]:
# Predict the Play label for the held-out rows.
y_pred = model.predict(X=X_test)

In [39]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Report hold-out performance of the fitted tree.
print(f"Accuracy: {accuracy_score(y_test, y_pred)}")
# Start the multi-line report and matrix on their own lines; printed after the
# label on the same line, their first row is shifted and the table is misaligned.
print(f"Classification Report: \n{classification_report(y_test, y_pred)}")
print(f"Confusion Matrix: \n{confusion_matrix(y_test,y_pred)}")

Accuracy: 0.6
Classification Report:               precision    recall  f1-score   support

           0       0.50      0.50      0.50         2
           1       0.67      0.67      0.67         3

    accuracy                           0.60         5
   macro avg       0.58      0.58      0.58         5
weighted avg       0.60      0.60      0.60         5

Confusion Matrix: [[1 1]
 [1 2]]


### Bank Note Authentication - Decision Tree

In [51]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [41]:
# Load the bank-note authentication records into a DataFrame.
dataset = pd.read_csv(filepath_or_buffer="./dataset/BankNote_Authentication.csv")

In [42]:
# Preview the first five rows.
dataset.head(n=5)

Unnamed: 0,variance,skewness,curtosis,entropy,class
0,3.6216,8.6661,-2.8073,-0.44699,0
1,4.5459,8.1674,-2.4586,-1.4621,0
2,3.866,-2.6383,1.9242,0.10645,0
3,3.4566,9.5228,-4.0112,-3.5944,0
4,0.32924,-4.4552,4.5718,-0.9888,0


In [43]:
# Show (rows, columns) to confirm how much data was loaded.
dataset.shape

(1372, 5)

In [46]:
# The frame has five columns: the first four are the features and the last
# one is the class label.
X, y = dataset.iloc[:, :4], dataset.iloc[:, -1]

In [49]:
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [50]:
# Fit the scaler on the training rows only and reuse it for the test rows.
# NOTE(review): the model fitted afterwards is a decision tree, which is
# generally insensitive to feature scaling — this step is presumably optional.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [54]:
# Fit a Gini-criterion decision tree on the training split.
# random_state is fixed because scikit-learn permutes the candidate features at
# every split, so ties between equally good splits are otherwise broken at
# random and the reported metrics can differ from run to run. The seed matches
# the one already used for train_test_split.
model = DecisionTreeClassifier(criterion="gini", random_state=42)
model.fit(X_train, y_train)

In [55]:
# Predict the class label for the held-out rows.
y_pred = model.predict(X=X_test)

In [57]:
# Report hold-out performance: accuracy, per-class report, then the confusion
# matrix. A single print joins the three pieces with newlines, which produces
# the same text as three consecutive print calls.
print(
    f"Accuracy Score: {accuracy_score(y_test, y_pred)}",
    f"classification_report: \n{classification_report(y_test, y_pred)}",
    f"confusion_matrix: \n{confusion_matrix(y_test, y_pred)}",
    sep="\n",
)

Accuracy Score: 0.9854368932038835
classification_report: 
              precision    recall  f1-score   support

           0       0.97      1.00      0.99       229
           1       1.00      0.97      0.98       183

    accuracy                           0.99       412
   macro avg       0.99      0.98      0.99       412
weighted avg       0.99      0.99      0.99       412

confusion_matrix: 
[[229   0]
 [  6 177]]
