In [None]:
from ucimlrepo import fetch_ucirepo

# Download the Mushroom dataset (UCI ML Repository id 73)
mushroom = fetch_ucirepo(id=73)

# Unpack the feature table and the target table (pandas DataFrames)
X, y = mushroom.data.features, mushroom.data.targets

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import LabelEncoder
import seaborn as sns
import matplotlib.pyplot as plt
import random

# Example data loading
# X, y = pd.read_csv('your_dataset.csv').drop('target', axis=1), pd.read_csv('your_dataset.csv')['target']

# Encode a private copy: the frame owned by the fetched dataset object stays
# untouched, and re-running this cell is harmless.
X = X.copy()

# Columns stored with object dtype (string categories) need a numeric encoding
# before they can be passed to the scikit-learn estimators.
categorical_columns = X.select_dtypes(include=['object']).columns

# Integer-encode every categorical column. The codes are arbitrary identifiers
# assigned per column; they carry no ordering information.
for col in categorical_columns:
    le = LabelEncoder()
    # Replace the whole column instead of writing through `.loc[:, col]`:
    # an in-place `.loc` write keeps the existing object dtype on pandas >= 2.0,
    # whereas a column replacement takes the encoder's integer dtype.
    X[col] = le.fit_transform(X[col])


In [None]:
# Fixed seed so that "Restart & Run All" reproduces the same split, the same
# forest and therefore the same confusion matrix.
SEED = 42

# Train/test split: hold out 20% of the rows for evaluation
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=SEED
)

# Collapse the single-column target frames to 1-D, as the estimator expects
y_train = y_train.squeeze()
y_test = y_test.squeeze()

# Train the RandomForestClassifier
clf = RandomForestClassifier(n_estimators=100, max_depth=50, random_state=SEED)
clf.fit(X_train, y_train)

# Predict the classes for the test set
y_pred = clf.predict(X_test)

# Confusion matrix: rows are true classes, columns are predicted classes.
# Passing `labels` pins the row/column order to `clf.classes_`, so the tick
# labels below are guaranteed to line up with the cells.
cm = confusion_matrix(y_test, y_pred, labels=clf.classes_)

# Plot the confusion matrix with the class names on both axes
fig, ax = plt.subplots()
sns.heatmap(
    cm, annot=True, fmt='g',
    xticklabels=clf.classes_, yticklabels=clf.classes_, ax=ax
)
ax.set_xlabel('Predicted labels')
ax.set_ylabel('True labels')
ax.set_title('Confusion Matrix')
plt.show()

In [None]:
from ucimlrepo import fetch_ucirepo

# Download the EEG Eye State dataset (UCI ML Repository id 264)
eeg_eye_state = fetch_ucirepo(id=264)

# Unpack the feature table and the target table (pandas DataFrames).
# Note: this rebinds the notebook-wide X and y used by the cells below.
X, y = eeg_eye_state.data.features, eeg_eye_state.data.targets

In [None]:
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
import sklearn.metrics as metrics
import sklearn.model_selection as ms

# Fixed seed so the reported mean accuracies are reproducible across runs.
SEED = 42

# Accuracy wrapped as a scorer object for cross_val_score
accuracy = metrics.make_scorer(metrics.accuracy_score)

# Collapse the single-column target frame to 1-D
y_flat = y.squeeze()

# One shared, seeded splitter: every model is scored on exactly the same five
# shuffled folds, so differences in the means come from the models and not
# from each model having drawn a different random partition.
cv = ms.KFold(n_splits=5, shuffle=True, random_state=SEED)

clf_score = cross_val_score(
    DecisionTreeClassifier(random_state=SEED, max_depth=50),
    X, y_flat, cv=cv, scoring=accuracy
)
rf_clf_score = cross_val_score(
    RandomForestClassifier(random_state=SEED, n_estimators=100, max_depth=50),
    X, y_flat, cv=cv, scoring=accuracy
)
knn_score = cross_val_score(
    KNeighborsClassifier(n_neighbors=5),
    X, y_flat, cv=cv, scoring=accuracy
)
gnb_score = cross_val_score(
    GaussianNB(),
    X, y_flat, cv=cv, scoring=accuracy
)

# Mean accuracy over the five folds for each model
print(f"Decision Tree Classifier: {clf_score.mean()}")
print(f"Random Forest Classifier: {rf_clf_score.mean()}")
print(f"K Nearest Neighbors Classifier: {knn_score.mean()}")
print(f"Gaussian Naive Bayes Classifier: {gnb_score.mean()}")

In [None]:
from sklearn.tree import DecisionTreeClassifier  # For classification tasks

# Fixed seed so the split (reused by the following model cells) and the tree
# are the same on every run.
SEED = 42

# Hold out 20% of the rows for evaluation
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=SEED
)

# Collapse the single-column target frames to 1-D, as the estimator expects
y_train = y_train.squeeze()
y_test = y_test.squeeze()

clf = DecisionTreeClassifier(random_state=SEED, max_depth=50)
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

# Rows are true classes, columns are predicted classes, ordered as clf.classes_
cm = confusion_matrix(y_test, y_pred, labels=clf.classes_)

fig, ax = plt.subplots()
sns.heatmap(
    cm, annot=True, fmt='g',
    xticklabels=clf.classes_, yticklabels=clf.classes_, ax=ax
)
ax.set_xlabel('Predicted labels')
ax.set_ylabel('True labels')
ax.set_title('Decision Tree - Confusion Matrix')
plt.show()


In [None]:
# Random forest, trained and evaluated on the train/test split created in the previous cell
from sklearn.ensemble import RandomForestClassifier

# Fixed seed so the forest (and its confusion matrix) is the same on every run.
SEED = 42

# Uses X_train / X_test / y_train / y_test as they currently exist in the kernel
rf_clf = RandomForestClassifier(n_estimators=100, max_depth=50, random_state=SEED)
rf_clf.fit(X_train, y_train)

y_pred = rf_clf.predict(X_test)

# Rows are true classes, columns are predicted classes, ordered as rf_clf.classes_
cm = confusion_matrix(y_test, y_pred, labels=rf_clf.classes_)

fig, ax = plt.subplots()
sns.heatmap(
    cm, annot=True, fmt='g',
    xticklabels=rf_clf.classes_, yticklabels=rf_clf.classes_, ax=ax
)
ax.set_xlabel('Predicted labels')
ax.set_ylabel('True labels')
ax.set_title('Random Forest - Confusion Matrix')
plt.show()

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split

# k-nearest-neighbours (k=5) on the train/test split currently in the kernel
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)

y_pred = knn.predict(X_test)

# Rows are true classes, columns are predicted classes, ordered as knn.classes_
cm = confusion_matrix(y_test, y_pred, labels=knn.classes_)

fig, ax = plt.subplots()
sns.heatmap(
    cm, annot=True, fmt='g',
    xticklabels=knn.classes_, yticklabels=knn.classes_, ax=ax
)
ax.set_xlabel('Predicted labels')
ax.set_ylabel('True labels')
ax.set_title('K Nearest Neighbors - Confusion Matrix')
plt.show()

In [None]:
from sklearn.naive_bayes import GaussianNB


# Create and fit a Gaussian Naive Bayes classifier on the current train split
gnb = GaussianNB()
gnb.fit(X_train, y_train)

# Predict on the held-out rows
y_pred = gnb.predict(X_test)

# Rows are true classes, columns are predicted classes, ordered as gnb.classes_
cm = confusion_matrix(y_test, y_pred, labels=gnb.classes_)

fig, ax = plt.subplots()
sns.heatmap(
    cm, annot=True, fmt='g',
    xticklabels=gnb.classes_, yticklabels=gnb.classes_, ax=ax
)
ax.set_xlabel('Predicted labels')
ax.set_ylabel('True labels')
ax.set_title('Gaussian Naive Bayes - Confusion Matrix')
plt.show()


In [None]:
# Load haberman.data. The file is read without a header row, so pandas labels
# the columns with integers starting at 0.
haberman = pd.read_csv('haberman.data', header=None)

# Column 3 is used as the target; all remaining columns are the features.
# Note: this rebinds the notebook-wide X and y used by the cells below.
y = haberman[3]
X = haberman.drop(columns=[3])

In [None]:
# Decision tree on the Haberman data
# Fixed seed so the split (reused by the next cell) and the tree are the same
# on every run.
SEED = 42

# Hold out 20% of the rows for evaluation. `y` is already a 1-D Series here
# (a single column selected from the frame), so no squeeze is needed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=SEED
)

clf = DecisionTreeClassifier(random_state=SEED, max_depth=50)
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

# Rows are true classes, columns are predicted classes, ordered as clf.classes_
cm = confusion_matrix(y_test, y_pred, labels=clf.classes_)

fig, ax = plt.subplots()
sns.heatmap(
    cm, annot=True, fmt='g',
    xticklabels=clf.classes_, yticklabels=clf.classes_, ax=ax
)
ax.set_xlabel('Predicted labels')
ax.set_ylabel('True labels')
ax.set_title('Decision Tree - Confusion Matrix')
plt.show()

In [None]:
# Random forest on the train/test split currently in the kernel
# Fixed seed so the forest (and its confusion matrix) is the same on every run.
SEED = 42

# Note: rebinds `clf`, replacing the classifier fitted in the previous cell
clf = RandomForestClassifier(n_estimators=100, max_depth=50, random_state=SEED)
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

# Rows are true classes, columns are predicted classes, ordered as clf.classes_
cm = confusion_matrix(y_test, y_pred, labels=clf.classes_)

fig, ax = plt.subplots()
sns.heatmap(
    cm, annot=True, fmt='g',
    xticklabels=clf.classes_, yticklabels=clf.classes_, ax=ax
)
ax.set_xlabel('Predicted labels')
ax.set_ylabel('True labels')
ax.set_title('Random Forest - Confusion Matrix')
plt.show()

In [None]:
# Fixed seed so the split, the trees and the printed score are reproducible.
SEED = 42

# Tiny hand-written binary problem: 10 samples with 3 features each
X = [[0, 0, 0], [1, 1, 1], [0, 0, 0], [1, 1, 0], [0, 0, 0], [1, 1, 1], [0, 0, 0], [1, 1, 1], [0, 0, 0], [1, 1, 1]]
y = [0, 1, 0, 1, 0, 1, 0, 1, 0, 1]

# 5-fold cross-validated accuracy of a decision tree on the full toy set
score = cross_val_score(
    DecisionTreeClassifier(random_state=SEED, max_depth=50),
    X, y, cv=5, scoring='accuracy'
)

# Only 2 of the 10 samples are held out. Stratifying on y puts one sample of
# each class in the test set instead of leaving that to chance.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=SEED
)

clf = DecisionTreeClassifier(random_state=SEED, max_depth=50)
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

# Explicit labels keep the matrix 2x2 even if a class were absent from
# both y_test and y_pred.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])

fig, ax = plt.subplots()
sns.heatmap(
    cm, annot=True, fmt='g',
    xticklabels=[0, 1], yticklabels=[0, 1], ax=ax
)
ax.set_xlabel('Predicted labels')
ax.set_ylabel('True labels')
ax.set_title('Decision Tree (toy data) - Confusion Matrix')
plt.show()

print(score.mean())