In [1]:
import pandas as pd
import numpy as np
import matplotlib as plt

In [2]:
df = pd.read_csv('/content/DT-Credit.csv')

In [3]:
df.head()

Unnamed: 0,Income,Limit,Rating,Cards,Age,Education,Own,Student,Married,Region,Balance
0,14.891,3606,283,2,34,11,No,No,Yes,South,333
1,106.025,6645,483,3,82,15,Yes,Yes,Yes,West,903
2,104.593,7075,514,4,71,11,No,No,No,West,580
3,148.924,9504,681,3,36,11,Yes,No,No,West,964
4,55.882,4897,357,2,68,16,No,No,Yes,South,331


# ***ZeroR***

In [4]:
most_frequent_class = df["Student"].value_counts().idxmax()
accuracy = df["Student"].value_counts().max() / len(df["Student"])
print(f"most frequent class (zeroR prediction): {most_frequent_class}")
print(f"Accuracy (zeroR prediction): {accuracy:.2f}")

most frequent class (zeroR prediction): No
Accuracy (zeroR prediction): 0.90


# ***OneR***

In [5]:
columns = df.columns.drop("Student")
best_rule = None
lowest_error = float("inf")

In [6]:
for col in columns:
    rule = df.groupby(col)["Student"].agg(lambda x: x.value_counts().idxmax())

    predictions = df[col].map(rule)

    error_rate = (predictions != df["Student"]).mean()

    print(f"Feature: {col}, Error Rate: {error_rate:.6f}")

    if error_rate < lowest_error:
        best_rule = (col, rule)
        lowest_error = error_rate

print(f"Best Feature: {best_rule[0]}, Rule: {best_rule[1]}")

df["OneR_Pred"] = df[best_rule[0]].map(best_rule[1])
df.head()

Feature: Income, Error Rate: 0.002500
Feature: Limit, Error Rate: 0.007500
Feature: Rating, Error Rate: 0.050000
Feature: Cards, Error Rate: 0.100000
Feature: Age, Error Rate: 0.097500
Feature: Education, Error Rate: 0.100000
Feature: Own, Error Rate: 0.100000
Feature: Married, Error Rate: 0.100000
Feature: Region, Error Rate: 0.100000
Feature: Balance, Error Rate: 0.010000
Best Feature: Income, Rule: Income
10.354      No
10.363      No
10.403      No
10.503      No
10.588      No
          ... 
163.329     No
180.379    Yes
180.682     No
182.728     No
186.634     No
Name: Student, Length: 399, dtype: object


Unnamed: 0,Income,Limit,Rating,Cards,Age,Education,Own,Student,Married,Region,Balance,OneR_Pred
0,14.891,3606,283,2,34,11,No,No,Yes,South,333,No
1,106.025,6645,483,3,82,15,Yes,Yes,Yes,West,903,Yes
2,104.593,7075,514,4,71,11,No,No,No,West,580,No
3,148.924,9504,681,3,36,11,Yes,No,No,West,964,No
4,55.882,4897,357,2,68,16,No,No,Yes,South,331,No


# ***K-Nearest-Neighbor Classifiers***

In [7]:
df = pd.read_csv('/content/DT-Credit.csv')

In [8]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.metrics import accuracy_score, mean_squared_error, r2_score

In [9]:
label_encoders = {}
categorical_columns = ["Own", "Student", "Married", "Region"]
for col in categorical_columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

In [10]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(df.drop(columns=["Student", "Region", "Balance"]))

In [11]:
print("Binary Classification (Student column)")
X = X_scaled
y_binary = df["Student"]

X_train, X_test, y_train, y_test = train_test_split(X, y_binary, test_size=0.15, random_state=42)
knn_binary = KNeighborsClassifier(n_neighbors=2)
knn_binary.fit(X_train, y_train)

y_pred_binary = knn_binary.predict(X_test)
accuracy_binary = accuracy_score(y_test, y_pred_binary)
print(f"KNN Binary Classification Accuracy: {accuracy_binary:.4f}")

Binary Classification (Student column)
KNN Binary Classification Accuracy: 0.8833


In [12]:
print("Multi-Class Classification (Region column)")
y_multiclass = df["Region"]

X_train, X_test, y_train, y_test = train_test_split(X, y_multiclass, test_size=0.15, random_state=42)
knn_multiclass = KNeighborsClassifier(n_neighbors=3)
knn_multiclass.fit(X_train, y_train)

y_pred_multiclass = knn_multiclass.predict(X_test)
accuracy_multiclass = accuracy_score(y_test, y_pred_multiclass)
print(f"KNN Multi-Class Classification Accuracy: {accuracy_multiclass:.4f}")

Multi-Class Classification (Region column)
KNN Multi-Class Classification Accuracy: 0.3000


In [13]:
print("Regression (Balance column)")
y_regression = df["Balance"]

X_train, X_test, y_train, y_test = train_test_split(X, y_regression, test_size=0.15, random_state=42)
knn_regressor = KNeighborsRegressor(n_neighbors=3)
knn_regressor.fit(X_train, y_train)

y_pred_regression = knn_regressor.predict(X_test)
mse = mean_squared_error(y_test, y_pred_regression)
r2 = r2_score(y_test, y_pred_regression)
print(f"KNN Regression Mean Squared Error: {mse:.4f}")
print(f"KNN Regression R^2 Score: {r2:.4f}")

Regression (Balance column)
KNN Regression Mean Squared Error: 52878.5370
KNN Regression R^2 Score: 0.6514


# ***Naive Bayesian Classifier***

In [14]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

In [15]:
df = pd.read_csv('/content/DT-Credit.csv')

In [16]:
label_encoders = {}
categorical_columns = ["Own", "Married", "Region", "Student"]
for col in categorical_columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

In [17]:
print("Binary Classification (Student column)")
target_binary = "Student"
X_binary = df.drop(columns=[target_binary])
y_binary = df[target_binary]

X_train, X_test, y_train, y_test = train_test_split(X_binary, y_binary, test_size=0.15, random_state=42)
nb_binary = GaussianNB()
nb_binary.fit(X_train, y_train)

y_pred_binary = nb_binary.predict(X_test)
accuracy_binary = accuracy_score(y_test, y_pred_binary)
print(f"Naive Bayes Binary Classification Accuracy: {accuracy_binary:.4f}")

Binary Classification (Student column)
Naive Bayes Binary Classification Accuracy: 0.8833


In [18]:
print("Multi-Class Classification (Region column)")
target_multiclass = "Region"
X_multiclass = df.drop(columns=[target_multiclass])
y_multiclass = df[target_multiclass]

X_train, X_test, y_train, y_test = train_test_split(X_multiclass, y_multiclass, test_size=0.15, random_state=42)
nb_multiclass = GaussianNB()
nb_multiclass.fit(X_train, y_train)

y_pred_multiclass = nb_multiclass.predict(X_test)
accuracy_multiclass = accuracy_score(y_test, y_pred_multiclass)
print(f"Naive Bayes Multi-Class Classification Accuracy: {accuracy_multiclass:.4f}")

Multi-Class Classification (Region column)
Naive Bayes Multi-Class Classification Accuracy: 0.4667


# ***Support vector machine (SVM)***

In [19]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

In [20]:
df = pd.read_csv('/content/DT-Credit.csv')

In [21]:
label_encoders = {}
categorical_columns = ["Own", "Married", "Region", "Student"]
for col in categorical_columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

In [22]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(df.drop(columns=["Student", "Region"]))

In [23]:
print("Binary Classification (Student column)")
target_binary = "Student"
X_binary = X_scaled
y_binary = df[target_binary]

X_train, X_test, y_train, y_test = train_test_split(X_binary, y_binary, test_size=0.15, random_state=42)
svm_binary = SVC(kernel="linear", random_state=42)
svm_binary.fit(X_train, y_train)

y_pred_binary = svm_binary.predict(X_test)
accuracy_binary = accuracy_score(y_test, y_pred_binary)
print(f"SVM Binary Classification Accuracy: {accuracy_binary:.4f}")

Binary Classification (Student column)
SVM Binary Classification Accuracy: 0.9833


In [24]:
print("Multi-Class Classification (Region column)")
target_multiclass = "Region"
X_multiclass = X_scaled
y_multiclass = df[target_multiclass]

X_train, X_test, y_train, y_test = train_test_split(X_multiclass, y_multiclass, test_size=0.15, random_state=42)
svm_multiclass = SVC(kernel="linear", random_state=42)
svm_multiclass.fit(X_train, y_train)

y_pred_multiclass = svm_multiclass.predict(X_test)
accuracy_multiclass = accuracy_score(y_test, y_pred_multiclass)
print(f"SVM Multi-Class Classification Accuracy: {accuracy_multiclass:.4f}")

Multi-Class Classification (Region column)
SVM Multi-Class Classification Accuracy: 0.5000


# ***Support vector regression (SVR)***

In [25]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score

In [26]:
df = pd.read_csv('/content/DT-Credit.csv')

In [27]:
target = "Income"

In [28]:
label_encoders = {}
categorical_columns = ["Own", "Student", "Married", "Region"]
for col in categorical_columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

In [29]:
X = df.drop(columns=[target])
y = df[target]

In [30]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [31]:
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.15, random_state=42)

In [32]:
svr = SVR(kernel="rbf", C=1.0, epsilon=0.1)
svr.fit(X_train, y_train)

In [33]:
y_pred = svr.predict(X_test)

In [34]:
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

In [35]:
print(f"SVR Mean Squared Error: {mse:.4f}")
print(f"SVR R^2 Score: {r2:.4f}")

SVR Mean Squared Error: 1152.7782
SVR R^2 Score: 0.1445
