In [1]:
# Install the notebook's dependencies into the *kernel's* environment.
# `%pip` is used explicitly so the cell does not depend on automagic being on;
# `requests` is included because the analysis cell imports it.
%pip install pandas numpy scikit-learn requests


Note: you may need to restart the kernel to use updated packages.


In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder
import requests
from io import BytesIO, StringIO
import zipfile

# Load the dataset
# The data is fetched as a ZIP archive from the UCI repository URL below and
# the CSV inside it is parsed straight from memory (nothing is written to disk).
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00222/bank-additional.zip"

# Download the ZIP file.
# - timeout: keeps the cell from hanging forever on a stalled connection.
# - raise_for_status(): fails here on an HTTP error instead of later with a
#   confusing BadZipFile when the error page is handed to zipfile.
response = requests.get(url, timeout=60)
response.raise_for_status()

# Extract the contents of the ZIP file
with zipfile.ZipFile(BytesIO(response.content)) as z:
    # Collect the CSV members in archive order, skipping macOS resource-fork
    # entries (stored under "__MACOSX/"), which are not real data files.
    csv_candidates = [
        name
        for name in z.namelist()
        if name.endswith('.csv') and not name.startswith('__MACOSX/')
    ]
    if not csv_candidates:
        raise FileNotFoundError(f"No CSV file found in the archive downloaded from {url}")

    # NOTE(review): the first CSV listed in the archive is used; if the archive
    # ships several CSVs, confirm this is the intended one.
    csv_filename = csv_candidates[0]

    # Read the CSV file (fields are separated by semicolons)
    with z.open(csv_filename) as f:
        bank_df = pd.read_csv(f, sep=';')

# Display the first few rows of the dataset
print(bank_df.head())



# Preprocess the data
# The target column 'y' is overwritten in place with the encoder's integer
# codes; `le` keeps the fitted encoder around for mapping codes back to labels.
le = LabelEncoder()
bank_df['y'] = le.fit_transform(bank_df['y'])

# Target vector: the (now encoded) 'y' column
y = bank_df['y']

# Feature matrix: every other column, with categorical columns expanded into
# one-hot indicator columns
X = bank_df.drop(columns='y')
X = pd.get_dummies(X)

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Build the decision tree (seeded for reproducibility) and fit it on the
# training portion; fit() returns the estimator itself, so `clf` is the
# trained model.
clf = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# Evaluate the classifier on the held-out test rows
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Print the classification report and then the confusion matrix, each under
# its own heading
for heading, metric in (
    ("\nClassification Report:", classification_report),
    ("\nConfusion Matrix:", confusion_matrix),
):
    print(heading)
    print(metric(y_test, y_pred))

# Make predictions
# Example: run the fitted model on the first 5 samples of the test set
sample_rows = X_test.head()
print("\nPredictions:")
print(clf.predict(sample_rows))


   age        job  marital    education  default housing loan    contact  \
0   56  housemaid  married     basic.4y       no      no   no  telephone   
1   57   services  married  high.school  unknown      no   no  telephone   
2   37   services  married  high.school       no     yes   no  telephone   
3   40     admin.  married     basic.6y       no      no   no  telephone   
4   56   services  married  high.school       no      no  yes  telephone   

  month day_of_week  ...  campaign  pdays  previous     poutcome emp.var.rate  \
0   may         mon  ...         1    999         0  nonexistent          1.1   
1   may         mon  ...         1    999         0  nonexistent          1.1   
2   may         mon  ...         1    999         0  nonexistent          1.1   
3   may         mon  ...         1    999         0  nonexistent          1.1   
4   may         mon  ...         1    999         0  nonexistent          1.1   

   cons.price.idx  cons.conf.idx  euribor3m  nr.employed