# Machine Learning II: Assignment 2
Leon Berghoff, Jiawei Li, Strahinja Trenkic, Otto Riess

In [1]:
import pandas as pd
from bank_mkt import import_dataset, split_dataset, transform
from sklearn.preprocessing import FunctionTransformer, StandardScaler, OneHotEncoder
from sklearn.pipeline import make_pipeline
from sklearn.compose import ColumnTransformer

# Data Preparation

A term deposit is a fixed-term investment in which money is deposited into an account at a financial institution. Such an instrument generates revenue for the bank, so the bank records the outcome of each telemarketing phone call alongside other data related to the person being called, the economic indicators, and certain parameters of the previous contact with that person. By analysing data from previous phone calls, the bank would like to improve its telemarketing results. The bank marketing dataset was collected by Moro, Cortez, and Rita (2014) and is ordered by date, ranging from May 2008 to November 2010.

In [2]:
# Read the dataset through the project's `import_dataset` helper.
bank_mkt = import_dataset("BankMarketing.csv")

# Wrap the project's `transform` function so it can be used as a pipeline step.
func_transformer = FunctionTransformer(func=transform)

# Columns handed to StandardScaler; all remaining columns are passed through
# unchanged (see `remainder="passthrough"` below).
num_features = [
    "month",
    "day_of_week",
    "campaign",
    "previous",
    "emp.var.rate",
    "cons.price.idx",
    "cons.conf.idx",
    "euribor3m",
    "nr.employed",
]

standard_scaler = ColumnTransformer(
    transformers=[("scaler", StandardScaler(), num_features)],
    remainder="passthrough",
)

# Preprocessing chain: feature transformation first, then scaling.
preprocessor = make_pipeline(func_transformer, standard_scaler)

# NOTE(review): test_size=0.8 presumably holds out 80% of the rows, leaving a
# small training set (perhaps to keep SVM fitting tractable) — confirm against
# `split_dataset`. Any extra return values are discarded via `*_`.
X_train, y_train, X_test, y_test, *_ = split_dataset(
    bank_mkt,
    preprocessor,
    test_size=0.8,
)

# Metrics

# Hyperparameters

In [3]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

# `loguniform` is provided by SciPy (>= 1.4). The copy in `sklearn.utils.fixes`
# was only a back-port and is not available in newer scikit-learn releases, so
# prefer SciPy and fall back to the old location only on old environments.
try:
    from scipy.stats import loguniform
except ImportError:
    from sklearn.utils.fixes import loguniform

In [4]:
# Polynomial-kernel SVM with balanced class weights and a fixed seed.
poly_svm = SVC(kernel="poly", class_weight="balanced", random_state=42)

# Search space: C is sampled log-uniformly from [1, 100]; degree and gamma are
# drawn from the listed discrete choices.
poly_distributions = {
    "C": loguniform(1e0, 1e2),
    "degree": [1, 2, 3],
    "gamma": ["scale"],
}

# random_state seeds the candidate sampling so that the six sampled parameter
# settings (and therefore the reported best score) are reproducible on a
# fresh Restart & Run All; without it each run evaluates different candidates.
poly_search = RandomizedSearchCV(
    poly_svm,
    poly_distributions,
    scoring="average_precision",
    cv=5,
    n_jobs=-1,
    n_iter=6,
    random_state=42,
)

# `fit` returns the fitted search object itself.
poly_fit = poly_search.fit(X_train, y_train)
poly_results = poly_fit.cv_results_
poly_best_params = poly_fit.best_params_
poly_best_score = poly_fit.best_score_

print(f"best parameters found: {poly_best_params}, with mean test score: {poly_best_score}")

best parameters found: {'C': 19.424418394606366, 'degree': 1, 'gamma': 'scale'}, with mean test score: 0.5104245363677274
