# **Regression**

## Data

In [0]:
# Import Packages
import pandas as pd

# Import unscaled, preprocessed data 
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")

# Create a categorical response variable from Total Expenditure (1 is high, 0 is low)
def classifyexp(exp):
    if ((exp >= 0) and (exp <=5.8)):
        return 0
    elif ((exp > 5.8) and (exp <= 15)):
        return 1

test['TExp'] = test['TExpenditure'].apply(classifyexp)
train['TExp'] = train['TExpenditure'].apply(classifyexp)

# Split into test and training data
X_train, y_train = train.drop(columns=['TExpenditure', 'Year', 'TExp', 'Status']), pd.DataFrame(train.TExp)
X_test, y_test = test.drop(columns=['TExpenditure', 'Year', 'TExp', 'Status']), pd.DataFrame(test.TExp)


## Voting Classifier

In [0]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import VotingClassifier

# Training classifiers
clf1 = DecisionTreeClassifier(max_depth=4)
clf2 = KNeighborsClassifier(n_neighbors=9)
clf3 = SVC(C=0.01, gamma=0.001, kernel='poly', probability=True)
eclf = VotingClassifier(estimators=[('dt', clf1), ('knn', clf2),
                                    ('svc', clf3)],
                        voting='soft', weights=[2, 1, 2])

# Fit each classifier
clf1.fit(X_train, y_train)
clf2.fit(X_train, y_train)
clf3.fit(X_train, y_train)
eclf.fit(X_train, y_train)

  app.launch_new_instance()
  y = column_or_1d(y, warn=True)


In [0]:
# Plotting decision regions
x_min, x_max = X_train[:, 0].min() - 1, X_train[:, 0].max() + 1
y_min, y_max = X_train[:, 1].min() - 1, X_train[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1),
                     np.arange(y_min, y_max, 0.1))

f, axarr = plt.subplots(2, 2, sharex='col', sharey='row', figsize=(10, 8))

for idx, clf, titles in zip(product([0, 1], [0, 1]),
                        [clf1, clf2, clf3, eclf],
                        ['Decision Tree (depth=4)', 'KNN (k=9)',
                         'Poly Kernel SVM', 'Soft Voting']):

    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)

    axarr[idx[0], idx[1]].contourf(xx, yy, Z, alpha=0.4)
    axarr[idx[0], idx[1]].scatter(X_train[:, 0], X_train[:, 1], c=y,
                                  s=20, edgecolor='k')
    axarr[idx[0], idx[1]].set_title(titles)

plt.show()