In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.metrics import f1_score, make_scorer
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import MinMaxScaler

from sklearn.pipeline import Pipeline
from sklearn.model_selection import KFold
import time
from contextlib import contextmanager
from sklearn.neural_network import MLPClassifier
# from sklearn.preprocessing import Imputer
from sklearn.impute import SimpleImputer

@contextmanager
def timer(title):
    """Context manager that prints how long the wrapped block took.

    Parameters
    ----------
    title : str
        Label placed at the start of the printed line.

    On exit it prints ``"<title> - done in <N>s"`` with the elapsed time
    rounded to whole seconds. The line is printed even if the wrapped block
    raises, so the time spent before a failure is still visible; the
    exception then propagates unchanged.
    """
    # perf_counter is monotonic, so the measurement is unaffected by
    # wall-clock adjustments while the block is running.
    started = time.perf_counter()
    try:
        yield
    finally:
        print("{} - done in {:.0f}s".format(title, time.perf_counter() - started))
# Macro-averaged F1 wrapped as a scorer object (higher is better),
# intended for use with cross-validation helpers.
scorer = make_scorer(
    f1_score,
    average='macro',
    greater_is_better=True,
)


In [2]:
def readCSV(train_path='../input/final_training_set.csv',
            test_path='../input/final_testing_set.csv'):
    """Load the training and testing CSV files and strip non-feature columns.

    Parameters
    ----------
    train_path : str, default='../input/final_training_set.csv'
        Location of the training CSV.
    test_path : str, default='../input/final_testing_set.csv'
        Location of the testing CSV.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        ``(train, test)``. In both frames the first column of the file and
        the ``'idhogar'`` and ``'Id'`` columns are removed. ``train`` keeps
        its ``'Target'`` column; ``test`` has ``'Target'`` removed as well.

    Raises
    ------
    KeyError
        If a column that is dropped by name is missing from its file.
    """
    def _load(path, named_columns):
        # One-line purpose: read a CSV and drop its first column plus the
        # given named columns.
        frame = pd.read_csv(path)
        # The first column is dropped by position.
        # NOTE(review): presumably a saved index column - confirm.
        frame = frame.drop(columns=[frame.columns[0]])
        return frame.drop(columns=named_columns)

    train = _load(train_path, ['idhogar', 'Id'])
    test = _load(test_path, ['idhogar', 'Id', 'Target'])
    return train, test

In [3]:

def model_training_MLP(train, test, hidden_layer_sizes=(8,), random_state=None):
    """Fit an MLP classifier on ``train`` and predict labels for ``test``.

    Features are median-imputed and min-max scaled. The preprocessing
    pipeline is fitted on the training features only and then applied to
    ``test``.

    Parameters
    ----------
    train : pandas.DataFrame
        Training features plus a ``'Target'`` label column. The frame is
        left unmodified.
    test : pandas.DataFrame
        Features to predict on; passed through the pipeline fitted on the
        training features.
    hidden_layer_sizes : tuple of int, default=(8,)
        Forwarded to ``MLPClassifier``; the default is one hidden layer of
        8 units.
    random_state : int or None, default=None
        Forwarded to ``MLPClassifier``. Pass an int for reproducible
        predictions; ``None`` keeps the unseeded behaviour.

    Returns
    -------
    list of numpy.ndarray
        A one-element list holding the predicted labels for ``test``. The
        list wrapper is kept so callers can stack and average it.
    """
    labels = np.array(train['Target']).astype(int)
    # Non-inplace drop so the caller's DataFrame keeps its 'Target' column.
    features = train.drop(columns=['Target'])

    pipeline = Pipeline([('imputer', SimpleImputer(strategy='median')),
                         ('scaler', MinMaxScaler())])

    # Fit the preprocessing on training data only, then reuse it for test.
    train_set = pipeline.fit_transform(features)
    test_set = pipeline.transform(test)

    clf = MLPClassifier(hidden_layer_sizes=hidden_layer_sizes,
                        random_state=random_state)
    clf.fit(train_set, labels)

    return [clf.predict(test_set)]

In [4]:
def main(debug = False):
    """Run the full pipeline and write ``submission.csv``.

    Steps: load the prepared train/test sets, train the MLP and predict,
    attach the predictions to the testing-set rows, join them onto the
    competition ``test.csv`` by ``'idhogar'``, and write the ``Id``/``Target``
    columns to ``submission.csv`` in the working directory.

    Parameters
    ----------
    debug : bool, default=False
        Currently unused; kept so existing callers continue to work.
    """
    # Label given to test rows whose 'idhogar' has no prediction.
    # NOTE(review): presumably the fallback/majority class - confirm.
    default_target = 4

    train, test = readCSV()

    with timer("Automatic Model Tuning..."):
        predicts_result = model_training_MLP(train,test)
    with timer("Final Submission"):
        # NOTE(review): this re-reads the testing set from a different path
        # than readCSV uses; the rows must line up one-to-one with the
        # predictions - confirm both paths refer to the same file.
        predictions = pd.read_csv('../input/traintest-set/final_testing_set.csv')
        # Average the stacked prediction arrays and round back to a label.
        predictions['Target'] = np.array(predicts_result).mean(axis=0).round().astype(int)
        predictions = predictions[['idhogar','Target']].copy()

        individuals = pd.read_csv('../input/costa-rican-household-poverty-prediction/test.csv')
        individuals = individuals[['Id','idhogar']].copy()

        # Left join keeps only rows from test.csv. An outer join would also
        # emit rows for households that appear only in the predictions, and
        # their missing Id would then be filled with the default label.
        # Assumes one prediction row per 'idhogar' - TODO confirm.
        submission = pd.merge(individuals, predictions, on='idhogar', how='left')
        # Only 'Target' is filled; the other columns are left untouched.
        submission['Target'] = submission['Target'].fillna(default_target)
        submission = submission.drop(columns='idhogar')
        submission = submission.astype({'Target': int})
        submission.to_csv('submission.csv', index = False)

In [5]:
# Entry point: run the whole pipeline and report its total duration.
if __name__ == "__main__":
    with timer("Full model run"):
        main(debug=False)



Automatic Model Tuning... - done in 6s
Final Submission - done in 1s
Full model run - done in 7s


In [6]:
from IPython.display import FileLink

# Last expression of the cell: renders a link to the generated submission file.
FileLink("submission.csv")
