In [1]:
import os
import json
from operator import itemgetter
from collections import defaultdict
from functools import partial
from math import isnan

import cv2
import dill
import numpy as np
import pandas as pd
import scipy as sp
from tqdm import tqdm
from joblib import Parallel, delayed

from sklearn.model_selection import GroupKFold
from sklearn.preprocessing import LabelEncoder

import xlearn as xl

In [26]:
train_df = pd.read_csv("/data/quy/petfinder-adoption-prediction/train.csv", encoding="utf-8")
test_df = pd.read_csv("/data/quy/petfinder-adoption-prediction/test.csv", encoding="utf-8")

smpsb_df = pd.read_csv("/data/quy/petfinder-adoption-prediction/sample_submission.csv")

if os.dir.ex
os.mkdir("./working")

FileExistsError: [Errno 17] File exists: './working'

In [3]:
train_df["group"] = 0
petid_map = {v: i for i, v in enumerate(pd.concat([train_df["PetID"], test_df["PetID"]]))}
rescuerid_encoder = LabelEncoder().fit(pd.concat([train_df["RescuerID"], test_df["RescuerID"]]))

for group, (_, group_idx) in enumerate(GroupKFold(n_splits=10).split(train_df,
                                                                     train_df["AdoptionSpeed"],
                                                                     rescuerid_encoder.transform(train_df["RescuerID"]))):
    train_df.loc[group_idx, "group"] = group

In [4]:
train_df.pivot_table(columns="AdoptionSpeed",
                     index="group",
                     values="Breed1",
                     aggfunc="count")

AdoptionSpeed,0,1,2,3,4
group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,52,253,425,415,355
1,34,304,397,336,429
2,37,282,439,338,404
3,37,322,366,350,424
4,32,284,372,329,482
5,33,314,398,338,416
6,41,379,385,280,414
7,50,283,453,291,422
8,51,357,387,272,432
9,43,312,415,310,419


In [5]:
class FFMformatter():
    def __init__(self,
                 numerical_field_max: int,
                 key_column="PetID"
                 ):
        
        self.numerical_field_max = numerical_field_max
        self.key_column = key_column

        self.numerical_field_last = -1
        self.numerical_field = defaultdict(lambda: self.new_numerical_field())
        self.field_nan = defaultdict(lambda: np.NaN)
        
        self.categorical_field_last = numerical_field_max
        self.categorical_feature_last = numerical_field_max
        self.categorical_field = defaultdict(lambda: self.new_categorical_field())
        self.categorical_feature = defaultdict(lambda: defaultdict(lambda: self.new_categorical_feature()))
        self.result = {}
        self.categorical_columns = set()


    def add_dataframe(self, df: pd.DataFrame):
        petids = df[self.key_column]
        for field in df.columns:
            if field == self.key_column:
                continue
            for petid, value in zip(petids, df[field]):
                if self.field_nan[field] == value or ((type(value) == float) and isnan(value)):
                    continue

                if field in self.categorical_columns:
                    field_id = self.categorical_field[field]
                    self.result[petid] += " {}:{}:1".format(field_id,
                                                            self.categorical_feature[field_id][value])
                
                else:
                    field_id = self.numerical_field[field]
                    self.result[petid] += " {}:{}:{}".format(field_id,
                                                             field_id,
                                                             value)

    def add_field_dataframe(self, df, field_col, value_col):
        field_id = self.categorical_field[field_col]
        for _, row in tqdm(df.iterrows()):
            petid = row[self.key_column]
            self.result[petid] += " {}:{}:{}".format(field_id,
                                                     self.categorical_feature[field_id][row[field_col]],
                                                     row[value_col])
        
    def add_dataframe_as_samefield(self, df, basecol=None):
        assert self.key_column in df.columns
        cols = [col for col in df.columns if col != self.key_column]
        if basecol is None:
            basecol = cols[0]
        field_id = self.categorical_field[basecol]
        key_vals = df[self.key_column].values
        for col in cols:
            for key_val, value in zip(key_vals, df[col].values):
                if self.field_nan[basecol] == value or ((type(value) == float) and isnan(value)):
                    continue
                self.result[key_val] += " {}:{}:{}".format(field_id,
                                                           self.categorical_feature[field_id][col],
                                                           value)


    def set_categorical_columns(self, columns):
        if type(columns) == str:
            self.categorical_columns.add(columns)
        else:
            self.categorical_columns.update(columns)


    def set_multicolumns_as_column(self, columns):
        self.set_categorical_columns(columns)
        base_field = self.categorical_field[columns[0]]
        for col in columns:
            self.categorical_field[col] = base_field


    def add_Petids(self, keys, targets=None):
        if targets is not None:
            for key, target in zip(keys, targets):
                self.result[key] = str(target)
        else:
            for key in keys:
                self.result[key] = "-1"

    def set_field_nanvalue(self, col, nanvalue=np.NaN):
        self.field_nan[col] = nanvalue

    def new_numerical_field(self):
        self.numerical_field_last += 1
        if self.numerical_field_last > self.numerical_field_max:
            raise
        return self.numerical_field_last * 1

    def new_categorical_field(self):
        self.categorical_field_last += 1
        return self.categorical_field_last * 1
    
    def new_categorical_feature(self):
        self.categorical_feature_last += 1
        return self.categorical_feature_last * 1

In [6]:
df2ffm = FFMformatter(numerical_field_max=200)

df2ffm.add_Petids(train_df["PetID"].values, train_df["AdoptionSpeed"].values)
df2ffm.add_Petids(test_df["PetID"].values)

# tabular data

In [7]:
train_df["care_count"] = (train_df["Vaccinated"] == 1).astype(np.uint8) + (train_df["Dewormed"] == 1) + (train_df["Sterilized"] == 1)
test_df["care_count"] = (test_df["Vaccinated"] == 1).astype(np.uint8) + (test_df["Dewormed"] == 1) + (test_df["Sterilized"] == 1)

train_df["care_uncertain_count"] = ((train_df["Vaccinated"] == 3).astype(np.uint8) +
                                    (train_df["Dewormed"] == 3) +
                                    (train_df["Sterilized"] == 3))
test_df["care_uncertain_count"] = ((test_df["Vaccinated"] == 3).astype(np.uint8) +
                                   (test_df["Dewormed"] == 3) +
                                   (test_df["Sterilized"] == 3))

train_df["color_counts"] = 3 - (train_df.filter(regex="^Color") == 0).sum(axis=1)
test_df["color_counts"] = 3 - (test_df.filter(regex="^Color") == 0).sum(axis=1)

rescuer_count_map = pd.concat([train_df["RescuerID"], test_df["RescuerID"]]).value_counts().to_dict()
train_df["Rescuer_count"] = np.log(train_df["RescuerID"].map(rescuer_count_map))
test_df["Rescuer_count"] = np.log(test_df["RescuerID"].map(rescuer_count_map))

train_df["Fee"] = np.log1p(train_df["Fee"])
test_df["Fee"] = np.log1p(test_df["Fee"])

train_df["Age"] = np.log1p(train_df["Age"])
test_df["Age"] = np.log1p(test_df["Age"])

train_df["description_len"] = np.log1p(train_df["Description"].fillna("").str.len())
test_df["description_len"] = np.log1p(test_df["Description"].fillna("").str.len())

train_df["name_len"] = np.log1p(train_df["Name"].fillna("").str.len())
test_df["name_len"] = np.log1p(test_df["Name"].fillna("").str.len())

# https://www.kaggle.com/c/petfinder-adoption-prediction/discussion/78040

# state GDP: https://en.wikipedia.org/wiki/List_of_Malaysian_states_by_GDP
state_gdp = {
    41336: 116.679,
    41325: 40.596,
    41367: 23.02,
    41401: 190.075,
    41415: 5.984,
    41324: 37.274,
    41332: 42.389,
    41335: 52.452,
    41330: 67.629,
    41380: 5.642,
    41327: 81.284,
    41345: 80.167,
    41342: 121.414,
    41326: 280.698,
    41361: 32.270
}

# state population: https://en.wikipedia.org/wiki/Malaysia
state_population = {
    41336: 33.48283,
    41325: 19.47651,
    41367: 15.39601,
    41401: 16.74621,
    41415: 0.86908,
    41324: 8.21110,
    41332: 10.21064,
    41335: 15.00817,
    41330: 23.52743,
    41380: 2.31541,
    41327: 15.61383,
    41345: 32.06742,
    41342: 24.71140,
    41326: 54.62141,
    41361: 10.35977
}

train_df["state_gdp"] = train_df.State.map(state_gdp)
test_df["state_gdp"] = test_df.State.map(state_gdp)

train_df["state_population"] = train_df.State.map(state_population)
test_df["state_population"] = test_df.State.map(state_population)

for col in ["description_len", "name_len", "state_gdp", "state_population", "Rescuer_count", "Fee", "Age"]:
    mean, std = train_df[col].mean(), train_df[col].std()
    train_df[col] = (train_df[col] - mean)/std
    test_df[col] = (test_df[col] - mean)/std

In [8]:
train_df.head()

Unnamed: 0,Type,Name,Age,Breed1,Breed2,Gender,Color1,Color2,Color3,MaturitySize,...,AdoptionSpeed,group,care_count,care_uncertain_count,color_counts,Rescuer_count,description_len,name_len,state_gdp,state_population
0,2,Nibble,-0.390818,299,0,1,1,7,0,1,...,2,7,0,0,2,0.033127,0.499203,-0.055035,0.73954,0.833461
1,2,No Name Yet,-1.070282,265,0,1,1,2,0,2,...,0,3,0,3,2,-1.111528,-0.524061,0.60947,-0.447795,-1.225197
2,1,Brisco,-1.070282,307,0,1,2,7,0,2,...,3,0,2,0,2,2.262287,0.582625,-0.055035,0.73954,0.833461
3,1,Miko,-0.17208,307,0,2,1,2,0,2,...,2,6,2,0,2,1.041896,-0.328732,-0.469857,-0.447795,-1.225197
4,1,Hunter,-1.070282,307,0,1,1,0,0,2,...,2,5,0,0,1,1.584551,0.57556,-0.055035,0.73954,0.833461


In [9]:
df2ffm.set_multicolumns_as_column(["Color1", "Color2", "Color3"])
df2ffm.set_multicolumns_as_column(["Breed1", "Breed2"])
df2ffm.set_categorical_columns(["Gender", "State"])

for col in ["Color1", "Color2", "Color3", "Breed1", "Breed2", "FurLength", "MaturitySize", "Health", "State"]:
    df2ffm.set_field_nanvalue(col, 0)

for col in ["Vacciated", "Dewormed", "Sterilized"]:
    df2ffm.set_field_nanvalue(col, 3)

drop_col = ["Name", "Description", "RescuerID"]
df2ffm.add_dataframe(train_df.drop(drop_col + ["AdoptionSpeed", "group"], axis=1))
df2ffm.add_dataframe(test_df.drop(drop_col, axis=1))

# metadata

In [10]:
def load_metadata(path):
    file = path.split("/")[-1]
    pet_id = file[:-5].split("-")[0]
    file_id = file[:-5].split("-")[1]
    
    with open(path, encoding="utf-8") as f:
        jfile = json.loads(f.read())
    response = {"labels": [],
                "text": {"PetID": pet_id,
                         "FileID": file_id,
                         "description": ""}}
    
    if "labelAnnotations" in jfile.keys():
        for anot in jfile["labelAnnotations"]:
            response["labels"].append({"PetID": pet_id,
                                       "FileID": file_id,
                                       "description": anot["description"],
                                       "score": anot["score"]})

    if "imagePropertiesAnnotation" in jfile.keys():
        colors = np.zeros((10, 1, 3), dtype=np.uint8)
        scores = np.zeros(10)
        fractions = np.zeros(10)
        getscore = itemgetter("score")
        for i, color in enumerate(sorted(jfile['imagePropertiesAnnotation']["dominantColors"]["colors"],
                                         key=getscore,
                                         reverse=True)
                                 ):

            for j, c in enumerate(["red", "green", "blue"]):
                if not color["color"].get(c) is None:
                    colors[i, 0, j] = color["color"][c] 
                
            scores[i] = color["score"]
            fractions[i] = color["pixelFraction"]
        hsv = cv2.cvtColor(colors, cv2.COLOR_RGB2HSV_FULL)
        response["property"] = {"PetID": pet_id,
                                "FileID": file_id,
                                "top_red": colors[0, 0, 0],
                                "top_green": colors[0, 0, 1],
                                "top_blue": colors[0, 0, 2],
                                "top_score": scores[0],
                                "top_fraction": fractions[0],
                                "top_hue": hsv[0, 0, 0],
                                "top_saturation": hsv[0, 0, 1],
                                "top_brightness": hsv[0, 0, 2],
                                "top3_score": scores[:3].sum(),
                                "top3_fraction": fractions[:3].sum(),
                                "top3_area": np.linalg.norm(np.cross((colors[1] - colors[0])[0], (colors[2] - colors[0])[0])),
                                "top10_fraction": fractions.sum(),
                                "top10_score": scores.sum()}

    if 'cropHintsAnnotation' in jfile.keys():
        tmp = jfile["cropHintsAnnotation"]["cropHints"][0]
        response["crop"] = {"PetID": pet_id,
                            "FileID": file_id,
                            "confidence": tmp["confidence"]}
        if not tmp.get("importanceFraction") is None:
            response["crop"]["importanceFraction"] = tmp["importanceFraction"]
    
    if 'textAnnotations' in jfile.keys():
        for anot in jfile["textAnnotations"]:
            response["text"]["description"] += anot["description"] + " "
    
    if "faceAnnotations" in jfile.keys():
        faceanot = jfile["faceAnnotations"][0]
        response["face"] = {"PetID": pet_id,
                            "FileID": file_id,
                            "detectionConfidence": faceanot['detectionConfidence'],
                            'landmarkingConfidence': faceanot['landmarkingConfidence'],
                            }
    
    return response

In [14]:
metadata_path = [dir_ + file for dir_ in ["/data/quy/petfinder-adoption-prediction/train_meta/",
                                          "/data/quy/petfinder-adoption-prediction/test_meta/"]
                                 for file in os.listdir(dir_)]

print(len(metadata_path))

72776


In [15]:
results = Parallel(n_jobs=-1, verbose=50)([delayed(load_metadata)(path) for path in metadata_path])

labels = []
properties = []
crops = []
faces = []
texts = []
for res in tqdm(results):
    if not res.get("labels") is None:
        labels.extend(res["labels"])
    if not res.get("property") is None:
        properties.append(res["property"])
    if not res.get("crop") is None:
        crops.append(res["crop"])
    if not res.get("face") is None:
        faces.append(res["face"])
    if not res.get("text") is None:
        texts.append(res["text"])

labels_df = pd.DataFrame(labels)
properties_df = pd.DataFrame(properties)
crops_df = pd.DataFrame(crops)
faces_df = pd.DataFrame(faces)
texts_df = pd.DataFrame(texts)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:    0.5s
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.5s
[Parallel(n_jobs=-1)]: Done   3 tasks      | elapsed:    0.5s
[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:    0.5s
[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:    0.5s
[Parallel(n_jobs=-1)]: Done   6 tasks      | elapsed:    0.5s
[Parallel(n_jobs=-1)]: Done   7 tasks      | elapsed:    0.5s
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    0.5s
[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:    0.5s
[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:    0.5s
[Parallel(n_jobs=-1)]: Done  11 tasks      | elapsed:    0.5s
[Parallel(n_jobs=-1)]: Done  12 tasks      | elapsed:    0.5s
[Parallel(n_jobs=-1)]: Done  13 tasks      | elapsed:    0.5s
[Parallel(n_jobs=-1)]: Done  14 tasks      | elapsed:    0.5s
[Parallel(n_jobs=-1)]: Done  15 tasks      | elapsed:  

[Parallel(n_jobs=-1)]: Done 1603 tasks      | elapsed:    1.1s
[Parallel(n_jobs=-1)]: Done 1717 tasks      | elapsed:    1.1s
[Parallel(n_jobs=-1)]: Done 1831 tasks      | elapsed:    1.1s
[Parallel(n_jobs=-1)]: Done 1945 tasks      | elapsed:    1.1s
[Parallel(n_jobs=-1)]: Done 2059 tasks      | elapsed:    1.1s
[Parallel(n_jobs=-1)]: Done 2173 tasks      | elapsed:    1.1s
[Parallel(n_jobs=-1)]: Done 2287 tasks      | elapsed:    1.1s
[Parallel(n_jobs=-1)]: Done 2401 tasks      | elapsed:    1.1s
[Parallel(n_jobs=-1)]: Done 2515 tasks      | elapsed:    1.2s
[Parallel(n_jobs=-1)]: Done 2629 tasks      | elapsed:    1.2s
[Parallel(n_jobs=-1)]: Done 2743 tasks      | elapsed:    1.2s
[Parallel(n_jobs=-1)]: Done 2857 tasks      | elapsed:    1.2s
[Parallel(n_jobs=-1)]: Done 2971 tasks      | elapsed:    1.2s
[Parallel(n_jobs=-1)]: Done 3085 tasks      | elapsed:    1.2s
[Parallel(n_jobs=-1)]: Done 3199 tasks      | elapsed:    1.2s
[Parallel(n_jobs=-1)]: Done 3313 tasks      | elapsed: 

[Parallel(n_jobs=-1)]: Done 28783 tasks      | elapsed:   11.2s
[Parallel(n_jobs=-1)]: Done 29023 tasks      | elapsed:   11.6s
[Parallel(n_jobs=-1)]: Done 29263 tasks      | elapsed:   11.9s
[Parallel(n_jobs=-1)]: Done 29503 tasks      | elapsed:   16.2s
[Parallel(n_jobs=-1)]: Done 29743 tasks      | elapsed:   16.7s
[Parallel(n_jobs=-1)]: Done 29983 tasks      | elapsed:   16.8s
[Parallel(n_jobs=-1)]: Done 30223 tasks      | elapsed:   16.9s
[Parallel(n_jobs=-1)]: Done 30463 tasks      | elapsed:   17.1s
[Parallel(n_jobs=-1)]: Done 30703 tasks      | elapsed:   17.2s
[Parallel(n_jobs=-1)]: Done 30943 tasks      | elapsed:   17.2s
[Parallel(n_jobs=-1)]: Done 31183 tasks      | elapsed:   17.4s
[Parallel(n_jobs=-1)]: Done 31423 tasks      | elapsed:   17.6s
[Parallel(n_jobs=-1)]: Done 31663 tasks      | elapsed:   17.8s
[Parallel(n_jobs=-1)]: Done 31903 tasks      | elapsed:   18.1s
[Parallel(n_jobs=-1)]: Done 32143 tasks      | elapsed:   18.6s
[Parallel(n_jobs=-1)]: Done 32383 tasks 

[Parallel(n_jobs=-1)]: Done 39523 tasks      | elapsed:   27.7s
[Parallel(n_jobs=-1)]: Done 39553 tasks      | elapsed:   27.7s
[Parallel(n_jobs=-1)]: Done 39568 tasks      | elapsed:   27.7s
[Parallel(n_jobs=-1)]: Done 39583 tasks      | elapsed:   27.8s
[Parallel(n_jobs=-1)]: Done 39598 tasks      | elapsed:   27.8s
[Parallel(n_jobs=-1)]: Done 39613 tasks      | elapsed:   27.8s
[Parallel(n_jobs=-1)]: Done 39628 tasks      | elapsed:   27.8s
[Parallel(n_jobs=-1)]: Done 39643 tasks      | elapsed:   27.8s
[Parallel(n_jobs=-1)]: Done 39658 tasks      | elapsed:   27.9s
[Parallel(n_jobs=-1)]: Done 39673 tasks      | elapsed:   27.9s
[Parallel(n_jobs=-1)]: Done 39688 tasks      | elapsed:   27.9s
[Parallel(n_jobs=-1)]: Done 39703 tasks      | elapsed:   27.9s
[Parallel(n_jobs=-1)]: Done 39718 tasks      | elapsed:   28.0s
[Parallel(n_jobs=-1)]: Done 39733 tasks      | elapsed:   28.0s
[Parallel(n_jobs=-1)]: Done 39748 tasks      | elapsed:   28.0s
[Parallel(n_jobs=-1)]: Done 39763 tasks 

[Parallel(n_jobs=-1)]: Done 41638 tasks      | elapsed:   31.1s
[Parallel(n_jobs=-1)]: Done 41653 tasks      | elapsed:   31.1s
[Parallel(n_jobs=-1)]: Done 41668 tasks      | elapsed:   31.1s
[Parallel(n_jobs=-1)]: Done 41683 tasks      | elapsed:   31.1s
[Parallel(n_jobs=-1)]: Done 41698 tasks      | elapsed:   31.2s
[Parallel(n_jobs=-1)]: Done 41713 tasks      | elapsed:   31.2s
[Parallel(n_jobs=-1)]: Done 41728 tasks      | elapsed:   31.2s
[Parallel(n_jobs=-1)]: Done 41743 tasks      | elapsed:   31.2s
[Parallel(n_jobs=-1)]: Done 41758 tasks      | elapsed:   31.3s
[Parallel(n_jobs=-1)]: Done 41773 tasks      | elapsed:   31.3s
[Parallel(n_jobs=-1)]: Done 41788 tasks      | elapsed:   31.3s
[Parallel(n_jobs=-1)]: Done 41803 tasks      | elapsed:   31.4s
[Parallel(n_jobs=-1)]: Done 41818 tasks      | elapsed:   31.4s
[Parallel(n_jobs=-1)]: Done 41833 tasks      | elapsed:   31.4s
[Parallel(n_jobs=-1)]: Done 41848 tasks      | elapsed:   31.4s
[Parallel(n_jobs=-1)]: Done 41863 tasks 

[Parallel(n_jobs=-1)]: Done 43693 tasks      | elapsed:   34.4s
[Parallel(n_jobs=-1)]: Done 43708 tasks      | elapsed:   34.4s
[Parallel(n_jobs=-1)]: Done 43723 tasks      | elapsed:   34.5s
[Parallel(n_jobs=-1)]: Done 43738 tasks      | elapsed:   34.5s
[Parallel(n_jobs=-1)]: Done 43753 tasks      | elapsed:   34.5s
[Parallel(n_jobs=-1)]: Done 43768 tasks      | elapsed:   34.5s
[Parallel(n_jobs=-1)]: Done 43783 tasks      | elapsed:   34.6s
[Parallel(n_jobs=-1)]: Done 43798 tasks      | elapsed:   34.6s
[Parallel(n_jobs=-1)]: Done 43813 tasks      | elapsed:   34.6s
[Parallel(n_jobs=-1)]: Done 43828 tasks      | elapsed:   34.7s
[Parallel(n_jobs=-1)]: Done 43843 tasks      | elapsed:   34.7s
[Parallel(n_jobs=-1)]: Done 43858 tasks      | elapsed:   34.7s
[Parallel(n_jobs=-1)]: Done 43873 tasks      | elapsed:   34.7s
[Parallel(n_jobs=-1)]: Done 43888 tasks      | elapsed:   34.8s
[Parallel(n_jobs=-1)]: Done 43903 tasks      | elapsed:   34.8s
[Parallel(n_jobs=-1)]: Done 43918 tasks 

[Parallel(n_jobs=-1)]: Done 45628 tasks      | elapsed:   37.3s
[Parallel(n_jobs=-1)]: Done 45643 tasks      | elapsed:   37.3s
[Parallel(n_jobs=-1)]: Done 45658 tasks      | elapsed:   37.4s
[Parallel(n_jobs=-1)]: Done 45673 tasks      | elapsed:   37.4s
[Parallel(n_jobs=-1)]: Done 45688 tasks      | elapsed:   37.4s
[Parallel(n_jobs=-1)]: Done 45703 tasks      | elapsed:   37.4s
[Parallel(n_jobs=-1)]: Done 45718 tasks      | elapsed:   37.4s
[Parallel(n_jobs=-1)]: Done 45733 tasks      | elapsed:   37.4s
[Parallel(n_jobs=-1)]: Done 45748 tasks      | elapsed:   37.5s
[Parallel(n_jobs=-1)]: Done 45763 tasks      | elapsed:   37.5s
[Parallel(n_jobs=-1)]: Done 45778 tasks      | elapsed:   37.5s
[Parallel(n_jobs=-1)]: Done 45793 tasks      | elapsed:   37.5s
[Parallel(n_jobs=-1)]: Done 45808 tasks      | elapsed:   37.5s
[Parallel(n_jobs=-1)]: Done 45823 tasks      | elapsed:   37.6s
[Parallel(n_jobs=-1)]: Done 45838 tasks      | elapsed:   37.6s
[Parallel(n_jobs=-1)]: Done 45853 tasks 

[Parallel(n_jobs=-1)]: Done 47608 tasks      | elapsed:   40.4s
[Parallel(n_jobs=-1)]: Done 47623 tasks      | elapsed:   40.4s
[Parallel(n_jobs=-1)]: Done 47638 tasks      | elapsed:   40.5s
[Parallel(n_jobs=-1)]: Done 47653 tasks      | elapsed:   40.5s
[Parallel(n_jobs=-1)]: Done 47668 tasks      | elapsed:   40.5s
[Parallel(n_jobs=-1)]: Done 47683 tasks      | elapsed:   40.5s
[Parallel(n_jobs=-1)]: Done 47698 tasks      | elapsed:   40.6s
[Parallel(n_jobs=-1)]: Done 47713 tasks      | elapsed:   40.6s
[Parallel(n_jobs=-1)]: Done 47728 tasks      | elapsed:   40.6s
[Parallel(n_jobs=-1)]: Done 47743 tasks      | elapsed:   40.7s
[Parallel(n_jobs=-1)]: Done 47758 tasks      | elapsed:   40.7s
[Parallel(n_jobs=-1)]: Done 47773 tasks      | elapsed:   40.7s
[Parallel(n_jobs=-1)]: Done 47788 tasks      | elapsed:   40.7s
[Parallel(n_jobs=-1)]: Done 47803 tasks      | elapsed:   40.8s
[Parallel(n_jobs=-1)]: Done 47818 tasks      | elapsed:   40.8s
[Parallel(n_jobs=-1)]: Done 47833 tasks 

[Parallel(n_jobs=-1)]: Done 49678 tasks      | elapsed:   43.8s
[Parallel(n_jobs=-1)]: Done 49693 tasks      | elapsed:   43.8s
[Parallel(n_jobs=-1)]: Done 49708 tasks      | elapsed:   43.9s
[Parallel(n_jobs=-1)]: Done 49723 tasks      | elapsed:   43.9s
[Parallel(n_jobs=-1)]: Done 49738 tasks      | elapsed:   43.9s
[Parallel(n_jobs=-1)]: Done 49753 tasks      | elapsed:   44.0s
[Parallel(n_jobs=-1)]: Done 49768 tasks      | elapsed:   44.0s
[Parallel(n_jobs=-1)]: Done 49783 tasks      | elapsed:   44.0s
[Parallel(n_jobs=-1)]: Done 49798 tasks      | elapsed:   44.0s
[Parallel(n_jobs=-1)]: Done 49813 tasks      | elapsed:   44.0s
[Parallel(n_jobs=-1)]: Done 49828 tasks      | elapsed:   44.0s
[Parallel(n_jobs=-1)]: Done 49843 tasks      | elapsed:   44.1s
[Parallel(n_jobs=-1)]: Done 49858 tasks      | elapsed:   44.1s
[Parallel(n_jobs=-1)]: Done 49873 tasks      | elapsed:   44.1s
[Parallel(n_jobs=-1)]: Done 49888 tasks      | elapsed:   44.1s
[Parallel(n_jobs=-1)]: Done 49903 tasks 

[Parallel(n_jobs=-1)]: Done 51613 tasks      | elapsed:   47.4s
[Parallel(n_jobs=-1)]: Done 51628 tasks      | elapsed:   47.5s
[Parallel(n_jobs=-1)]: Done 51643 tasks      | elapsed:   47.6s
[Parallel(n_jobs=-1)]: Done 51658 tasks      | elapsed:   47.6s
[Parallel(n_jobs=-1)]: Done 51673 tasks      | elapsed:   47.6s
[Parallel(n_jobs=-1)]: Done 51688 tasks      | elapsed:   47.6s
[Parallel(n_jobs=-1)]: Done 51703 tasks      | elapsed:   47.7s
[Parallel(n_jobs=-1)]: Done 51718 tasks      | elapsed:   47.7s
[Parallel(n_jobs=-1)]: Done 51733 tasks      | elapsed:   47.8s
[Parallel(n_jobs=-1)]: Done 51748 tasks      | elapsed:   47.8s
[Parallel(n_jobs=-1)]: Done 51763 tasks      | elapsed:   47.9s
[Parallel(n_jobs=-1)]: Done 51778 tasks      | elapsed:   47.9s
[Parallel(n_jobs=-1)]: Done 51793 tasks      | elapsed:   47.9s
[Parallel(n_jobs=-1)]: Done 51808 tasks      | elapsed:   48.1s
[Parallel(n_jobs=-1)]: Done 51823 tasks      | elapsed:   48.1s
[Parallel(n_jobs=-1)]: Done 51838 tasks 

[Parallel(n_jobs=-1)]: Done 53346 tasks      | elapsed:   52.4s
[Parallel(n_jobs=-1)]: Done 53353 tasks      | elapsed:   52.4s
[Parallel(n_jobs=-1)]: Done 53360 tasks      | elapsed:   52.4s
[Parallel(n_jobs=-1)]: Done 53367 tasks      | elapsed:   52.4s
[Parallel(n_jobs=-1)]: Done 53374 tasks      | elapsed:   52.4s
[Parallel(n_jobs=-1)]: Done 53381 tasks      | elapsed:   52.4s
[Parallel(n_jobs=-1)]: Done 53388 tasks      | elapsed:   52.4s
[Parallel(n_jobs=-1)]: Done 53395 tasks      | elapsed:   52.4s
[Parallel(n_jobs=-1)]: Done 53402 tasks      | elapsed:   52.4s
[Parallel(n_jobs=-1)]: Done 53409 tasks      | elapsed:   52.5s
[Parallel(n_jobs=-1)]: Done 53416 tasks      | elapsed:   52.5s
[Parallel(n_jobs=-1)]: Done 53423 tasks      | elapsed:   52.5s
[Parallel(n_jobs=-1)]: Done 53430 tasks      | elapsed:   52.5s
[Parallel(n_jobs=-1)]: Done 53437 tasks      | elapsed:   52.5s
[Parallel(n_jobs=-1)]: Done 53444 tasks      | elapsed:   52.5s
[Parallel(n_jobs=-1)]: Done 53451 tasks 

[Parallel(n_jobs=-1)]: Done 54354 tasks      | elapsed:   54.1s
[Parallel(n_jobs=-1)]: Done 54361 tasks      | elapsed:   54.1s
[Parallel(n_jobs=-1)]: Done 54368 tasks      | elapsed:   54.1s
[Parallel(n_jobs=-1)]: Done 54375 tasks      | elapsed:   54.1s
[Parallel(n_jobs=-1)]: Done 54382 tasks      | elapsed:   54.1s
[Parallel(n_jobs=-1)]: Done 54389 tasks      | elapsed:   54.1s
[Parallel(n_jobs=-1)]: Done 54396 tasks      | elapsed:   54.2s
[Parallel(n_jobs=-1)]: Done 54403 tasks      | elapsed:   54.2s
[Parallel(n_jobs=-1)]: Done 54410 tasks      | elapsed:   54.2s
[Parallel(n_jobs=-1)]: Done 54417 tasks      | elapsed:   54.2s
[Parallel(n_jobs=-1)]: Done 54424 tasks      | elapsed:   54.2s
[Parallel(n_jobs=-1)]: Done 54431 tasks      | elapsed:   54.2s
[Parallel(n_jobs=-1)]: Done 54438 tasks      | elapsed:   54.2s
[Parallel(n_jobs=-1)]: Done 54445 tasks      | elapsed:   54.2s
[Parallel(n_jobs=-1)]: Done 54452 tasks      | elapsed:   54.2s
[Parallel(n_jobs=-1)]: Done 54459 tasks 

[Parallel(n_jobs=-1)]: Done 55334 tasks      | elapsed:   55.5s
[Parallel(n_jobs=-1)]: Done 55341 tasks      | elapsed:   55.5s
[Parallel(n_jobs=-1)]: Done 55348 tasks      | elapsed:   55.6s
[Parallel(n_jobs=-1)]: Done 55355 tasks      | elapsed:   55.6s
[Parallel(n_jobs=-1)]: Done 55362 tasks      | elapsed:   55.6s
[Parallel(n_jobs=-1)]: Done 55369 tasks      | elapsed:   55.6s
[Parallel(n_jobs=-1)]: Done 55376 tasks      | elapsed:   55.6s
[Parallel(n_jobs=-1)]: Done 55383 tasks      | elapsed:   55.6s
[Parallel(n_jobs=-1)]: Done 55390 tasks      | elapsed:   55.6s
[Parallel(n_jobs=-1)]: Done 55397 tasks      | elapsed:   55.6s
[Parallel(n_jobs=-1)]: Done 55404 tasks      | elapsed:   55.6s
[Parallel(n_jobs=-1)]: Done 55411 tasks      | elapsed:   55.6s
[Parallel(n_jobs=-1)]: Done 55418 tasks      | elapsed:   55.6s
[Parallel(n_jobs=-1)]: Done 55425 tasks      | elapsed:   55.7s
[Parallel(n_jobs=-1)]: Done 55432 tasks      | elapsed:   55.7s
[Parallel(n_jobs=-1)]: Done 55439 tasks 

[Parallel(n_jobs=-1)]: Done 56300 tasks      | elapsed:   57.0s
[Parallel(n_jobs=-1)]: Done 56307 tasks      | elapsed:   57.0s
[Parallel(n_jobs=-1)]: Done 56314 tasks      | elapsed:   57.0s
[Parallel(n_jobs=-1)]: Done 56321 tasks      | elapsed:   57.0s
[Parallel(n_jobs=-1)]: Done 56328 tasks      | elapsed:   57.0s
[Parallel(n_jobs=-1)]: Done 56335 tasks      | elapsed:   57.1s
[Parallel(n_jobs=-1)]: Done 56342 tasks      | elapsed:   57.1s
[Parallel(n_jobs=-1)]: Done 56349 tasks      | elapsed:   57.1s
[Parallel(n_jobs=-1)]: Done 56356 tasks      | elapsed:   57.1s
[Parallel(n_jobs=-1)]: Done 56363 tasks      | elapsed:   57.1s
[Parallel(n_jobs=-1)]: Done 56370 tasks      | elapsed:   57.1s
[Parallel(n_jobs=-1)]: Done 56377 tasks      | elapsed:   57.1s
[Parallel(n_jobs=-1)]: Done 56384 tasks      | elapsed:   57.1s
[Parallel(n_jobs=-1)]: Done 56391 tasks      | elapsed:   57.2s
[Parallel(n_jobs=-1)]: Done 56398 tasks      | elapsed:   57.2s
[Parallel(n_jobs=-1)]: Done 56405 tasks 

[Parallel(n_jobs=-1)]: Done 57336 tasks      | elapsed:   58.7s
[Parallel(n_jobs=-1)]: Done 57343 tasks      | elapsed:   58.7s
[Parallel(n_jobs=-1)]: Done 57350 tasks      | elapsed:   58.7s
[Parallel(n_jobs=-1)]: Done 57357 tasks      | elapsed:   58.8s
[Parallel(n_jobs=-1)]: Done 57364 tasks      | elapsed:   58.8s
[Parallel(n_jobs=-1)]: Done 57371 tasks      | elapsed:   58.8s
[Parallel(n_jobs=-1)]: Done 57378 tasks      | elapsed:   58.8s
[Parallel(n_jobs=-1)]: Done 57385 tasks      | elapsed:   58.8s
[Parallel(n_jobs=-1)]: Done 57392 tasks      | elapsed:   58.8s
[Parallel(n_jobs=-1)]: Done 57399 tasks      | elapsed:   58.8s
[Parallel(n_jobs=-1)]: Done 57406 tasks      | elapsed:   58.8s
[Parallel(n_jobs=-1)]: Done 57413 tasks      | elapsed:   58.8s
[Parallel(n_jobs=-1)]: Done 57420 tasks      | elapsed:   58.8s
[Parallel(n_jobs=-1)]: Done 57427 tasks      | elapsed:   58.9s
[Parallel(n_jobs=-1)]: Done 57434 tasks      | elapsed:   58.9s
[Parallel(n_jobs=-1)]: Done 57441 tasks 

[Parallel(n_jobs=-1)]: Done 58344 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 58351 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 58358 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 58365 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 58372 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 58379 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 58386 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 58393 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 58400 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 58407 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 58414 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 58421 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 58428 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 58435 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 58442 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 58449 tasks 

[Parallel(n_jobs=-1)]: Done 59401 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 59408 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 59415 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 59422 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 59429 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 59436 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 59443 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 59450 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 59457 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 59464 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 59471 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 59478 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 59485 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 59492 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 59499 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 59506 tasks 

[Parallel(n_jobs=-1)]: Done 60318 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 60325 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 60332 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 60339 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 60346 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 60353 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 60360 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 60367 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 60374 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 60381 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 60388 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 60395 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 60402 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 60409 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 60416 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 60423 tasks 

[Parallel(n_jobs=-1)]: Done 61326 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 61333 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 61340 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 61347 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 61354 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 61361 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 61368 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 61375 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 61382 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 61389 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 61396 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 61403 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 61410 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 61417 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 61424 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 61431 tasks 

[Parallel(n_jobs=-1)]: Done 62404 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 62411 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 62418 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 62425 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 62432 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 62439 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 62446 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 62453 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 62460 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 62467 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 62474 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 62481 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 62488 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 62495 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 62502 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 62509 tasks 

[Parallel(n_jobs=-1)]: Done 63454 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 63461 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 63468 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 63475 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 63482 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 63489 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 63496 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 63503 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 63510 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 63517 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 63524 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 63531 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 63538 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 63545 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 63552 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 63559 tasks 

[Parallel(n_jobs=-1)]: Done 64448 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 64455 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 64462 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 64469 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 64476 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 64483 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 64490 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 64497 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 64504 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 64511 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 64518 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 64525 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 64532 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 64539 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 64546 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 64553 tasks 

[Parallel(n_jobs=-1)]: Done 65351 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 65358 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 65365 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 65372 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 65379 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 65386 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 65393 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 65400 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 65407 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 65414 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 65421 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 65428 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 65435 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 65442 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 65449 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 65456 tasks 

[Parallel(n_jobs=-1)]: Done 66394 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 66401 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 66408 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 66415 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 66422 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 66429 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 66436 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 66443 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 66450 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 66457 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 66464 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 66471 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 66478 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 66485 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 66492 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 66499 tasks 

[Parallel(n_jobs=-1)]: Done 67416 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 67423 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 67430 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 67437 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 67444 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 67451 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 67458 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 67465 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 67472 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 67479 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 67486 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 67493 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 67500 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 67507 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 67514 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 67521 tasks 

[Parallel(n_jobs=-1)]: Done 68410 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 68417 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 68424 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 68431 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 68438 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 68445 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 68452 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 68459 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 68466 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 68473 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 68480 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 68487 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 68494 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 68501 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 68508 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 68515 tasks 

[Parallel(n_jobs=-1)]: Done 69348 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 69355 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 69362 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 69369 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 69376 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 69383 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 69390 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 69397 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 69404 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 69411 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 69418 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 69425 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 69432 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 69439 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 69446 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 69453 tasks 

[Parallel(n_jobs=-1)]: Done 70790 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 70804 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 70818 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 70832 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 70846 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 70860 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 70874 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 70888 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 70902 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 70916 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 70930 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 70944 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 70958 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 70972 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 70986 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 71000 tasks 

100%|██████████| 72776/72776 [00:00<00:00, 440487.56it/s]

[Parallel(n_jobs=-1)]: Done 72776 out of 72776 | elapsed:  1.4min finished





In [16]:
labels_agg = labels_df.groupby(["PetID", "description"])["score"].max().reset_index()
df2ffm.add_field_dataframe(labels_agg.rename(columns={"description": "labels_description"}).reset_index(), "labels_description", "score")

274706it [00:26, 10359.27it/s]


In [17]:
ffm_df = pd.Series(df2ffm.result).reset_index()
ffm_df.columns = ["PetID", "ffm_text"]

train_ffm = train_df[["PetID", "AdoptionSpeed", "group"]].merge(ffm_df,
                                                                on="PetID",
                                                                how="left")

test_ffm = test_df[["PetID"]].merge(ffm_df,
                                    on="PetID",
                                    how="left")

In [25]:
train_ffm.head()

Unnamed: 0,PetID,AdoptionSpeed,group,ffm_text
0,86e1089a3,2,7,2 0:0:2 1:1:-0.39081823392891024 202:201:1 203...
1,6296e909a,0,3,0 0:0:2 1:1:-1.0702815346644163 202:202:1 203:...
2,3422e4906,3,0,3 0:0:1 1:1:-1.0702815346644163 202:203:1 203:...
3,5842f1ff5,2,6,2 0:0:1 1:1:-0.1720799079772499 202:203:1 203:...
4,850a43f90,2,5,2 0:0:1 1:1:-1.0702815346644163 202:203:1 203:...


In [18]:
with open("./working/test.txt", "w") as f:
    f.write("\n".join(test_ffm.loc[:, "ffm_text"].values.tolist()))


In [19]:
train_oof = np.zeros(len(train_df))
test_pred = np.zeros(len(test_df))

for j in tqdm(range(50)):
    i = j%10
    with open("./working/dev.txt", "w") as f:
        f.write("\n".join(train_ffm.loc[train_ffm["group"] != i, "ffm_text"].values.tolist()))
    with open("./working/val.txt", "w") as f:
        f.write("\n".join(train_ffm.loc[train_ffm["group"] == i, "ffm_text"].values.tolist()))
    param = {"task": "reg",
             "lr": .1,
             "epoch": 200,
             "lambda": .0001,
             "k": 4,
             "nthread": 4,
             "metric": "rmse"}
    ffm_model =xl.create_ffm()
    ffm_model.setTrain("./working/dev.txt")
    ffm_model.setValidate("./working/val.txt")
    ffm_model.fit(param, "./working/model.out")

    ffm_model.setTest("./working/val.txt")
    ffm_model.predict("./working/model.out", "./working/output.txt")
    output = pd.read_csv("./working/output.txt", header=None)[0].values
    train_oof[np.where(train_ffm["group"] == i)] += output / 5

    ffm_model.setTest("./working/test.txt")
    ffm_model.predict("./working/model.out", "./working/output.txt")
    output = pd.read_csv("./working/output.txt", header=None)[0].values
    test_pred += output / 50

100%|██████████| 50/50 [02:27<00:00,  2.05s/it]


In [20]:
from sklearn.metrics import mean_squared_error
np.sqrt(mean_squared_error(train_df["AdoptionSpeed"], train_oof))

1.0623204592256668

In [21]:
from sklearn.metrics import cohen_kappa_score

# https://www.kaggle.com/naveenasaithambi/optimizedrounder-improved
class OptimizedRounder(object):
    def __init__(self):
        self.coef_ = 0
    
    def _kappa_loss(self, coef, X, y):
        preds = pd.cut(X, [-np.inf] + list(np.sort(coef)) + [np.inf], labels = [0, 1, 2, 3, 4])
        return -cohen_kappa_score(y, preds, weights = 'quadratic')
    
    def fit(self, X, y):
        loss_partial = partial(self._kappa_loss, X = X, y = y)
        initial_coef = np.percentile(X, [2.73, 23.3, 50.3, 72]) # <= improved
        self.coef_ = sp.optimize.minimize(loss_partial, initial_coef, method = 'nelder-mead')
    
    def predict(self, X, coef):
        preds = pd.cut(X, [-np.inf] + list(np.sort(coef)) + [np.inf], labels = [0, 1, 2, 3, 4])
        return preds
    
    def coefficients(self):
        return self.coef_['x']


# https://www.kaggle.com/c/petfinder-adoption-prediction/discussion/76106
def confusion_matrix(rater_a, rater_b, min_rating=None, max_rating=None):
    """
    Returns the confusion matrix between rater's ratings
    """
    assert(len(rater_a) == len(rater_b))
    if min_rating is None:
        min_rating = min(rater_a + rater_b)
    if max_rating is None:
        max_rating = max(rater_a + rater_b)
    num_ratings = int(max_rating - min_rating + 1)
    conf_mat = [[0 for i in range(num_ratings)]
                for j in range(num_ratings)]
    for a, b in zip(rater_a, rater_b):
        conf_mat[a - min_rating][b - min_rating] += 1
    return conf_mat


def histogram(ratings, min_rating=None, max_rating=None):
    """
    Returns the counts of each type of rating that a rater made
    """
    if min_rating is None:
        min_rating = min(ratings)
    if max_rating is None:
        max_rating = max(ratings)
    num_ratings = int(max_rating - min_rating + 1)
    hist_ratings = [0 for x in range(num_ratings)]
    for r in ratings:
        hist_ratings[r - min_rating] += 1
    return hist_ratings


def quadratic_weighted_kappa(y, y_pred):
    """
    Calculates the quadratic weighted kappa
    axquadratic_weighted_kappa calculates the quadratic weighted kappa
    value, which is a measure of inter-rater agreement between two raters
    that provide discrete numeric ratings.  Potential values range from -1
    (representing complete disagreement) to 1 (representing complete
    agreement).  A kappa value of 0 is expected if all agreement is due to
    chance.
    quadratic_weighted_kappa(rater_a, rater_b), where rater_a and rater_b
    each correspond to a list of integer ratings.  These lists must have the
    same length.
    The ratings should be integers, and it is assumed that they contain
    the complete range of possible ratings.
    quadratic_weighted_kappa(X, min_rating, max_rating), where min_rating
    is the minimum possible rating, and max_rating is the maximum possible
    rating
    """
    rater_a = y
    rater_b = y_pred
    min_rating=None
    max_rating=None
    rater_a = np.array(rater_a, dtype=int)
    rater_b = np.array(rater_b, dtype=int)
    assert(len(rater_a) == len(rater_b))
    if min_rating is None:
        min_rating = min(min(rater_a), min(rater_b))
    if max_rating is None:
        max_rating = max(max(rater_a), max(rater_b))
    conf_mat = confusion_matrix(rater_a, rater_b,
                                min_rating, max_rating)
    num_ratings = len(conf_mat)
    num_scored_items = float(len(rater_a))

    hist_rater_a = histogram(rater_a, min_rating, max_rating)
    hist_rater_b = histogram(rater_b, min_rating, max_rating)

    numerator = 0.0
    denominator = 0.0

    for i in range(num_ratings):
        for j in range(num_ratings):
            expected_count = (hist_rater_a[i] * hist_rater_b[j]
                              / num_scored_items)
            d = pow(i - j, 2.0) / pow(num_ratings - 1, 2.0)
            numerator += d * conf_mat[i][j] / num_scored_items
            denominator += d * expected_count / num_scored_items

    return (1.0 - numerator / denominator)

In [22]:
y_train = train_df["AdoptionSpeed"].values
group = train_df["group"].values

coef_mean = np.zeros(4)
for fold in range(10):
    dev_idx = np.where(group != fold)[0]
    val_idx = np.where(group == fold)[0]

    pred_dev = train_oof[dev_idx]
    y_dev = y_train[dev_idx]

    pred_val = train_oof[val_idx]
    y_val = y_train[val_idx]

    optR = OptimizedRounder()
    optR.fit(pred_dev, y_dev)
    coefficients = optR.coefficients()
    coef_mean += coefficients / 10
    pred_val_k = optR.predict(pred_val, coefficients)

In [23]:
quadratic_weighted_kappa(y_train, optR.predict(train_oof, coef_mean))

0.41893563186692717

In [24]:
smpsb_df["AdoptionSpeed"] = optR.predict(test_pred, coef_mean)
smpsb_df.to_csv("submission.csv", index=None)