In [None]:
# !pip install pyarrow
# !pip install fastparquet

In [None]:
import numpy as np, pandas as pd, datetime as dt
import matplotlib.pyplot as plt; plt.style.use('ggplot')
import seaborn as sns
from collections import defaultdict

def iter_to_str(iterable):
    """Render the items as one space-separated string, each prefixed with "0".

    Every element is converted with ``str`` and gets a single leading "0"
    prepended; an empty iterable yields the empty string.
    """
    padded = ("0" + str(item) for item in iterable)
    return " ".join(padded)

def apk(actual, predicted, k=12):
    """Average precision at ``k`` for a single ranked prediction list.

    Parameters
    ----------
    actual : collection of relevant items (membership is tested with ``in``).
    predicted : ranked sequence of predicted items; only the first ``k`` count.
    k : cutoff rank.

    Returns
    -------
    float
        Sum of precision-at-rank over the ranks holding a relevant item that
        has not appeared earlier in the list, divided by ``min(len(actual), k)``.
        Returns 0.0 when ``actual`` is empty.
    """
    top = predicted[:k] if len(predicted) > k else predicted

    hits = 0
    precision_sum = 0.0
    for rank, item in enumerate(top, start=1):
        # A repeated prediction is only credited at its first occurrence.
        if item in actual and item not in top[:rank - 1]:
            hits += 1
            precision_sum += hits / rank

    if not actual:
        return 0.0
    return precision_sum / min(len(actual), k)

def mapk(actual, predicted, k=12, return_apks=False):
    """Mean average precision at ``k`` over paired ground-truth / prediction lists.

    Pairs whose ground-truth entry is empty are skipped, so they do not
    contribute to the mean. ``actual`` and ``predicted`` must have equal length.

    Returns the list of per-pair ``apk`` scores when ``return_apks`` is true,
    otherwise their ``np.mean``.
    """
    assert len(actual) == len(predicted)

    per_pair = []
    for truth, ranked in zip(actual, predicted):
        if len(truth) > 0:
            per_pair.append(apk(truth, ranked, k))

    return per_pair if return_apks else np.mean(per_pair)

def blend(dt, w=None, k=12):
    """Blend several ranked prediction strings into one top-``k`` string.

    Each input is a whitespace-separated ranking. An item at 1-based rank
    ``r`` in input ``i`` contributes ``w[i] / r`` to that item's score; scores
    are summed across inputs and the ``k`` best items are returned, best first.
    Ties keep the order in which the items were first seen.

    Parameters
    ----------
    dt : sequence of str
        The rankings to blend. May be a list/tuple or a pandas Series (for
        example a row handed over by ``DataFrame.apply(axis=1)``); entries are
        always read by position. Note that this parameter name shadows the
        module-level ``datetime as dt`` alias inside the function.
    w : sequence of numbers, optional
        One weight per input. Omitted or empty means a weight of 1 for every
        input. Inputs with a negative weight are skipped; a zero weight still
        registers the input's items, with a contribution of 0.
    k : int
        Maximum number of items in the result.

    Returns
    -------
    str
        Space-separated blended ranking.
    """
    if w is None or len(w) == 0:
        w = [1] * len(dt)

    # Integer keys on a string-labelled Series only worked through pandas'
    # positional fallback, which is deprecated; .iloc is explicitly positional.
    by_position = dt.iloc if hasattr(dt, "iloc") else dt
    rankings = [by_position[i].split() for i in range(len(w))]

    scores = {}
    for weight, ranking in zip(w, rankings):
        if weight < 0:
            continue
        for rank, item in enumerate(ranking, start=1):
            scores[item] = scores.get(item, 0) + weight / rank

    # sorted() is stable, so equal scores keep first-seen (insertion) order.
    ordered = sorted(scores, key=lambda item: -scores[item])
    return ' '.join(ordered[:k])

def prune(pred, ok_set, k=12):
    """Filter a prediction string down to allowed, de-duplicated items.

    ``pred`` is split on whitespace. A token is kept when its integer value is
    in ``ok_set`` and it has not been kept already. Tokens are returned as they
    were written (leading zeros intact), in first-seen order, capped at ``k``
    and joined by single spaces. A token that is not a valid integer raises
    ``ValueError``.
    """
    kept = {}  # dict used as an insertion-ordered set of surviving tokens
    for token in pred.split():
        if int(token) in ok_set:
            kept.setdefault(token, None)
    return " ".join(list(kept)[:k])

def _fill_rate_percent(predicted, n_customers, k=12):
    """Percentage of the ``k * n_customers`` prediction slots that are filled.

    Prediction lists longer than ``k`` count as completely filled. The value is
    scaled to percent first and rounded afterwards, so the result carries no
    floating-point residue from a late multiplication.
    """
    missing = sum(max(0, k - len(items)) for items in predicted)
    return round(100 * (1 - missing / (k * n_customers)), 1)


def validation(actual, predicted, grouping, score=0, index=-1, ignore=False, figsize=(12, 6)):
    """Report fill rate and MAP@12, overall and per group, and plot both.

    Parameters
    ----------
    actual, predicted : collections of per-customer item lists
        Must be indexable with a boolean mask aligned to ``grouping`` (e.g.
        pandas Series of lists) unless ``ignore`` is true, because each group's
        rows are selected with ``actual[grouping == g]``.
    grouping : pandas Series
        Group label per customer; one MAP@12 value is computed per unique label.
    score : pandas DataFrame or int
        Table of earlier results to extend. An int (the default ``0``) means
        "start a fresh table" with one column per group label.
    index : row label for this run in ``score``; ``-1`` appends at
        ``score.shape[0]``.
    ignore : when true, only the fill rate is printed and ``None`` is returned.
    figsize : size of the two-panel matplotlib figure.

    Returns
    -------
    pandas DataFrame or None
        ``score`` with the "All", per-group and "Fill" entries of this run
        filled in, or ``None`` when ``ignore`` is true.

    Notes
    -----
    ``display`` is not imported in this file; presumably it is the builtin that
    IPython/Jupyter injects - confirm before running outside a notebook.
    """
    fill = _fill_rate_percent(predicted, len(actual))
    print("Fill Rate = ", fill)

    if ignore:
        return

    ap12 = mapk(actual, predicted, return_apks=True)
    map12 = round(np.mean(ap12), 6)

    groups = grouping.unique()
    if isinstance(score, int):
        score = pd.DataFrame({g: [] for g in sorted(groups.tolist())})
    if index == -1:
        index = score.shape[0]
    score.loc[index, "All"] = map12

    plt.figure(figsize=figsize)

    # Left panel: distribution of per-customer AP@12 (log-scaled count axis).
    plt.subplot(1, 2, 1)
    sns.histplot(data=ap12, log_scale=(0, 10), bins=20)
    plt.title(f"MAP@12 : {map12}")

    for g in groups:
        in_group = grouping == g
        score.loc[index, g] = round(mapk(actual[in_group], predicted[in_group]), 6)

    # Right panel: this run's MAP@12 per group next to the overall value.
    plt.subplot(1, 2, 2)
    score[list(groups[::-1]) + ['All']].loc[index].plot.barh()
    plt.title("MAP@12 of Groups")

    # Added after plotting so the bar chart only shows MAP columns.
    score.loc[index, "Fill"] = fill
    display(score)
    return score

In [None]:
# Base frame for the ensemble: the competition's sample submission supplies the
# customer_id column that the blended predictions are written out against.
SAMPLE_SUBMISSION_CSV = '../input/h-and-m-personalized-fashion-recommendations/sample_submission.csv'
sub = pd.read_csv(SAMPLE_SUBMISSION_CSV)

In [None]:
# Candidate submissions to ensemble: column name in `sub` -> CSV path.
PREDICTION_FILES = {
    'sub0': "../input/hm-for-ensemble/submission_uucf0252.csv",
    'sub1': "../input/hm-for-ensemble/submission-blend-255.csv",
    'sub2': "../input/hm-for-ensemble/submission-magic-multi-brend-0240.csv",
    'sub3': "../input/handmsubmitfiles/LGBM_Ranker_submission_229.csv",
    # lstm
    'sub4': "../input/lstm-model-with-item-infor-fix-missing-last-item/submission.csv",
    # collabo
    'sub5': "../input/handmsubmitfiles/colabo_uucf_only_ver6.csv",
    # agegen
    'sub6': "../input/h-m-easy-grouping-by-sex-attribute-age-en-jp/submission.csv",
}


def load_predictions(path, customer_ids):
    """Read one submission CSV and return its predictions ordered like ``customer_ids``.

    Predictions are matched on ``customer_id`` rather than on row position, so a
    file whose rows are ordered differently (or that lacks some customers) can
    not silently attach predictions to the wrong customer. Customers without a
    prediction get the empty string.

    Assumes the file has ``customer_id`` and ``prediction`` columns, as the
    other submissions handled in this notebook do - TODO confirm for new files.
    """
    submission = pd.read_csv(path)
    by_customer = (
        submission
        .drop_duplicates('customer_id')  # Series.map needs a unique lookup index
        .set_index('customer_id')['prediction']
    )
    return customer_ids.map(by_customer).fillna("")


# One column per candidate submission; each CSV is released as soon as its
# predictions have been copied into `sub`.
for column, path in PREDICTION_FILES.items():
    sub[column] = load_predictions(path, sub['customer_id'])

In [None]:
# Peek at the first rows to check that the sub0..sub6 columns were attached.
sub.head(n=3)

In [None]:
# Columns of `sub` to blend, and the weight applied to each (same position).
# A weight of 0 keeps the column in the call but gives its items no score;
# blend() only skips inputs whose weight is negative.
targets = ['sub0', 'sub1', 'sub2', 'sub3', 'sub4', 'sub5', 'sub6']
weights = [1.6, 1, 0.20, 0.20, 0.10, 0, 0.10]

# Author's notes kept from earlier experiments:
#   - "0.0252 0.0255 0.0240 0.0229" was written next to the weights
#     (presumably the scores of sub0..sub3 - TODO confirm).
#   - An earlier setup annotated "0.0255" used
#     targets = ['sub0', 'sub1', 'sub2', 'sub3', 'sub4'] with
#     weights = [1, 1.1, 0.8, 0.7, 0.7].

# blend() reads exactly len(weights) inputs, so a mismatch would either drop
# trailing targets silently or fail with an index error on every row.
assert len(targets) == len(weights), "targets and weights must have the same length"

# Plain tuples are handed to blend() instead of going through
# DataFrame.apply(axis=1): no per-row Series is built and blend's positional
# lookups never touch label-based Series indexing.
sub['prediction'] = [
    blend(row, w=weights, k=12)
    for row in sub[targets].itertuples(index=False, name=None)
]

In [None]:
# Peek at the first rows to eyeball the blended `prediction` column.
sub.head(n=3)

In [None]:
# Write only the two submission columns, without the DataFrame's row index.
submission = sub[['customer_id', 'prediction']]
submission.to_csv('submission_ensamble.csv', index=False)