In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
def get_effort(incentive):
    """Map an incentive amount to an effort level.

    Computes ``10 * (1 - exp(-incentive / 400))``: 0 for a zero incentive and
    approaching 10 as the incentive grows.

    Parameters
    ----------
    incentive : scalar or numpy array
        Incentive amount(s).

    Returns
    -------
    Same shape as ``incentive``.
    """
    decay = np.exp(-incentive / 400.0)
    return 10 * (1 - decay)
    
def get_improvement_in_renewal_probability(incentive):
    """Map an incentive amount to an improvement in renewal probability.

    The incentive is first converted to an effort level with ``get_effort``;
    the result is ``20 * (1 - exp(-effort / 5))``, which is 0 for zero effort
    and bounded above by 20. ``revenue_score`` divides this value by 100, i.e.
    it is treated there as a percentage of the benchmark probability.

    Parameters
    ----------
    incentive : scalar or numpy array
        Incentive amount(s).

    Returns
    -------
    Same shape as ``incentive``.
    """
    remaining = np.exp(-get_effort(incentive) / 5.0)
    return 20 * (1 - remaining)

def gradient_improvement_in_renewal_probability(incentive):
    """Derivative of ``get_improvement_in_renewal_probability`` w.r.t. incentive.

    Applies the chain rule through the effort level:

    * d(improvement)/d(effort) = 4 * exp(-effort / 5)
    * d(effort)/d(incentive)   = exp(-incentive / 400) / 40

    Parameters
    ----------
    incentive : scalar or numpy array
        Incentive amount(s) at which to evaluate the derivative.

    Returns
    -------
    Same shape as ``incentive``.
    """
    improvement_per_effort = 4 * np.exp(-get_effort(incentive) / 5.0)
    effort_per_incentive = np.exp(-incentive / 400.0) / 40.0
    return improvement_per_effort * effort_per_incentive
    
def revenue_score(incentive, benchmark, premium):
    """Total expected net revenue for the given incentive(s).

    The benchmark renewal probability is raised by
    ``get_improvement_in_renewal_probability(incentive)`` percent of itself,
    multiplied by the premium, and the incentive paid out is subtracted.
    The per-element results are summed with ``np.sum``.

    Parameters
    ----------
    incentive : scalar or numpy array
        Incentive amount(s).
    benchmark : scalar or numpy array
        Renewal probability before any incentive.
    premium : scalar or numpy array
        Premium amount(s).

    Returns
    -------
    scalar
        Sum of the per-element net revenues.
    """
    uplift_pct = get_improvement_in_renewal_probability(incentive)
    boosted_probability = benchmark + (uplift_pct * benchmark / 100.0)
    net_revenue = (boosted_probability * premium) - incentive
    return np.sum(net_revenue)

def score_gradient(incentive, benchmark, premium):
    """Derivative of the per-policy revenue with respect to the incentive.

    With ``u = exp(-incentive / 400)`` the revenue model used by
    ``revenue_score`` is::

        effort      = 10 * (1 - u)
        improvement = 20 * (1 - exp(-effort / 5)) = 20 * (1 - exp(2*u - 2))
        revenue     = benchmark * premium * (1 + improvement / 100) - incentive

    so that::

        d(revenue)/d(incentive)
            = benchmark * premium * exp(2*u - incentive/400 - 2) / 1000 - 1

    The previous closed form used ``exp(u - incentive/400 - 2) / 400``, which
    is not the derivative of the revenue above; its root therefore was not the
    revenue-maximising incentive.

    Parameters
    ----------
    incentive : scalar or numpy array
        Incentive amount(s) at which to evaluate the derivative.
    benchmark : scalar or numpy array
        Renewal probability before any incentive.
    premium : scalar or numpy array
        Premium amount(s).

    Returns
    -------
    Element-wise derivative (not summed, unlike ``revenue_score``).
    """
    decay = np.exp(-incentive / 400.0)
    exponent = 2.0 * decay - (incentive / 400.0) - 2.0
    # The trailing -1 is the marginal cost of paying one more unit of incentive.
    return (benchmark * premium * np.exp(exponent) / 1000.0) - 1

def second_score_gradient(incentive, benchmark, premium):
    """Second derivative of the per-policy revenue with respect to the incentive.

    With ``u = exp(-incentive / 400)`` and ``w = 2*u - incentive/400 - 2`` the
    first derivative of the revenue model used by ``revenue_score`` is
    ``benchmark * premium * exp(w) / 1000 - 1``. Differentiating once more,
    with ``dw/d(incentive) = -(2*u + 1) / 400``, gives::

        -(benchmark * premium * exp(w) / 1000) * (2*u + 1) / 400

    The previous expression matched neither this nor the derivative of the
    old ``score_gradient`` formula.

    Parameters
    ----------
    incentive : scalar or numpy array
        Incentive amount(s) at which to evaluate the derivative.
    benchmark : scalar or numpy array
        Renewal probability before any incentive.
    premium : scalar or numpy array
        Premium amount(s).

    Returns
    -------
    Element-wise second derivative; non-positive whenever
    ``benchmark * premium`` is non-negative.
    """
    decay = np.exp(-incentive / 400.0)
    exponent = 2.0 * decay - (incentive / 400.0) - 2.0
    marginal_revenue = benchmark * premium * np.exp(exponent) / 1000.0
    return -marginal_revenue * (2.0 * decay + 1.0) / 400.0

def learn(benchmark, premium, early_stopping = 5, start_incentive = 1700, min_improvement = 0.005, verbose = True):
    """Search for the incentive that maximises ``revenue_score`` by gradient ascent.

    Each round moves the incentive by ``incentive * g``, where ``g`` is
    ``score_gradient`` on the first round and the average of the current and
    previous gradients afterwards. The step size is therefore proportional to
    the current incentive, and an incentive of exactly 0 never moves.

    Parameters
    ----------
    benchmark : scalar
        Renewal probability before any incentive.
    premium : scalar
        Premium amount.
    early_stopping : int
        Patience. The loop stops on the first non-improving round at which the
        no-improvement counter already exceeds this value, i.e. after
        ``early_stopping + 2`` consecutive rounds without improvement.
    start_incentive : scalar
        Initial incentive.
    min_improvement : float
        Minimum increase over the best score for a round to count as an
        improvement.
    verbose : bool
        Print one line per round and a final early-stopping line.

    Returns
    -------
    (best_incentive, best_score) : tuple
        Best incentive visited (including the starting point) and its score.
    """
    incentive = start_incentive
    best_incentive = start_incentive

    # Score the starting point so the returned best_score is always a value
    # that was actually evaluated (it used to start at a fictitious 0).
    best_score = revenue_score(start_incentive, benchmark, premium)

    counter = 0
    best_counter = 0
    no_improvement_counter = 0

    # Explicit sentinel for "no previous gradient yet"; the first round used to
    # be detected through a NameError swallowed by a bare ``except``.
    prev_gradient = None

    while True:
        lr = (incentive - 0)
        gradient = score_gradient(incentive, benchmark, premium)
        if prev_gradient is None:
            step = lr * gradient
        else:
            step = lr * (gradient * 0.5 + 0.5 * prev_gradient)
        # Rebind instead of ``+=`` so an array passed as start_incentive is
        # never modified in place.
        incentive = incentive + step

        score = revenue_score(incentive, benchmark, premium)
        counter += 1
        prev_gradient = gradient

        if (score - best_score > min_improvement):
            best_score = score
            best_incentive = incentive
            best_counter = counter
            no_improvement_counter = 0
        elif (no_improvement_counter > early_stopping):
            if verbose:
                print("Early Stopping, Best Iteration Round: {}".format(best_counter))
            return best_incentive, best_score
        else:
            no_improvement_counter += 1

        # Single-argument print(...) behaves the same on Python 2 and Python 3.
        if verbose:
            print("Epoch: {}, Incentive: {}, Score: {}, Gradient: {}".format(counter, incentive, score, gradient))

                    
def annealing(benchmark, premium, num_annealing = 10):
    """Run ``learn`` from several starting incentives and keep the best result.

    Starting incentives are spread over ``[0, premium)`` in steps of
    ``premium / num_annealing`` (truncated to an integer, at least 1).

    Parameters
    ----------
    benchmark : scalar
        Renewal probability before any incentive.
    premium : scalar
        Premium amount; truncated to an integer for the start grid only.
    num_annealing : int
        Approximate number of starting points; must be at least 1.

    Returns
    -------
    The incentive of the run with the highest score. Returns 0 when the
    premium leaves no starting point (``int(premium) <= 0``).

    Raises
    ------
    ValueError
        If ``num_annealing`` is smaller than 1.
    """
    if num_annealing < 1:
        raise ValueError("num_annealing must be at least 1")

    # range() needs integers; a float premium (e.g. from a float CSV column)
    # would otherwise raise.
    upper = int(premium)
    if upper <= 0:
        # No candidate start exists; there is nothing to search over.
        return 0

    # Guard against a zero step when premium < num_annealing.
    step = max(1, int(float(premium) / num_annealing))

    results = [
        learn(benchmark, premium, early_stopping = 5, start_incentive = start, min_improvement = 0.005, verbose = False)
        for start in range(0, upper, step)
    ]
    # Iterate in reverse so that, among equal scores, the run with the largest
    # start wins -- the same tie-break as a stable sort followed by [-1].
    return max(reversed(results), key = lambda x: x[1])[0]

In [3]:
# Path templates of the four ensemble variants; each has files eda1..eda10.
# The order here fixes the column order of every matrix built from these lists.
submission_templates = (
    "best_ensemble/csv/eda{}.csv",
    "best_ensemble_tid/csv/eda{}.csv",
    "best_ensemble_bagged/csv/eda{}.csv",
    "best_ensemble_tid_bagged/csv/eda{}.csv",
)
submit_files = [
    pd.read_csv(template.format(i))
    for template in submission_templates
    for i in range(1,11)
]

# One array per submission file.
renewals = [df["renewal"].values for df in submit_files]
incentives = [df["incentives"].values for df in submit_files]
# Ids are taken from the first file only; presumably every file lists the same
# ids in the same order -- TODO confirm.
ids = submit_files[0]["id"]

In [4]:
# Premium per id, in the row order of the sample submission.
submission_ids = pd.read_csv("data/sample_submission_sLex1ul.csv", usecols = ["id"])
test_premiums = pd.read_csv("data/test_66516Ee.csv", usecols = ["id","premium"])
premiums = submission_ids.merge(test_premiums, how = "left", on="id")["premium"]
all_predicted_incentives = []

for c, benchmarks in enumerate(renewals):
    # Printed before file c is processed, so it reports the number of files
    # already finished. Single-argument print(...) works on Python 2 and 3.
    print("Done {} Predictions".format(c))

    # zip() would silently truncate to the shorter input and leave rows
    # without an incentive, so fail loudly on a length mismatch instead.
    assert len(benchmarks) == len(premiums), "renewal predictions and premiums differ in length"

    # Re-optimise the incentive of every row from its predicted renewal
    # probability and its premium.
    incentive_predictions = [annealing(b, p) for b, p in zip(benchmarks, premiums)]
    all_predicted_incentives.append(np.array(incentive_predictions))

Done 0 Predictions
Done 1 Predictions
Done 2 Predictions
Done 3 Predictions
Done 4 Predictions
Done 5 Predictions
Done 6 Predictions
Done 7 Predictions
Done 8 Predictions
Done 9 Predictions
Done 10 Predictions
Done 11 Predictions
Done 12 Predictions
Done 13 Predictions
Done 14 Predictions
Done 15 Predictions
Done 16 Predictions
Done 17 Predictions
Done 18 Predictions
Done 19 Predictions
Done 20 Predictions
Done 21 Predictions
Done 22 Predictions
Done 23 Predictions
Done 24 Predictions
Done 25 Predictions
Done 26 Predictions
Done 27 Predictions
Done 28 Predictions
Done 29 Predictions
Done 30 Predictions
Done 31 Predictions
Done 32 Predictions
Done 33 Predictions
Done 34 Predictions
Done 35 Predictions
Done 36 Predictions
Done 37 Predictions
Done 38 Predictions
Done 39 Predictions


In [5]:
# NOTE: this cell rebinds all_predicted_incentives, incentives and renewals
# from per-file lists to averaged Series, so re-running it without re-running
# the cells that build those lists gives different results.

# Transposing puts one CSV row per matrix row and one submission file per
# column; mean(axis=1) then averages across files.
recalculated_matrix = pd.DataFrame(np.array(all_predicted_incentives).T)
all_predicted_incentives = recalculated_matrix.mean(axis=1)

submitted_matrix = pd.DataFrame(np.array(incentives).T)
incentives = submitted_matrix.mean(axis=1)

# Equal-weight blend of the submitted and the recalculated incentives.
incentives = (incentives+all_predicted_incentives)/2

# Renewal predictions are converted to percentile ranks within each file
# before averaging across files.
renewal_ranks = pd.DataFrame(np.array(renewals).T).rank(pct = True, axis=0)
renewals = renewal_ranks.mean(axis=1)

In [6]:
# Assemble the submission in column order renewal, incentives, id. The frame
# takes its index from ``renewals``; the other columns are aligned to it.
submission = renewals.to_frame("renewal")
submission["incentives"] = incentives
submission["id"] = ids
submission.to_csv("submission/incentives_recalculated_ensemble.csv", index=False)