# Fine tuning averaging weights

Fine-tuning models can dramatically improve their performance.

In the last notebook we tried to get the most out of the blending technique by changing the weights manually.
In this notebook we will write an optimization function that finds the weights for us.

In [None]:
import glob
import pandas as pd
import numpy as np

In [None]:
# Locations of the input and output data folders, relative to this notebook.
INPUT_DIR = "../data-nlp/input/"
OUTPUT_DIR = "../data-nlp/output/"

# NOTE(review): presumably a random seed; it is not used in the visible cells.
RAND = 10

## Generating a new dataframe from the base models

In [None]:
# Collect every base-model prediction file. glob returns paths in arbitrary
# order, so sort them to get the same merge (and column) order on every run.
files = sorted(glob.glob(OUTPUT_DIR + "02_basemodel*.csv"))

if not files:
    # Fail early with a clear message instead of an AttributeError on None below.
    raise FileNotFoundError(
        f"No base-model prediction files match {OUTPUT_DIR}02_basemodel*.csv"
    )

df = None

for f in files:
    temp_df = pd.read_csv(f)
    if df is None:
        # The first file provides the shared columns for all later merges.
        df = temp_df
        continue

    # Merge in only the columns the accumulated frame does not have yet.
    # Re-merging columns that every file shares would make pandas rename them
    # with _x/_y suffixes, so whether a plain `sentiment`/`kfold` column
    # survives would depend on how many files are merged.
    new_cols = [c for c in temp_df.columns if c not in df.columns]
    df = df.merge(temp_df[["id"] + new_cols], on="id", how="left")

df.head(10)

Unnamed: 0.1,id,sentiment_x,kfold_x,lr_cnt_pred,Unnamed: 0,sentiment_y,kfold_y,lr_pred,sentiment,kfold,rf_svd_pred
0,11703_9,1,0,0.9996652,0,1,0,0.868421,1,0,0.57
1,6742_8,1,0,0.9866726,1,1,0,0.727763,1,0,0.59
2,10071_1,0,0,0.4653006,2,0,0,0.441153,0,0,0.54
3,9841_7,1,0,0.9999992,3,1,0,0.943808,1,0,0.73
4,11579_10,1,0,0.9851556,4,1,0,0.900753,1,0,0.51
5,5216_8,1,0,0.9852354,5,1,0,0.729515,1,0,0.51
6,1683_7,1,0,0.006576467,6,1,0,0.309059,1,0,0.4
7,818_4,0,0,0.9940245,7,0,0,0.7138,0,0,0.73
8,11779_3,0,0,0.02116804,8,0,0,0.307074,0,0,0.32
9,27_1,0,0,2.024757e-11,9,0,0,0.001033,0,0,0.12


## Selecting the newly generated features

In [None]:
# Names of the columns that hold the predictions of each base model.
pred_features = [
    "lr_pred",
    "lr_cnt_pred",
    "rf_svd_pred",
]

In [None]:
from sklearn.metrics import roc_auc_score

# Ground-truth labels that every base model is scored against.
targets = df["sentiment"].values

## Printing initial AUC values for the different base models

In [None]:
# Report the AUC of each base model's predictions on the full dataset.
for col in pred_features:
    auc = roc_auc_score(y_true=targets, y_score=df[col].values)
    print(f"{col}, overall_auc={auc}")



lr_pred, overall_auc=0.9557241343999999
lr_cnt_pred, overall_auc=0.9462495167999999
rf_svd_pred, overall_auc=0.8796757056


## Importing essential functions

In [None]:
from functools import partial
from scipy.optimize import fmin

## Using a class for optimizing weights

In [None]:
class OptimizeAUC:
    """Search for blending weights that maximize the AUC of a weighted sum.

    Each column of ``X`` holds the predictions of one model. ``fit`` looks for
    one coefficient per column so that the row-wise weighted sum scores the
    highest ROC AUC against ``y``.

    Parameters
    ----------
    random_state : int or None, default None
        Seed for the random starting point of the search. With ``None`` the
        global NumPy random state is used, so results vary between runs
        unless the caller has seeded it.
    """

    def __init__(self, random_state=None):
        # Placeholder until fit() runs; predict() returns all zeros with it.
        self.coef_ = 0
        self.random_state = random_state

    def _auc(self, coef, X, y):
        """Return the negative AUC of the predictions blended with ``coef``.

        This is the objective handed to the optimizer (through ``partial``).
        """
        x_coef = X * coef
        predictions = np.sum(x_coef, axis=1)
        auc_score = roc_auc_score(y, predictions)

        # fmin minimizes, so negate the score in order to maximize AUC.
        return -1.0 * auc_score

    def fit(self, X, y):
        """Find the coefficients for the columns of ``X`` and store them in ``coef_``.

        Parameters
        ----------
        X : 2-D array, one row per sample and one column per model.
        y : true labels, one per row of ``X``.

        Returns
        -------
        self
        """
        partial_loss = partial(self._auc, X=X, y=y)

        # A seeded generator makes the starting point reproducible; without a
        # seed, fall back to the global NumPy random state as before.
        if self.random_state is None:
            rng = np.random
        else:
            rng = np.random.RandomState(self.random_state)

        # Start from random non-negative weights that sum to 1. The search
        # itself is unconstrained, so the result need not keep that property.
        init_coef = rng.dirichlet(np.ones(X.shape[1]))
        self.coef_ = fmin(partial_loss, init_coef, disp=True)
        return self

    def predict(self, X):
        """Return the row-wise weighted sum of ``X`` using the stored coefficients."""
        x_coef = X * self.coef_
        predictions = np.sum(x_coef, axis=1)
        return predictions


## Finding the best coefficients for each fold

In [None]:
def run_training(pred_df, fold):
    """Fit blending weights on every fold except ``fold`` and score them on it.

    Parameters
    ----------
    pred_df : DataFrame with a ``kfold`` column, a ``sentiment`` label column
        and the prediction columns listed in ``pred_features``.
    fold : value of ``kfold`` that identifies the held-out rows.

    Returns
    -------
    The coefficients found by the optimizer, one per entry of ``pred_features``.
    """
    train_df = pred_df[pred_df.kfold != fold].reset_index(drop=True)
    valid_df = pred_df[pred_df.kfold == fold].reset_index(drop=True)

    xtrain = train_df[pred_features].values
    xvalid = valid_df[pred_features].values

    opt = OptimizeAUC()
    opt.fit(xtrain, train_df.sentiment.values)
    preds = opt.predict(xvalid)

    # Score the blended predictions on the rows the weights were not fitted on.
    auc = roc_auc_score(valid_df.sentiment.values, preds)
    print(f"fold={fold}, auc={auc}\n")

    return opt.coef_

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=b1ab3b60-9130-40c7-8fa2-288950ad463c' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>