In [1]:
import os
import logging
import dataclasses
import pandas as pd
import numpy as np
from typing import cast, Dict, Optional, Sequence, Tuple, Union, List, Text
import math
from sklearn.metrics import precision_recall_fscore_support,roc_curve,auc
import matplotlib.pyplot as plt

import tensorflow_recommenders as tfrs
import tensorflow as tf
from tensorflow.python.keras.callbacks import ModelCheckpoint

#from tqdm.notebook import tqdm
from tqdm import tqdm
logging.getLogger('tensorflow').propagate = False

In [2]:
!bq extract --noprint_header \
     maximal-furnace-783:Ashish.ranker_isp_ou \
     gs://tpu-cg-us/Ashish/ranker_isp_ou/*.csv

Waiting on bqjob_r57d920eaed498fca_00000186967dbd6b_1 ... (108s) Current status: DONE   


In [3]:
# Distribution strategy used below to build the distributed test dataset.
strategy = tf.distribute.get_strategy()

In [4]:
# Column layout of the headerless CSV export, in file order.
#
# The names are generated from their repeating patterns instead of being kept
# in one multi-kilobyte string literal, which is easy to corrupt (a literal
# wrapped across physical lines is a syntax error) and impossible to review.
_EMBED_SLOTS = 32           # numbered slots in each engagement-history block
_ENGAGED_TAG_SLOTS = 25     # engaged-tag ids (and as many mask columns) per row
_ENGAGEMENTS = ("vplay", "fav", "like", "share")
_RATIO_METRICS = ("Like", "Share", "Fav", "Comment", "SVP", "LPO")
# The user-level ratio columns spell the comment metric "Comments".
_USER_RATIO_METRICS = ("Like", "Share", "Fav", "Comments", "SVP", "LPO")


def _history_columns(side):
    """Return '<side><eng>_0..31' followed by '<side><eng>_mask' per engagement."""
    names = []
    for engagement in _ENGAGEMENTS:
        names += [f"{side}{engagement}_{slot}" for slot in range(_EMBED_SLOTS)]
        names.append(f"{side}{engagement}_mask")
    return names


def _ratio_columns(prefix, windows, metrics=_RATIO_METRICS):
    """Return '<prefix><metric>Ratio<window>' names, grouped window by window."""
    return [f"{prefix}{metric}Ratio{window}" for window in windows for metric in metrics]


col_names = (
    ["isp_date", "model", "userId", "postId",
     "unified_signal1", "combined_score", "video_play", "isp_score"]
    + ["unified_signal"]
    + ["hour", "dayofweek", "is_weekend",
       "is_morning", "is_afternoon", "is_evening", "is_night"]
    + ["tagId"]
    + _history_columns("p")
    + _ratio_columns("post", ("2h", "1D"))
    + _ratio_columns("pc", ("2h", "1D"))
    + ["userDistrict"]
    + _history_columns("u")
    + ["video_affinity"]
    + _ratio_columns("user", ("1", "7"), metrics=_USER_RATIO_METRICS)
    + _ratio_columns("upc", ("1D", "3D", "7D"))
    + [f"engtag_{slot}" for slot in range(_ENGAGED_TAG_SLOTS)]
    + [f"engtag_mask_{slot}" for slot in range(_ENGAGED_TAG_SLOTS)]
)

# Kept for backward compatibility: the same layout as one comma-separated string.
cols = ",".join(col_names)
print(len(col_names))

387


In [28]:
# Number of label columns that follow the meta columns in every CSV row.
num_labels = 1

# Column counts of the time/context group; CSVReader reads
# hour_feat + dayofweek + num_other_features consecutive columns for it.
hour_feat = 1
dayofweek = 1
num_other_features = 5

# Categorical features that are embedded by the model.
sparse_features = [
    'userDistrict',
    'tagId'
]

# NOTE(review): not referenced in the visible code; presumably the number of
# engaged-tag slots per user (the model hard-codes input_length=25) - confirm.
max_sequence_length = 25

# Embedding table sizes (used as Embedding input_dim).
vocab_sizes = {
    'userDistrict': 720,
    'tagId': 4000
}

# Embedding widths (used as Embedding output_dim).
embedding_dims = {
    'userDistrict': 32,
    'tagId': 32,
}

# Leading CSV columns; parsed as strings and returned under features['meta'].
meta = [
    'isp_date','model','userId','postId','unified_signal1','combined_score','video_play','isp_score'
#      'combinedscore','video_play','likes','shares','userId', 'postId', 'L0', 'L1', 'L2'
] 

# Names of the time/context columns; 'hour' and 'dayofweek' are cast to int32
# by CSVReader, the remaining flags to float32.
other_feats = [
    'hour', 'dayofweek', 'is_weekend', 'is_morning', 'is_afternoon', 'is_evening', 'is_night'
]

# User-side categorical columns (looked up through the district index table).
user_sparse_features = [
    'userDistrict'
]

# User-side dense columns, in CSV order:
#   * four engagement-history blocks (32 numbered slots followed by a mask column),
#   * the video affinity score,
#   * user engagement ratios for the "1" and "7" windows,
#   * "upc" engagement ratios for the 1D / 3D / 7D windows.
user_dense_features = [
    column
    for signal in ("uvplay", "ufav", "ulike", "ushare")
    for column in [f"{signal}_{slot}" for slot in range(32)] + [f"{signal}_mask"]
]
user_dense_features.append("video_affinity")
user_dense_features.extend(
    f"user{metric}Ratio{window}"
    for window in ("1", "7")
    for metric in ("Like", "Share", "Fav", "Comments", "SVP", "LPO")
)
user_dense_features.extend(
    f"upc{metric}Ratio{window}"
    for window in ("1D", "3D", "7D")
    for metric in ("Like", "Share", "Fav", "Comment", "SVP", "LPO")
)

# Engaged-tag columns: the 25 tag-id slots come first, followed by their 25
# mask columns.  CSVReader relies on this half/half split when it builds the
# CSV defaults (string ids first, float masks second).
user_engaged_tags = [
    f"{stem}_{slot}"
    for stem in ("engtag", "engtag_mask")
    for slot in range(25)
]

# Post-side categorical columns (looked up through the tag index table).
post_sparse_features = [
    'tagId'
]

# Post-side dense columns, in CSV order: four engagement-history blocks
# (32 numbered slots followed by a mask column each), then post-level and
# "pc" engagement ratios for the 2h and 1D windows.
# Fix: the last entry used to be " pcLPORatio1D" (stray leading space), which
# did not match the actual column name.
post_dense_features = [
    "pvplay_0","pvplay_1","pvplay_2","pvplay_3","pvplay_4","pvplay_5","pvplay_6","pvplay_7",
    "pvplay_8","pvplay_9","pvplay_10","pvplay_11","pvplay_12","pvplay_13","pvplay_14","pvplay_15",
    "pvplay_16","pvplay_17","pvplay_18","pvplay_19","pvplay_20","pvplay_21","pvplay_22","pvplay_23",
    "pvplay_24","pvplay_25","pvplay_26","pvplay_27","pvplay_28","pvplay_29","pvplay_30","pvplay_31","pvplay_mask",
    "pfav_0","pfav_1","pfav_2","pfav_3","pfav_4","pfav_5","pfav_6","pfav_7",
    "pfav_8","pfav_9","pfav_10","pfav_11","pfav_12","pfav_13","pfav_14","pfav_15",
    "pfav_16","pfav_17","pfav_18","pfav_19","pfav_20","pfav_21","pfav_22","pfav_23",
    "pfav_24","pfav_25","pfav_26","pfav_27","pfav_28","pfav_29","pfav_30","pfav_31","pfav_mask",
    "plike_0","plike_1","plike_2","plike_3","plike_4","plike_5","plike_6","plike_7",
    "plike_8","plike_9","plike_10","plike_11","plike_12","plike_13","plike_14","plike_15",
    "plike_16","plike_17","plike_18","plike_19","plike_20","plike_21","plike_22","plike_23",
    "plike_24","plike_25","plike_26","plike_27","plike_28","plike_29","plike_30","plike_31","plike_mask",
    "pshare_0","pshare_1","pshare_2","pshare_3","pshare_4","pshare_5","pshare_6","pshare_7",
    "pshare_8","pshare_9","pshare_10","pshare_11","pshare_12","pshare_13","pshare_14","pshare_15",
    "pshare_16","pshare_17","pshare_18","pshare_19","pshare_20","pshare_21","pshare_22","pshare_23",
    "pshare_24","pshare_25","pshare_26","pshare_27","pshare_28","pshare_29","pshare_30","pshare_31","pshare_mask",
    "postLikeRatio2h","postShareRatio2h","postFavRatio2h","postCommentRatio2h","postSVPRatio2h","postLPORatio2h",
    "postLikeRatio1D","postShareRatio1D","postFavRatio1D","postCommentRatio1D","postSVPRatio1D","postLPORatio1D",
    "pcLikeRatio2h","pcShareRatio2h","pcFavRatio2h","pcCommentRatio2h","pcSVPRatio2h","pcLPORatio2h",
    "pcLikeRatio1D","pcShareRatio1D","pcFavRatio1D","pcCommentRatio1D","pcSVPRatio1D","pcLPORatio1D"
]

# Column names listed here are skipped by CSVReader when it assembles features.
ignore_features = [

]

# Model hyper-parameters.  L2REG is the kernel regularisation strength of the
# Dense layers; DROPOUT and LR are not referenced in the visible code.
DROPOUT = 0.4
L2REG = 1e-4
LR = 0.001

#'''
# change this - when using the total dataset
# batch_size is the default DataConfig.global_batch_size and the divisor used
# to derive test_steps from NUM_TEST_EXAMPLES.
batch_size = 50000
NUM_TEST_EXAMPLES =  50000     
#NUM_TEST_EXAMPLES = 208990
#'''

# The string below is an inert expression statement: a disabled alternative
# configuration for quick small-scale runs.
'''
# small size testing 
batch_size = 1000
NUM_TEST_EXAMPLES = 2000
'''


# NOTE(review): not referenced in the visible code.
num_of_validations = 6

test_folder = "ranker_isp_ou"

# Exactly one model_folder / model_name pair is active; the commented lines
# are the other experiment variants.
# model_folder = "sc_ranker_debiasing_Hindi_video_wide_and_deep"
# model_folder = "sc_ranker_debiasing_Hindi_video_wide_and_deep_popular_sampling"
# model_folder = "sc_ranker_debiasing_Hindi_video_mask_net"
# model_folder = "sc_ranker_debiasing_Hindi_video_mask_net_popular_sampling"
# model_folder = "sc_ranker_debiasing_Hindi_video_mask_net_serial"
# model_folder = "sc_ranker_debiasing_Hindi_video_mask_net_serial_popular_sampling"
# model_folder = "sc_ranker_debiasing_Hindi_video_deep_cross_network"
# model_folder = "sc_ranker_debiasing_Hindi_video_deep_cross_network_popular_sampling"
# model_folder = "sc_ranker_debiasing_Hindi_video_deep_cross_network_serial"
model_folder = "unified_signal_Hindi_video_mask_net_serial_sampled"

# model_name = "wide_and_deep"
# model_name = "wide_and_deep_popular"
# model_name = "mask_net"
# model_name = "mask_net_popular"
model_name = "mask_net_serial"
# model_name = "mask_net_serial_popular"
# model_name = "deep_cross_net"
# model_name = "deep_cross_net_popular"
# model_name = "deep_cross_net_serial"
#model_name = "deep_cross_net_serial_popular"

# GCS prefix holding the evaluation CSV shards (target of the `bq extract` cell).
TESTDATA_DIR = "gs://tpu-cg-us/Ashish/ranker_isp_ou"

# GCS location of the trained model, and the local folder name used for it.
MODEL_DIR = 'gs://sharechat-prod-bigquery-data/dca_ranker/v0/dca_ranker/2023/02/17/dca_ranker_2023_02_17_10_12_06_4821_hindi_video_model_unified/'
MODEL_DIR_LOCAL = model_folder
#os.system(f"mkdir -p {MODEL_DIR_LOCAL}")

In [29]:
def create_distribute_input_option():
    """Build ``tf.distribute.InputOptions`` with fetching to device disabled.

    The keyword that controls this differs between TensorFlow releases, so the
    ``experimental_fetch_to_device`` spelling is tried first and, if the
    installed version rejects it with ``TypeError``, the
    ``experimental_prefetch_to_device`` spelling is used instead (OSS
    tensorflow_recommenders depends on the stable TF version, i.e. TF2.4).

    Returns:
        A ``tf.distribute.InputOptions`` instance.
    """
    make_options = tf.distribute.InputOptions
    try:
        options = make_options(experimental_fetch_to_device=False)
    except TypeError:
        options = make_options(experimental_prefetch_to_device=False)
    return options

@dataclasses.dataclass
class DataConfig:
    """Dataset config for training and evaluation."""
    # File pattern handed to tf.data.Dataset.list_files by CSVReader.
    input_path: str = ''
    # Batch size across all replicas; CSVReader converts it to a per-replica
    # size when an InputContext is supplied.  The default is captured from the
    # module-level `batch_size` when this class is defined.
    global_batch_size: int = batch_size
    # is_training, dtype and shuffle_buffer_size are not read by CSVReader in
    # the visible part of this notebook.
    is_training: bool = True
    dtype: str = 'float32'
    shuffle_buffer_size: int = 10000
    # cycle_length passed to Dataset.interleave when shards are combined.
    cycle_length: int = 8
    # When True, files are sharded across input pipelines and
    # num_shards_per_host file shards are interleaved; when False a single
    # shard is read.
    sharding: bool = True
    num_shards_per_host: int = 8

In [30]:
# Id -> index mapping tables for the two categorical features.  The raw ids
# are read as strings so they can be used as hash-table keys; rows are ordered
# by their integer index.
#
# Each frame is built in one chain (no in-place mutation), so re-running the
# cell always starts from the file contents.
tag_mapping = (
    pd.read_csv(
        "sc_ranker_debiasing-sc_ranker_debiasing_tag_index_mapping-000000000000.csv",
        dtype={'tagId': 'str'},
    )
    .sort_values(by='tag_index', axis=0)
    .reset_index(drop=True)
)
# NOTE(review): the original cell called district_mapping.fillna("null") twice
# (the second call was a no-op) and never filled tag_mapping; confirm whether
# missing tagIds should be mapped to "null" as well.

district_mapping = (
    pd.read_csv(
        "userDistrict_mapping.csv",  # "sc_ranker_debiasing-sc_ranker_debiasing_district_index_mapping-000000000000.csv",
        dtype={'userDistrict': 'str'},
    )
    # Missing values are replaced by the literal string "null" before sorting.
    .fillna("null")
    .sort_values(by='district_index', axis=0)
    .reset_index(drop=True)
)

# Parallel key/value lists consumed by tf.lookup.KeyValueTensorInitializer.
tag_index = {
    'keys': list(tag_mapping['tagId']),
    'values': list(tag_mapping['tag_index']),
}

district_index = {
    'keys': list(district_mapping['userDistrict']),
    'values': list(district_mapping['district_index']),
}

In [31]:
class CSVReader(object):
    """Dataset factory that parses the headerless ranker CSV export.

    An instance is a callable intended for
    ``strategy.distribute_datasets_from_function``: given an (optional)
    ``tf.distribute.InputContext`` it returns a ``tf.data.Dataset`` that
    yields ``(features, label)`` batches.

    The column layout comes from module-level configuration (``col_names``,
    ``meta``, ``post_dense_features``, ``user_engaged_tags``, ...), so those
    names must be defined before the dataset is built.

    Args:
        params: config object exposing ``input_path``, ``global_batch_size``,
            ``sharding``, ``num_shards_per_host`` and ``cycle_length``
            (e.g. a ``DataConfig``).
        num_labels: number of label columns that follow the meta columns.
            Only the first of them is returned as the label.
        field_delim: CSV field delimiter.
        use_fake_data: when true, only the first batch is read; it is cached
            and repeated indefinitely.
    """

    def __init__(self, params, num_labels, field_delim=",", use_fake_data=False):
        self.params = params
        self.num_labels = num_labels
        self.field_delim = field_delim
        self._use_fake_data = use_fake_data

        # Raw id string -> integer index; ids missing from the mapping get 0.
        self.tag_index = tf.lookup.StaticHashTable(
            tf.lookup.KeyValueTensorInitializer(tag_index['keys'], tag_index['values']),
            default_value=0
        )

        self.district_index = tf.lookup.StaticHashTable(
            tf.lookup.KeyValueTensorInitializer(district_index['keys'], district_index['values']),
            default_value=0
        )

    def __call__(self, ctx: tf.distribute.InputContext):
        """Build the dataset for one input pipeline.

        Args:
            ctx: distribution input context, or a falsy value when the reader
                is used without a distribution strategy (the global batch size
                is then used unchanged and no per-pipeline sharding happens).

        Returns:
            A prefetched ``tf.data.Dataset`` of ``(features, label)`` batches.
        """
        params = self.params
        if ctx:
            per_replica_batch_size = ctx.get_per_replica_batch_size(params.global_batch_size)
        else:
            per_replica_batch_size = params.global_batch_size

        # Dense columns with these prefixes (their *_mask columns included)
        # are additionally exposed as the 'post_embed' / 'user_embed' features.
        post_embed_prefixes = ('pvplay', 'pfav', 'plike', 'pshare')
        user_embed_prefixes = ('uvplay', 'ufav', 'ulike', 'ushare')

        @tf.function
        def _parse_fn(example):
            """Decode a batch of CSV lines into a ``(features, label)`` pair."""
            num_time_feats = hour_feat + dayofweek + num_other_features
            num_eng_slots = len(user_engaged_tags) // 2

            # One default per CSV column, in file order.  The Python type of
            # each default also fixes the dtype of the decoded column.
            record_defaults = (
                [''] * len(meta)                         # meta columns (strings)
                + [0.0] * self.num_labels                # label column(s)
                + [0.0] * num_time_feats                 # time / context columns
                + ['0'] * len(post_sparse_features)      # post categorical ids
                + [-1.0] * len(post_dense_features)      # post dense values
                + ['0'] * len(user_sparse_features)      # user categorical ids
                + [-1.0] * len(user_dense_features)      # user dense values
                + ['0'] * num_eng_slots                  # engaged tag ids
                + [0.0] * num_eng_slots                  # engaged tag masks
            )

            fields = tf.io.decode_csv(example, record_defaults,
                                      field_delim=self.field_delim, na_value='')

            # `offset` is the index of the first column of the group being read.
            offset = 0

            # --- meta columns ------------------------------------------------
            meta_feats = {}
            for idx in range(len(meta)):
                name = col_names[idx + offset]
                if name in ignore_features:
                    continue
                meta_feats[name] = fields[idx + offset]
            # NOTE(review): hard-coded absolute column index.  With the column
            # layout of this notebook, column 163 is 'pcFavRatio2h' - confirm
            # that this is the intended popularity signal.
            meta_feats["post_popularity"] = fields[163]
            offset += len(meta)

            # --- label (only the first label column is used) -----------------
            label = tf.cast(fields[offset + 0], tf.float32)
            offset += self.num_labels

            features = {'time': {}, 'sparse_features': {}, 'meta': meta_feats}

            # --- time / context columns --------------------------------------
            for idx in range(num_time_feats):
                name = col_names[idx + offset]
                if name in ignore_features:
                    continue
                if name in ('hour', 'dayofweek'):
                    # Integer ids (the model one-hot encodes them).
                    features['time'][name] = tf.cast(fields[idx + offset], tf.int32)
                else:
                    # Flags get a trailing axis of size 1.
                    features['time'][name] = tf.cast(
                        tf.expand_dims(fields[idx + offset], axis=-1), tf.float32)
            offset += num_time_feats

            # --- post categorical columns ------------------------------------
            for idx in range(len(post_sparse_features)):
                name = col_names[idx + offset]
                if name in ignore_features:
                    continue
                if name == "tagId":
                    features['sparse_features'][name] = self.tag_index.lookup(fields[idx + offset])
                else:
                    features['sparse_features'][name] = fields[idx + offset]
            offset += len(post_sparse_features)

            # --- post dense columns ------------------------------------------
            post_dense = []
            post_embed = []
            for idx in range(len(post_dense_features)):
                name = col_names[idx + offset]
                if name in ignore_features:
                    continue
                if name.startswith(post_embed_prefixes):
                    post_embed.append(fields[idx + offset])
                post_dense.append(fields[idx + offset])
            features['post_dense_features'] = tf.stack(post_dense, axis=1)
            features['post_embed'] = tf.stack(post_embed, axis=1)
            offset += len(post_dense_features)

            # --- user categorical columns ------------------------------------
            for idx in range(len(user_sparse_features)):
                name = col_names[idx + offset]
                if name in ignore_features:
                    continue
                if name == "userDistrict":
                    features['sparse_features'][name] = self.district_index.lookup(fields[idx + offset])
                else:
                    features['sparse_features'][name] = fields[idx + offset]
            offset += len(user_sparse_features)

            # --- user dense columns ------------------------------------------
            user_dense = []
            user_embed = []
            for idx in range(len(user_dense_features)):
                name = col_names[idx + offset]
                if name in ignore_features:
                    continue
                if name.startswith(user_embed_prefixes):
                    user_embed.append(fields[idx + offset])
                user_dense.append(fields[idx + offset])
            features['user_dense_features'] = tf.stack(user_dense, axis=1)
            features['user_embed'] = tf.stack(user_embed, axis=1)
            offset += len(user_dense_features)

            # --- engaged tags: ids go through the tag table, masks stay floats
            eng_tags = []
            eng_tags_mask = []
            for idx in range(len(user_engaged_tags)):
                name = col_names[idx + offset]
                if name in ignore_features:
                    continue
                if 'mask' in name:
                    eng_tags_mask.append(fields[idx + offset])
                else:
                    eng_tags.append(self.tag_index.lookup(fields[idx + offset]))
            features['sparse_features']['eng_tags'] = tf.stack(eng_tags, axis=1)
            features['eng_tags_mask'] = tf.stack(eng_tags_mask, axis=1)

            return features, label

        # Deterministic file order so that sharding is reproducible.
        filenames = tf.data.Dataset.list_files(params.input_path, shuffle=False)

        if params.sharding and ctx and ctx.num_input_pipelines > 1:
            filenames = filenames.shard(ctx.num_input_pipelines, ctx.input_pipeline_id)

        num_shards_per_host = params.num_shards_per_host if params.sharding else 1

        def make_dataset(shard_index):
            """Read, batch and parse the files of one shard."""
            filenames_for_shard = filenames.shard(num_shards_per_host, shard_index)
            dataset = tf.data.TextLineDataset(filenames_for_shard)
            # Lines are batched first so that decode_csv runs once per batch.
            dataset = dataset.batch(per_replica_batch_size, drop_remainder=False)
            dataset = dataset.map(_parse_fn, num_parallel_calls=tf.data.experimental.AUTOTUNE)
            return dataset

        indices = tf.data.Dataset.range(num_shards_per_host)
        dataset = indices.interleave(
            map_func=make_dataset,
            cycle_length=params.cycle_length,
            num_parallel_calls=tf.data.experimental.AUTOTUNE
        )
        dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)

        if self._use_fake_data:
            dataset = dataset.take(1).cache().repeat()

        return dataset

In [32]:
# Display the evaluation data location that the next cell reads from.
TESTDATA_DIR

'gs://tpu-cg-us/Ashish/ranker_isp_ou'

In [33]:
# Evaluation input: every file under TESTDATA_DIR, read as a single shard.
test_params = DataConfig(
    input_path=f'{TESTDATA_DIR}/*',
    is_training=False,
    sharding=False
)
test_dataset_callable = CSVReader(
    params=test_params,
    num_labels=num_labels
)

# The reader is invoked by the strategy to build the dataset.
test_dataset = strategy.distribute_datasets_from_function(
    dataset_fn=test_dataset_callable,
    options=create_distribute_input_option()
)

# NOTE(review): floor division - examples beyond the last full batch are not
# counted, and the result is 0 when NUM_TEST_EXAMPLES < batch_size.
test_steps = NUM_TEST_EXAMPLES // batch_size

print(f"test_steps: {test_steps}")

test_steps: 1


In [34]:
class MaskNetModelSerial(tfrs.models.Model):
    def __init__(self):
        """Create the embeddings, the chain of mask blocks and the ranking task.

        Every mask block consists of
          * an "InstanceGuidedMask_*" network: Dense(width * rescale_factor,
            relu) followed by Dense(width), where width is the size of the
            feature group the mask is computed from, and
          * a "MaskBlock_*" projection: Dense + LayerNormalization whose
            output width equals the width of the next block's mask, so the
            two can be multiplied element-wise in ``call``.

        Widths are read from ``feature_shapes``, which is not defined in the
        visible part of this notebook and must exist before the model is
        instantiated.
        """
        super().__init__()
        
        # Expansion factor of the hidden layer inside every instance-guided mask.
        self.rescale_factor = 2.0
        
        self.tag_embedding = tf.keras.layers.Embedding(
                input_dim=vocab_sizes['tagId'],
                output_dim=embedding_dims['tagId'],
                input_length=1
        )
        # NOTE(review): the visible part of call() embeds 'eng_tags' with
        # self.tag_embedding, not with this layer - confirm whether this
        # separate table is used anywhere.
        self.eng_tag_embedding = tf.keras.layers.Embedding(
                input_dim=vocab_sizes['tagId'],
                output_dim=embedding_dims['tagId'],
                input_length=25
        )

        self.district_embedding = tf.keras.layers.Embedding(
                input_dim=vocab_sizes['userDistrict'],
                output_dim=embedding_dims['userDistrict'],
                input_length=1
        )
        
        # NOTE(review): confirm that these v1 variable scopes still influence
        # variable names for Keras layers in the TF version in use.
        # Block 1: time features -> width of the user sparse group.
        with tf.compat.v1.variable_scope("MaskBlock_time"):
            self.time_mask = tf.keras.Sequential([
                tf.keras.layers.Dense(
                  units=int(feature_shapes['time']*self.rescale_factor),
                  kernel_initializer=tf.keras.initializers.VarianceScaling(),
                  kernel_regularizer=tf.keras.regularizers.L2(L2REG),
                  activation="relu"
                ),
                tf.keras.layers.Dense(
                  units=feature_shapes['time'],
                  kernel_initializer=tf.keras.initializers.VarianceScaling(),
                  kernel_regularizer=tf.keras.regularizers.L2(L2REG),
                )
            ], name="InstanceGuidedMask_time")
            # Only the first block normalises its raw input with a dedicated layer.
            self.time_norm = tf.keras.layers.LayerNormalization()
            self.time_mask_emb = tf.keras.Sequential([
                tf.keras.layers.Dense(
                  units=feature_shapes['user_embed'] + feature_shapes['userDistrict'],
                  kernel_initializer=tf.keras.initializers.VarianceScaling(),
                  kernel_regularizer=tf.keras.regularizers.L2(L2REG),
                ),
                tf.keras.layers.LayerNormalization()
            ], name="MaskBlock_time")
            
        # Block 2: user embeddings + district embedding -> width of the engaged tags.
        with tf.compat.v1.variable_scope("MaskBlock_user_sparse"):
            self.user_sparse_mask = tf.keras.Sequential([
                tf.keras.layers.Dense(
                  units=int((feature_shapes['user_embed'] + feature_shapes['userDistrict'])
                            *self.rescale_factor),
                  kernel_initializer=tf.keras.initializers.VarianceScaling(),
                  kernel_regularizer=tf.keras.regularizers.L2(L2REG),
                  activation="relu"
                ),
                tf.keras.layers.Dense(
                  units=(feature_shapes['user_embed'] + feature_shapes['userDistrict']),
                  kernel_initializer=tf.keras.initializers.VarianceScaling(),
                  kernel_regularizer=tf.keras.regularizers.L2(L2REG),
                )
            ], name="InstanceGuidedMask_user_sparse")
            self.user_sparse_mask_emb = tf.keras.Sequential([
                tf.keras.layers.Dense(
                  units=feature_shapes['eng_tags'],
                  kernel_initializer=tf.keras.initializers.VarianceScaling(),
                  kernel_regularizer=tf.keras.regularizers.L2(L2REG),
                ),
                tf.keras.layers.LayerNormalization()
            ], name="MaskBlock_user_sparse")
            
        # Block 3: engaged-tag embedding -> width of the user dense features.
        with tf.compat.v1.variable_scope("MaskBlock_user_tags"):
            self.user_tags_mask = tf.keras.Sequential([
                tf.keras.layers.Dense(
                  units=int(feature_shapes['eng_tags']*self.rescale_factor),
                  kernel_initializer=tf.keras.initializers.VarianceScaling(),
                  kernel_regularizer=tf.keras.regularizers.L2(L2REG),
                  activation="relu"
                ),
                tf.keras.layers.Dense(
                  units=feature_shapes['eng_tags'],
                  kernel_initializer=tf.keras.initializers.VarianceScaling(),
                  kernel_regularizer=tf.keras.regularizers.L2(L2REG),
                )
            ], name="InstanceGuidedMask_user_tags")
            self.user_tags_mask_emb = tf.keras.Sequential([
                tf.keras.layers.Dense(
                  units=feature_shapes['user_dense_features'],
                  kernel_initializer=tf.keras.initializers.VarianceScaling(),
                  kernel_regularizer=tf.keras.regularizers.L2(L2REG),
                ),
                tf.keras.layers.LayerNormalization()
            ], name="MaskBlock_user_tags")
        
        # Block 4: user dense features -> width of the post sparse group.
        with tf.compat.v1.variable_scope("MaskBlock_user_dense"):
            self.user_dense_mask = tf.keras.Sequential([
                tf.keras.layers.Dense(
                  units=int(feature_shapes['user_dense_features']*self.rescale_factor),
                  kernel_initializer=tf.keras.initializers.VarianceScaling(),
                  kernel_regularizer=tf.keras.regularizers.L2(L2REG),
                  activation="relu"
                ),
                tf.keras.layers.Dense(
                  units=feature_shapes['user_dense_features'],
                  kernel_initializer=tf.keras.initializers.VarianceScaling(),
                  kernel_regularizer=tf.keras.regularizers.L2(L2REG),
                )
            ], name="InstanceGuidedMask_user_dense")
            self.user_dense_mask_emb = tf.keras.Sequential([
                tf.keras.layers.Dense(
                  units=(feature_shapes['post_embed'] + feature_shapes['tagId']),
                  kernel_initializer=tf.keras.initializers.VarianceScaling(),
                  kernel_regularizer=tf.keras.regularizers.L2(L2REG),
                ),
                tf.keras.layers.LayerNormalization()
            ], name="MaskBlock_user_dense")
            
        # Block 5: post embeddings + tag embedding -> width of the post dense features.
        with tf.compat.v1.variable_scope("MaskBlock_post_sparse"):
            self.post_sparse_mask = tf.keras.Sequential([
                tf.keras.layers.Dense(
                  units=int((feature_shapes['post_embed'] + feature_shapes['tagId'])
                            *self.rescale_factor),
                  kernel_initializer=tf.keras.initializers.VarianceScaling(),
                  kernel_regularizer=tf.keras.regularizers.L2(L2REG),
                  activation="relu"
                ),
                tf.keras.layers.Dense(
                  units=(feature_shapes['post_embed'] + feature_shapes['tagId']),
                  kernel_initializer=tf.keras.initializers.VarianceScaling(),
                  kernel_regularizer=tf.keras.regularizers.L2(L2REG),
                )
            ], name="InstanceGuidedMask_post_sparse")
            self.post_sparse_mask_emb = tf.keras.Sequential([
                tf.keras.layers.Dense(
                  units=feature_shapes['post_dense_features'],
                  kernel_initializer=tf.keras.initializers.VarianceScaling(),
                  kernel_regularizer=tf.keras.regularizers.L2(L2REG),
                ),
                tf.keras.layers.LayerNormalization()
            ], name="MaskBlock_post_sparse")
        
        # Block 6 (last): post dense features; the projection keeps the same width.
        with tf.compat.v1.variable_scope("MaskBlock_post_dense"):
            self.post_dense_mask = tf.keras.Sequential([
                tf.keras.layers.Dense(
                  units=int(feature_shapes['post_dense_features']*self.rescale_factor),
                  kernel_initializer=tf.keras.initializers.VarianceScaling(),
                  kernel_regularizer=tf.keras.regularizers.L2(L2REG),
                  activation="relu"
                ),
                tf.keras.layers.Dense(
                  units=feature_shapes['post_dense_features'],
                  kernel_initializer=tf.keras.initializers.VarianceScaling(),
                  kernel_regularizer=tf.keras.regularizers.L2(L2REG),
                )
            ], name="InstanceGuidedMask_post_dense")
            self.post_dense_mask_emb = tf.keras.Sequential([
                tf.keras.layers.Dense(
                  units=feature_shapes['post_dense_features'],
                  kernel_initializer=tf.keras.initializers.VarianceScaling(),
                  kernel_regularizer=tf.keras.regularizers.L2(L2REG),
                ),
                tf.keras.layers.LayerNormalization()
            ], name="MaskBlock_post_dense")
        
        
        
        # Single-unit output head.
        with tf.compat.v1.variable_scope("ClassificationTower"):
            self.classification_tower = tf.keras.Sequential([
              tf.keras.layers.Dense(
                  units=1,
                  kernel_initializer=tf.keras.initializers.VarianceScaling(),
                  kernel_regularizer=tf.keras.regularizers.L2(L2REG),
              )
            ])

        # Where this activation is applied is not visible in this part of the notebook.
        self.final_activation = tf.keras.layers.Activation('relu')
        
        # Regression-style ranking task: per-example mean squared error
        # (reduction is left to compute_loss).
        # NOTE(review): tf.keras.metrics.Accuracy counts exact matches between
        # predictions and labels, which is rarely meaningful for continuous
        # targets - confirm that this metric is intended.
        self.task: tf.keras.layers.Layer = tfrs.tasks.Ranking(
            #loss=tf.keras.losses.BinaryCrossentropy(reduction=tf.keras.losses.Reduction.NONE),
            loss=tf.keras.losses.MeanSquaredError(reduction=tf.keras.losses.Reduction.NONE),
            metrics=[
                    # tf.keras.metrics.AUC(name="auc"),
                    # tf.keras.metrics.AUC(curve="PR", name="pr-auc"),
                    # tf.keras.metrics.Precision(name="precision"),
                    # tf.keras.metrics.Recall(name="recall"),
                    # tf.keras.metrics.TruePositives(name="TP"),
                    # tf.keras.metrics.FalsePositives(name="FP"),
                    # tf.keras.metrics.CategoricalAccuracy(name='categorical_accuracy'),
                    tf.keras.metrics.Accuracy(name='accuracy'),
                    tf.keras.metrics.CosineSimilarity(name='cosine_similarity', axis=-1),
            ]
        )

        
    def compute_loss(self, inputs, training=False) -> tf.Tensor:
        """Compute the replica-scaled mean loss for one batch.

        Args:
            inputs: ``(features, labels)`` or
                ``(features, labels, sample_weight)``.
            training: accepted for interface compatibility; it is not
                forwarded to the model call.

        Returns:
            Scalar tensor: the mean of the task loss divided by the number of
            replicas in sync under the current distribution strategy.

        Raises:
            ValueError: if ``inputs`` has neither 2 nor 3 elements (previously
                this surfaced as an UnboundLocalError on ``loss``).
        """
        num_parts = len(inputs)
        if num_parts == 2:
            features, labels = inputs
            task_kwargs = {}
        elif num_parts == 3:
            features, labels, sample_weight = inputs
            task_kwargs = {"sample_weight": sample_weight}
        else:
            raise ValueError(
                "compute_loss expects (features, labels) or "
                f"(features, labels, sample_weight); got {num_parts} elements"
            )

        rating_predictions = self(features)
        loss = self.task(labels=labels, predictions=rating_predictions, **task_kwargs)

        # The task loss is unreduced, so average it here and scale by the
        # replica count.
        loss = tf.reduce_mean(loss)
        return loss / tf.distribute.get_strategy().num_replicas_in_sync
    
    
    def call(self, inputs):
        """Run the serial mask-block chain and return one score per example.

        Reads ``sparse_features`` (``tagId``, ``eng_tags``, ``userDistrict``),
        ``eng_tags_mask``, ``time``, ``user_embed``, ``post_embed``,
        ``user_dense_features`` and ``post_dense_features`` from ``inputs``.

        Returns:
            The activated classification-tower output reshaped to rank 1.
        """
        sparse = inputs["sparse_features"]
        time_inputs = inputs['time']

        tag_embed = self.tag_embedding(sparse['tagId'])

        # Engaged tags reuse the tag embedding and are averaged over axis 1.
        # The small constant keeps the divisor non-zero when the mask sums to 0.
        # NOTE(review): the numerator is not multiplied by the mask, so padded
        # positions count unless their embeddings are zero -- confirm.
        eng_tag_vectors = self.tag_embedding(sparse['eng_tags'])
        sequence_length = tf.math.reduce_sum(inputs['eng_tags_mask'], axis=1, keepdims=True) + 0.0001
        eng_tag_embed = tf.math.divide(tf.math.reduce_sum(eng_tag_vectors, axis=1), sequence_length)

        district_embed = self.district_embedding(sparse['userDistrict'])

        # Time block input: one-hot hour and weekday followed by the five flags.
        time = tf.keras.layers.Concatenate(axis=-1)([
            tf.one_hot(time_inputs['hour'], 24),
            tf.one_hot(time_inputs['dayofweek'], 7),
            time_inputs['is_weekend'],
            time_inputs['is_morning'],
            time_inputs['is_afternoon'],
            time_inputs['is_evening'],
            time_inputs['is_night'],
        ])
        user_sparse = tf.keras.layers.Concatenate()([inputs['user_embed'], district_embed])
        post_sparse = tf.keras.layers.Concatenate()([inputs['post_embed'], tag_embed])

        # First block: the normalized time features gated by their own mask.
        hidden = self.time_mask_emb(
            tf.keras.layers.Multiply()([self.time_norm(time), self.time_mask(time)])
        )

        # Every later block derives a mask from its own input and uses it to
        # gate the previous block's output (no per-block normalization).
        serial_blocks = (
            (self.user_sparse_mask, self.user_sparse_mask_emb, user_sparse),
            (self.user_tags_mask, self.user_tags_mask_emb, eng_tag_embed),
            (self.user_dense_mask, self.user_dense_mask_emb, inputs['user_dense_features']),
            (self.post_sparse_mask, self.post_sparse_mask_emb, post_sparse),
            (self.post_dense_mask, self.post_dense_mask_emb, inputs['post_dense_features']),
        )
        for mask_layer, hidden_layer, block_input in serial_blocks:
            gate = mask_layer(block_input)
            hidden = hidden_layer(tf.keras.layers.Multiply()([hidden, gate]))

        logits = self.classification_tower(hidden)
        return tf.reshape(self.final_activation(logits), [-1])

    @property
    def embedding_trainable_variables(self) -> Sequence[tf.Variable]:
        """Variables meant for a separate embedding optimizer; currently always empty."""
        # The alternative, self.embedding_layer.trainable_variables, is disabled.
        return list()

    @property
    def deep_trainable_variables(self) -> Sequence[tf.Variable]:
        """Trainable variables of every layer in ``self.layers``, in layer order."""
        # No layer is excluded: the embedding-layer filter is disabled.
        return [
            variable
            for layer in self.layers
            for variable in layer.trainable_variables
        ]

In [35]:
# Size recorded for each model input, keyed by feature name.
# 'time' = 24 (hour one-hot) + 7 (weekday one-hot) + 5 flags, presumably one
# column per flag -- confirm against the input pipeline.
feature_shapes = dict(
    time=36,
    post_dense_features=156,
    user_dense_features=163,
    user_embed=132,
    post_embed=132,
    tagId=embedding_dims['tagId'],
    userDistrict=embedding_dims['userDistrict'],
    # Engaged tags use the same dimension as tagId.
    eng_tags=embedding_dims['tagId'],
)

In [36]:
with strategy.scope():
    # Build the model and its optimizer inside the distribution strategy scope.
    model = MaskNetModelSerial()

    # Alternative kept for reference: separate optimizers for embedding and
    # dense variables.
    # embedding_optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
    # deep_optimizer = tf.keras.optimizers.Adagrad(learning_rate=0.1)
    # optimizer = tfrs.experimental.optimizers.CompositeOptimizer([
    #     (embedding_optimizer, lambda: model.embedding_trainable_variables),
    #     (deep_optimizer, lambda: model.deep_trainable_variables),
    # ])

    # `learning_rate` is the supported argument name; the `lr` alias is
    # deprecated and emits a warning from the optimizer constructor.
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.0005)

    # Restore the exported weights, then attach the optimizer.
    model.load_weights(MODEL_DIR + 'export/variables/variables')

    model.compile(optimizer)
    #model.load_weights('gs://deep-ctr/devansh_production/checkpoints/my_checkpoint2')
    #model = tf.keras.models.load_model('gs://deep-ctr/devansh_production/models2/')
    

  super(Adam, self).__init__(name, **kwargs)


In [37]:
# Column-oriented accumulator: one list per output column, extended per batch.
pred_df = {
    'isp_date': [],
    'postId': [],
    'userId':[],
    'model': [],
    'isp_score': [],
    'groundtruth': [],
    'prediction': [],
    'production_model_prediction_unified': [],
    'production_model_prediction_combined': [],
}

prediction = []
groundtruth = []
production_model_prediction = []

# The loop stops only after handling the batch whose index exceeds test_steps,
# so it consumes up to test_steps + 2 batches. The bar total now matches that
# bound; with the previous total of test_steps + 1 the bar overran.
# NOTE(review): if only test_steps batches were intended, tighten the break
# condition instead -- that would change which rows are evaluated.
with tqdm(total=test_steps + 2) as progress_bar:  # closed even if a batch fails
    for ind, example in enumerate(test_dataset):
        batch_features = example[0]
        batch_meta = batch_features['meta']
        pred = model(batch_features)

        # Identifier-like metadata is stored as strings.
        for str_column in ('isp_date', 'model', 'postId', 'userId', 'isp_score'):
            pred_df[str_column] += list(batch_meta[str_column].numpy().astype('str'))

        pred_df['production_model_prediction_combined'] += list(batch_meta['combined_score'].numpy().astype('float32'))
        pred_df['production_model_prediction_unified'] += list(batch_meta['unified_signal1'].numpy().astype('float32'))

        # Ground truth is taken from meta['video_play'], not from example[1].
        pred_df['groundtruth'] += list(batch_meta['video_play'].numpy().astype('float32'))
        pred_df['prediction'] += list(pred.numpy())

        progress_bar.update(1)
        if ind > test_steps:
            break

3it [01:17, 25.77s/it]                                                                                                                         


In [38]:
def DCG(labels, perm, top=10):
    """Discounted cumulative gain of ``labels`` visited in the order ``perm``.

    Only the first ``top`` entries of ``perm`` are scored. The label at
    0-based position ``i`` is divided by the natural log of ``i + 2``.
    """
    cutoff = min(top, len(perm))
    # Start from 0.0 and add terms left to right, as a plain accumulation would.
    return sum(
        (labels[perm[rank]] / np.log(rank + 2) for rank in range(cutoff)),
        0.0,
    )

def NDCG(labels, preds, top=10):
    """Normalized DCG@top of ``preds`` against ``labels``.

    Returns:
        A ``(score, counted)`` pair. ``counted`` is 1 when the ideal DCG is at
        least 1e-3; otherwise the pair is ``(0.0, 0)`` so the caller can leave
        the group out of its average.
    """
    # Descending order by prediction and by label (the ideal ranking).
    predicted_order = np.argsort(-preds)
    ideal_order = np.argsort(-labels)

    ideal_dcg = DCG(labels, ideal_order, top=top)
    if ideal_dcg < 1e-3:
        return 0.0, 0

    achieved_dcg = DCG(labels, predicted_order, top=top)
    return achieved_dcg / ideal_dcg, 1

def calc_NDCG(df, preds_field, label_field="groundtruth", top=10):
    """Average per-user NDCG@top over the rows of ``df``.

    Rows are grouped by ``userId``. Groups with a single row are skipped, and
    groups for which ``NDCG`` reports a zero count do not enter the average.

    Args:
        df: Frame with a ``userId`` column plus the two columns named below.
        preds_field: Column holding the scores to rank by.
        label_field: Column holding the relevance labels.
        top: Rank cutoff forwarded to ``NDCG``.

    Returns:
        The mean NDCG over the counted users, or ``nan`` when no user was
        counted (this case used to raise ZeroDivisionError).
    """
    total = 0.0
    counted_users = 0
    for _, group in df.groupby("userId"):
        if len(group) <= 1:
            continue
        score, counted = NDCG(
            group[label_field].values, group[preds_field].values, top=top
        )
        total += score
        counted_users += counted

    if counted_users == 0:
        return float("nan")
    return total / counted_users

def naive_roc_auc_score(y_true, y_pred):
    """Pairwise (O(n^2)) ROC AUC.

    Among all ordered pairs ``(a, b)`` with ``y_true[a] > y_true[b]``, returns
    the fraction where ``y_pred[a] > y_pred[b]``, counting prediction ties as
    one half.

    Args:
        y_true: Sequence of labels.
        y_pred: Sequence of scores with the same length.

    Returns:
        The AUC as a float, or ``nan`` when no pair has differing labels.

    Raises:
        ValueError: If the two inputs differ in length.
    """
    # Convert to arrays so indexing is positional even for a pandas Series
    # with a non-default index.
    true_vals = np.asarray(y_true)
    pred_vals = np.asarray(y_pred)
    if len(true_vals) != len(pred_vals):
        raise ValueError("y_true and y_pred must have the same length")

    num_pairs = 0
    num_same_sign = 0.0
    size = len(true_vals)
    for a in range(size):
        for b in range(size):
            if true_vals[a] > true_vals[b]:
                num_pairs += 1
                # Predictions are compared only for pairs whose labels differ.
                # Without this nesting every pair was scored, which could push
                # the result above 1.
                if pred_vals[a] > pred_vals[b]:
                    num_same_sign += 1
                elif pred_vals[a] == pred_vals[b]:
                    num_same_sign += .5

    if num_pairs == 0:
        return float("nan")
    return num_same_sign / num_pairs


In [39]:
# Group the collected predictions by userId.
# NOTE(review): user_g is not referenced again in the visible part of the notebook.
user_g = pd.DataFrame.from_dict(pred_df).groupby(['userId'])

In [40]:
# Turn the per-column lists collected in pred_df into a DataFrame.
pd_data = pd.DataFrame.from_dict(pred_df)

In [41]:
# Order rows by userId (collected as strings, so the order is lexicographic).
pd_data = pd_data.sort_values('userId')

In [42]:
# Largest production combined score (about 3.47 in the recorded run, so the
# score is not confined to [0, 1]).
pd_data['production_model_prediction_combined'].max()

3.4740515

In [43]:
# Add logistic-sigmoid versions of the model prediction and of the production
# combined score as new columns.
for raw_col, squashed_col in (
    ('prediction', 'prediction2'),
    ('production_model_prediction_combined', 'production_model_prediction2'),
):
    pd_data[squashed_col] = 1 / (1 + np.exp(-pd_data[raw_col]))

In [44]:
# Keep only the rows that carry an ISP score (neither null nor empty string).
# .copy() makes pd_data2 an independent frame, so columns assigned to it later
# do not write into a slice of pd_data (which raises SettingWithCopyWarning).
pd_data2 = pd_data[pd_data['isp_score'].notna() & (pd_data['isp_score'] != '')].copy()

# naive_roc_auc_score(pd_data['production_model_prediction_combined'],pd_data['prediction'])

In [45]:
# Mean per-user NDCG@10 when ranking by the sigmoid-squashed model prediction.
calc_NDCG(pd_data, "prediction2")

0.8242107243146556

In [46]:
# Same metric when ranking by the production unified signal.
calc_NDCG(pd_data, "production_model_prediction_unified")

0.9980759632547317

In [47]:
# Same metric when ranking by the production combined score.
# NOTE(review): a value this close to 1 may mean the combined score already
# encodes the video_play label -- confirm before comparing against it.
calc_NDCG(pd_data, "production_model_prediction_combined")

0.9993088766409981

In [48]:
# Preview the first rows of the sorted prediction frame.
pd_data.head()

Unnamed: 0,isp_date,postId,userId,model,isp_score,groundtruth,prediction,production_model_prediction_unified,production_model_prediction_combined,prediction2,production_model_prediction2
35820,,1283524835,1000005741,,,0.0,0.491311,0.81,0.0,0.620415,0.5
90367,2023-02-22 18:40:43 UTC,1746427635,1000060902,isp_ranker_sugg_video_realtime_ffm,6.591061,1.0,1.119097,12.85375,1.292685,0.753821,0.784601
6569,,3833811835,1000099413,,,0.0,0.489031,0.54,0.0,0.619878,0.5
6870,,5302742735,1000099413,,,0.0,0.730365,1.6255,0.0,0.674885,0.5
7936,,7477078735,1000115478,,,0.0,0.184068,0.405,0.0,0.545887,0.5


## Production AUC, using video_play as the ground-truth signal

In [49]:
# ROC curve of the sigmoid-squashed model prediction on the rows that have an
# ISP score; the cell displays the area under that curve.
fpr, tpr, thresholds = roc_curve(
    y_true=pd_data2['groundtruth'],
    y_score=pd_data2['prediction2'],
)
auc(x=fpr, y=tpr)



0.6572690694859444

## Production RMSE, using video_play as the ground-truth signal

In [50]:
from sklearn.metrics import mean_squared_error
import math

# RMSE between video_play and the sigmoid-squashed model prediction, computed
# on the rows that have an ISP score.
rmse_score = math.sqrt(
    mean_squared_error(
        y_true=pd_data2['groundtruth'],
        y_pred=pd_data2['prediction2'],
    )
)
print("production rmse_score ",rmse_score)


production rmse_score  0.5807993608370067


## Percentage of rows that have an ISP score

In [51]:
# Share of all rows, in percent, that passed the ISP-score filter.
(len(pd_data2)/len(pd_data))*100

14.711333333333334

In [52]:
# Convert the ISP score to float32 and add its logistic-sigmoid version.
# assign() binds a new frame to pd_data2 instead of writing into what may be a
# slice of pd_data, which is what raised SettingWithCopyWarning here.
pd_data2 = pd_data2.assign(
    isp_score=lambda frame: frame['isp_score'].astype('float32'),
    # Evaluated after the entry above, so it reads the float32 column.
    isp_score2=lambda frame: 1 / (1 + np.exp(-frame['isp_score'])),
)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[name] = value
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [56]:
# Fit a logistic regression of video_play on the two squashed scores to see how
# much weight each one receives.
# NOTE(review): the model is fit on the same rows it is later evaluated on.
X = pd_data2.loc[:, ['isp_score2', 'prediction2']]
y = pd_data2.loc[:, 'groundtruth']

from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
# Standardize both inputs; fit and transform happen in one call.
X_scaled = scaler.fit_transform(X)


from sklearn.linear_model import LogisticRegression
lr = LogisticRegression(solver='lbfgs')
lr.fit(X_scaled, y)

LogisticRegression()

In [57]:
# Fitted weights, in the column order of X: [isp_score2, prediction2].
lr.coef_

array([[0.05857776, 0.53509565]])

In [65]:
from sklearn.metrics import mean_squared_error
import math

# Blend 3% sigmoid(ISP score) with 97% sigmoid(model prediction); 0.03 is where
# the recorded AUC sweep over the blend weight peaks.
# Both columns are widened to float64 first so the products match what
# per-element Python float arithmetic gives.
tot_score = (
    0.03 * pd_data2['isp_score2'].to_numpy(dtype='float64')
    + 0.97 * pd_data2['prediction2'].to_numpy(dtype='float64')
)
rmse_score = math.sqrt(mean_squared_error(y, tot_score))
print("rmse_score ",rmse_score)


rmse_score  0.5729524494677153


In [66]:
# AUC of the 3% / 97% blend of sigmoid(ISP score) and sigmoid(model prediction).
# Columns are widened to float64 so the products match per-element Python
# float arithmetic.
tot_score = (
    0.03 * pd_data2['isp_score2'].to_numpy(dtype='float64')
    + 0.97 * pd_data2['prediction2'].to_numpy(dtype='float64')
)
fpr, tpr, thresholds = roc_curve(pd_data2['groundtruth'], tot_score, pos_label=1)
print(" auc is ",auc(fpr, tpr))


 auc is  0.6585137197757172


In [55]:
# AUC of a weighted sum using fixed weights 0.1458677 (ISP) and 5.88510977
# (model prediction).
# NOTE(review): these look like coefficients from an earlier regression fit;
# they differ from the lr.coef_ values recorded in this notebook -- confirm
# where they came from.
tot_score = (
    0.1458677 * pd_data2['isp_score2'].to_numpy(dtype='float64')
    + 5.88510977 * pd_data2['prediction2'].to_numpy(dtype='float64')
)
fpr, tpr, thresholds = roc_curve(pd_data2['groundtruth'], tot_score, pos_label=1)
print(" auc is ",auc(fpr, tpr))


 auc is  0.6585437099161611


In [62]:
# Sweep the ISP blend weight i over [0, 1) in steps of 0.01 and print the AUC
# of i * sigmoid(ISP score) + (1 - i) * sigmoid(model prediction).
# The two columns do not depend on i, so they are extracted once, as float64
# to match per-element Python float arithmetic.
isp_scores_f64 = pd_data2['isp_score2'].to_numpy(dtype='float64')
model_scores_f64 = pd_data2['prediction2'].to_numpy(dtype='float64')
for i in np.arange(0,1,0.01):
    tot_score = i * isp_scores_f64 + (1-i) * model_scores_f64
    fpr, tpr, thresholds = roc_curve(pd_data2['groundtruth'], tot_score, pos_label=1)
    print("i is ",i," auc ",auc(fpr, tpr))


i is  0.0  auc  0.6572690694859444
i is  0.01  auc  0.658055002522999
i is  0.02  auc  0.6584807133233826
i is  0.03  auc  0.6585137197757172
i is  0.04  auc  0.6581155341727272
i is  0.05  auc  0.6573284605589544
i is  0.06  auc  0.6561963381627695
i is  0.07  auc  0.6547035989228025
i is  0.08  auc  0.6529093906611049
i is  0.09  auc  0.6508459089934809
i is  0.1  auc  0.6485514740543229
i is  0.11  auc  0.6460877364477701
i is  0.12  auc  0.6434854160671432
i is  0.13  auc  0.6408042607535249
i is  0.14  auc  0.6380452110772897
i is  0.15  auc  0.6352877182071917
i is  0.16  auc  0.6325439771246366
i is  0.17  auc  0.6298511133315299
i is  0.18  auc  0.627243646841728
i is  0.19  auc  0.624721729011383
i is  0.2  auc  0.6222912302969696
i is  0.21  auc  0.620007871384807
i is  0.22  auc  0.6178927232135125
i is  0.23  auc  0.615934909762435
i is  0.24  auc  0.6141599021097599
i is  0.25  auc  0.6125602189371067
i is  0.26  auc  0.6111517202069956
i is  0.27  auc  0.6099039076547574
