### data precess

In [21]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from sklearn.preprocessing import OrdinalEncoder, StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
import math
from catboost import CatBoostRegressor
from sklearn import ensemble
from sklearn.preprocessing import RobustScaler, StandardScaler
from xgboost.sklearn import XGBClassifier, XGBRegressor
from causalml.metrics.visualize import plot_lift, plot_qini, plot_gain, plot_tmlegain, auuc_score, qini_score, \
    get_tmlegain
from causalml.propensity import ElasticNetPropensityModel
import pickle
import argparse
import logging
import json

logging.basicConfig(filename='casual_inference_miaosuan.log.txt',
                    filemode='a',
                    format='%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s',
                    datefmt='%H:%M:%S',
                    level=logging.DEBUG)

logging.info("Running Urban Planning")
logger = logging.getLogger('urbanGUI')


# LOG_FORMAT = "%(asctime)s - %(levelname)s - %(message)s"
# logging.basicConfig(level=logging.DEBUG, format=LOG_FORMAT)


def data_process(train_data, inference_data, feature_columns, treatment_columns_category, outcome_column):
    # 众数 补充缺失值
    train = pd.concat([train_data, inference_data])
    train = train[feature_columns]
    for col in feature_columns:
        d = dict(train[col].value_counts())
        train[col] = train[col].fillna(sorted(d.items(), key=lambda x: x[1], reverse=True)[0][0])

        train_data[col] = train_data[col].fillna(sorted(d.items(), key=lambda x: x[1], reverse=True)[0][0])
        inference_data[col] = inference_data[col].fillna(sorted(d.items(), key=lambda x: x[1], reverse=True)[0][0])

    treatment_columns_category_dict = dict()
    for col in treatment_columns_category:
        label_encoder = LabelEncoder().fit(train[col])
        temp_dict = dict(
            zip(range(0, len(label_encoder.classes_)), label_encoder.classes_))
        treatment_columns_category_dict[col] = temp_dict
        train[col] = label_encoder.transform(train[col])

        train_data[col] = label_encoder.transform(train_data[col])
        inference_data[col] = label_encoder.transform(inference_data[col])

    for col in feature_columns:
        if train[col].dtype == 'object':
            label_encoder = LabelEncoder().fit(train[col])
            train[col] = label_encoder.transform(train[col])

            train_data[col] = label_encoder.transform(train_data[col])
            inference_data[col] = label_encoder.transform(inference_data[col])

    xgb = XGBRegressor()
    xgb.fit(train_data[feature_columns], train_data[outcome_column])

    # def get_auuc(xgb, train_data, treatment_columns_category_dict, treatment_columns_category, treatment_column,
    #              outcome_column):
    #     for i in treatment_columns_category:
    #         _keys = list(treatment_columns_category_dict[i].keys())
    #         for i_value in _keys:
    #             train_temp = train_data.copy(deep=True)
    #             train_temp[i+str(i_value)] =i_value

    inference_data["outcome1"] = xgb.predict(inference_data[feature_columns])

    return treatment_columns_category_dict, inference_data, xgb,train_data

### 推理

In [None]:
def inference(train, treatment_columns_category_dict, xgb, feature_columns,
              treatment_columns_category, treatment_columns_common, treatment_change_value,
              treatment_columns_continuous,
              userid_column, target_data_path):
    outcome_pd = pd.DataFrame()
    outcome_pd['outcome'] = xgb.predict(train[feature_columns])
    result = dict()

    # 基于连续型变量的处理
    treatment_cont_common = []
    for col, limit_value in treatment_columns_continuous.items():
        if col in treatment_columns_common:
            treatment_cont_common.append(col)
        else:
            train_temp = train.copy(deep=True)
            if limit_value[2] == 0:
                if limit_value[0] is not None:
                    train_temp[col] = train_temp[col].apply(
                        lambda x: x * (1 - treatment_change_value) if x * (1 - treatment_change_value) > limit_value[
                            0] else x)
                if limit_value[1] is not None:
                    train_temp[col] = train_temp[col].apply(
                        lambda x: x * (1 - treatment_change_value) if x * (1 - treatment_change_value) < limit_value[
                            1] else x)
                if limit_value[0] is None and limit_value[1] is None:
                    train_temp[col] = train_temp[col].apply(lambda x: x * (1 - treatment_change_value))

            if limit_value[2] == 1:
                if limit_value[0] is not None:
                    train_temp[col] = train_temp[col].apply(
                        lambda x: x * (1 + treatment_change_value) if x * (1 + treatment_change_value) > limit_value[
                            0] else x)
                if limit_value[1] is not None:
                    train_temp[col] = train_temp[col].apply(
                        lambda x: x * (1 + treatment_change_value) if x * (1 + treatment_change_value) < limit_value[
                            1] else x)
                if limit_value[0] is None and limit_value[1] is None:
                    train_temp[col] = train_temp[col].apply(lambda x: x * (1 + treatment_change_value))
            outcome_pd[col] = xgb.predict(train_temp[feature_columns])
    result["treatment_cont_common"] = treatment_cont_common


    treatment_cate_common = []
    for col in treatment_columns_category:
        train_temp = train.copy(deep=True)
        if col in treatment_columns_common:
            pd_category = pd.DataFrame()
            logging.info("{} is category 变量，在common中 ".format(col))
            treat_enum = treatment_columns_category_dict[col]
            logging.info(treat_enum)
            for enum_value in treat_enum.keys():
                logging.info(enum_value)
                train_temp[col] = enum_value
                pd_category[enum_value] = xgb.predict(train_temp[feature_columns])
            d = dict(pd_category.mean())
            d = sorted(d.items(), key=lambda x: x[1], reverse=True)[0][0]
            _temp_dict = dict()
            _temp_dict["treatment"] = col
            _temp_dict["value"] = treat_enum[d]
            treatment_cate_common.append(_temp_dict)
            logging.info("\n\n")

        else:
            pd_category = pd.DataFrame()
            logging.info("{} is category 变量，不在common中 ".format(col))
            treat_enum = treatment_columns_category_dict[col]
            logging.info(treat_enum)
            for enum_value in treat_enum.keys():
                logging.info(enum_value)
                train_temp[col] = enum_value
                pd_category[enum_value] = xgb.predict(train_temp[feature_columns])
            logging.info("\n\n")
            cate_max = pd_category.max(axis=1)
            cate_idxmax = pd_category.idxmax(axis=1)
            pd_category[col] = cate_max
            pd_category[col + "_idx"] = cate_idxmax

            outcome_pd[col] = cate_max
            outcome_pd[col + "_idx"] = cate_idxmax

    result["treatment_cate_common"] = treatment_cate_common

    temp_pd = outcome_pd[[i for i in treatment_columns_category + list(
        treatment_columns_continuous.keys()) if i not in treatment_columns_common]]
    outcome_pd['max_idx'] = temp_pd.idxmax(axis=1)
    outcome_pd["user_id"] = train[userid_column].values

    # user_res = dict()
    # outcome_pd_cols = outcome_pd.columns.to_list()
    # for key, value in outcome_pd.iterrows():
    #     max_idx = value["max_idx"]
    #     if max_idx + "_idx" in outcome_pd_cols:
    #         temp = dict()
    #         temp[max_idx] = value[max_idx + "_idx"]
    #         user_res[value["user_id"]] = temp
    #     else:
    #         user_res[value["user_id"]] = max_idx
    # #     logging.info(max_idx)
    # result["user_treatment"] = user_res

    user_res = []
    outcome_pd_cols = outcome_pd.columns.to_list()
    for key, value in outcome_pd.iterrows():
        _temp_dict = dict()
        max_idx = value["max_idx"]
        if max_idx + "_idx" in outcome_pd_cols:
            _temp_dict["user_id"] = value["user_id"]
            _temp_dict["treatment"] = max_idx
            _temp_dict["value"] = value[max_idx + "_idx"]

        else:
            _temp_dict["user_id"] = value["user_id"]
            _temp_dict["treatment"] = max_idx
        user_res.append(_temp_dict)
    result["user_treatment"] = user_res

    with open(target_data_path, "w", encoding="utf8") as tf:
        json.dump(result, tf, ensure_ascii=False, indent=2, cls=NpEncoder)

    return result, outcome_pd



###  auuc

In [None]:
def get_auuc(pred_df, y_col, treatment_col):
    auuc_df = pd.DataFrame(auuc_score(pred_df, outcome_col=y_col,
                                      treatment_col=treatment_col, normalize=True)).reset_index()
    auuc_df.columns = ['Learner', 'auuc']
    auuc_df['Lift'] = (auuc_df['auuc'] /
                       auuc_df[auuc_df.Learner == 'Random'].auuc.values) - 1
    auuc_df = auuc_df.sort_values('auuc', ascending=False)
    logging.info(auuc_df)
    return auuc_df


class NpEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super(NpEncoder, self).default(obj)

In [22]:
train = pd.read_csv('train.csv')
train = train[['V_0', 'V_1', 'V_2', 'V_3', 'V_4', 'V_5', 'V_6', 'V_7', 'V_8', 'V_9',
               'V_10', 'treatment', 'outcome']]
train['user_id'] = train.index

feature_columns = ['V_0', 'V_1', 'V_2', 'V_3', 'V_4', 'V_5', 'V_6', 'V_7', 'V_8', 'V_9',
                   'V_10', 'treatment']
# treatment_columns_continuous=['V_6', 'V_7']
treatment_columns_category = ['V_10', 'treatment']
treatment_columns_common = ['V_6', 'treatment']
treatment_change_value = 0.2
outcome_column = ["outcome"]

treatment_columns_continuous = {
    "V_6": [None, 10, 1],
    "V_7": [0.3, None, 0]
}
train_data = train[0:30000]
inference_data = train[30000:]


userid_column = ["user_id"]

treatment_change_value = 0.2

target_data_path="test.json"

treatment_columns_category_dict, inference_data, xgb, train_data = data_process(train_data, inference_data, feature_columns,
                                                                    treatment_columns_category,
                                                                    outcome_column)
result, outcome_dp = inference(inference_data, treatment_columns_category_dict, xgb, feature_columns,
                               treatment_columns_category, treatment_columns_common, treatment_change_value,
                               treatment_columns_continuous,
                               userid_column, target_data_path)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/

In [34]:
train_data.head(2)

Unnamed: 0,V_0,V_1,V_2,V_3,V_4,V_5,V_6,V_7,V_8,V_9,V_10,treatment,outcome,user_id
0,1.778505,2.376919,0.0,-1.680176,-1.581142,0.0,-31.4,0.838,0,3,0,2,4.745129,0
1,-0.631728,5.337631,0.0,-0.869348,-2.767838,0.0,-42.0,4.12,0,999,0,0,7.310963,1


In [33]:
xgb, feature_columns

(XGBRegressor(base_score=0.5, booster='gbtree', callbacks=None,
              colsample_bylevel=1, colsample_bynode=1, colsample_bytree=1,
              early_stopping_rounds=None, enable_categorical=False,
              eval_metric=None, gamma=0, gpu_id=-1, grow_policy='depthwise',
              importance_type=None, interaction_constraints='',
              learning_rate=0.300000012, max_bin=256, max_cat_to_onehot=4,
              max_delta_step=0, max_depth=6, max_leaves=0, min_child_weight=1,
              missing=nan, monotone_constraints='()', n_estimators=100, n_jobs=0,
              num_parallel_tree=1, predictor='auto', random_state=0, reg_alpha=0,
              reg_lambda=1, ...),
 ['V_0',
  'V_1',
  'V_2',
  'V_3',
  'V_4',
  'V_5',
  'V_6',
  'V_7',
  'V_8',
  'V_9',
  'V_10',
  'treatment'])

In [87]:
treatment_columns_category_dict

{'V_10': {0: 'no', 1: 'unknown', 2: 'yes'}, 'treatment': {0: 0, 1: 1, 2: 2}}

In [57]:
outcome_column

['outcome']

In [90]:
auuc_pd

Unnamed: 0,outcome,V_10,V_10_0,V_10_1,unknown--no,V_10_2,yes--no,treatment,treatment_0,treatment_1,1--0,treatment_2,2--0
0,4.745129,0,3.988036,8.055796,4.067760,1.094690,-2.893346,2,1.969935,3.119568,1.149633,3.988036,2.018101
1,7.310963,0,-10.826632,7.542346,18.368979,-17.824121,-6.997489,0,-10.826632,-7.886559,2.940073,1.041240,11.867872
2,-52.156193,0,-33.853062,-14.249991,19.603069,-41.699028,-7.845966,2,-48.689671,-47.755993,0.933678,-33.853062,14.836609
3,-17.011566,2,-25.706264,0.482524,26.188789,-30.317589,-4.611324,0,-30.317589,-28.352587,1.965002,-17.887976,12.429613
4,-15.229788,2,-15.105337,10.054493,25.159830,-19.134491,-4.029154,2,-31.926182,-28.537054,3.389128,-19.134491,12.791691
...,...,...,...,...,...,...,...,...,...,...,...,...,...
29995,-2.498687,0,4.125650,19.421726,15.296076,-0.668706,-4.794356,2,-5.869147,-4.644073,1.225075,4.125650,9.994797
29996,-37.206500,2,-24.533562,-1.416815,23.116747,-31.598116,-7.064554,2,-45.623962,-44.344898,1.279064,-31.598116,14.025846
29997,-20.834510,0,-22.400124,-6.857708,15.542416,-30.257338,-7.857214,0,-22.400124,-26.100471,-3.700348,-14.481441,7.918682
29998,-22.517593,0,-33.752384,-8.604316,25.148067,-38.397629,-4.645245,0,-33.752384,-34.352486,-0.600101,-22.293121,11.459263


In [101]:
treatment_columns_category_dict

{'V_10': {0: 'no', 1: 'unknown', 2: 'yes'}, 'treatment': {0: 0, 1: 1, 2: 2}}

In [97]:
outcome_column=['outcome']

In [103]:
i_values

[0, 1, 2]

In [105]:
train["V_0"]

Unnamed: 0,V_0,V_1,V_2,V_3,V_4,V_5,V_6,V_7,V_8,V_9,V_10,treatment,outcome,user_id
0,1.778505,2.376919,0.0,-1.680176,-1.581142,0.0,-31.4,0.838,no,3,no,2,4.745129,0
1,-0.631728,5.337631,0.0,-0.869348,-2.767838,0.0,-42.0,4.120,no,999,no,0,7.310963,1
2,-0.862595,3.215953,1.0,-1.614799,-2.614270,1.0,-41.8,4.866,no,999,no,2,-52.156193,2
3,0.230854,2.250193,0.0,-1.631994,-5.705525,1.0,-36.1,4.966,no,999,yes,0,-17.011566,3
4,0.194774,4.896442,0.0,-0.143322,2.568728,1.0,-42.7,4.968,no,999,yes,2,-15.229788,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
36183,0.826893,4.331358,0.0,-1.551377,-2.904315,0.0,-46.2,1.250,no,999,unknown,2,20.512318,36183
36184,0.631707,1.383925,0.0,-0.411007,4.483282,1.0,-42.7,4.962,no,999,no,0,-3.178066,36184
36185,-0.072426,4.918745,0.0,-0.351575,-4.926085,1.0,-36.1,4.963,no,999,no,0,-43.733190,36185
36186,-1.029582,4.633457,0.0,-0.496135,3.944163,1.0,-46.2,1.334,no,999,no,2,12.905083,36186


In [118]:
auuc_pd["V_10"].value_counts()

0    24787
2     4511
1      702
Name: V_10, dtype: int64

Unnamed: 0,outcome,V_10,V_10_0,V_10_1,unknown--no,V_10_2,yes--no,treatment,treatment_0,treatment_1,1--0,treatment_2,2--0
0,4.745129,0,3.988036,8.055796,4.067760,1.094690,-2.893346,2,1.969935,3.119568,1.149633,3.988036,2.018101
1,7.310963,0,-10.826632,7.542346,18.368979,-17.824121,-6.997489,0,-10.826632,-7.886559,2.940073,1.041240,11.867872
2,-52.156193,0,-33.853062,-14.249991,19.603069,-41.699028,-7.845966,2,-48.689671,-47.755993,0.933678,-33.853062,14.836609
5,1.494013,0,-0.676663,19.728470,20.405132,-8.822582,-8.145919,2,-17.009354,-15.632926,1.376428,-0.676663,16.332691
6,11.463869,0,-1.136559,24.965666,26.102224,-9.011839,-7.875280,2,-15.572253,-14.268182,1.304071,-1.136559,14.435695
...,...,...,...,...,...,...,...,...,...,...,...,...,...
29993,-30.825209,0,-24.816895,-6.927785,17.889111,-27.992214,-3.175320,1,-27.583122,-24.816895,2.766228,-13.702352,13.880771
29995,-2.498687,0,4.125650,19.421726,15.296076,-0.668706,-4.794356,2,-5.869147,-4.644073,1.225075,4.125650,9.994797
29997,-20.834510,0,-22.400124,-6.857708,15.542416,-30.257338,-7.857214,0,-22.400124,-26.100471,-3.700348,-14.481441,7.918682
29998,-22.517593,0,-33.752384,-8.604316,25.148067,-38.397629,-4.645245,0,-33.752384,-34.352486,-0.600101,-22.293121,11.459263


In [106]:
outcome_column

['outcome']

In [131]:
def auuc_output(train_data, feature_columns,  outcome_column, treatment_columns_category_dict):
    try:
        auuc_pd = train_data[outcome_column]
        for i in treatment_columns_category_dict.keys():
            auuc_pd[i] = train_data[i]
            i_keys = list(treatment_columns_category_dict[i].keys())
            i_values = list(treatment_columns_category_dict[i].values())
            print(i_values, i_keys)
            for j in i_keys:
                temp_data = train_data.copy(deep=True)
                temp_data[i] = j
                auuc_pd[str(i)+"_"+str(j)
                        ] = xgb.predict(temp_data[feature_columns])
                if j != 0:
                    auuc_pd[str(i_values[j])+"--"+str(i_values[0])
                            ] = auuc_pd[str(i)+"_"+str(j)]-auuc_pd[str(i)+"_"+str(0)]
                    lalla = auuc_pd[outcome_column+[i] +
                                    [str(i_values[j])+"--"+str(i_values[0])]]

                    print(i, j, i_values[j])
                    lalla = lalla[lalla[i].isin([0, j])]
                    lalla[i].replace(j, 1, inplace=True)
                    print(get_auuc(lalla, outcome_column[0], i))
                    print("\n\n")
    except:
        logging.info("\n\n****something wrong about auuc_output****")

In [132]:
auuc_output(train_data, feature_columns,  outcome_column, treatment_columns_category_dict)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/

['no', 'unknown', 'yes'] [0, 1, 2]
V_10 1 unknown
       Learner      auuc      Lift
0  unknown--no  0.606296  0.204453
1       Random  0.503379  0.000000



V_10 2 yes



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


   Learner      auuc      Lift
0  yes--no -0.299113 -0.402522
1   Random -0.500625  0.000000



[0, 1, 2] [0, 1, 2]



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/

treatment 1 1
  Learner      auuc      Lift
0    1--0 -0.058511 -0.882667
1  Random -0.498675  0.000000






A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


treatment 2 2
  Learner      auuc      Lift
0    2--0  0.665040  0.332924
1  Random  0.498933  0.000000





In [126]:
auuc_pd=train_data[outcome_column]
for i in  treatment_columns_category_dict.keys():
    auuc_pd[i]=train_data[i]
    i_keys=list(treatment_columns_category_dict[i].keys())
    i_values=list(treatment_columns_category_dict[i].values()) 
    print(i_values,i_keys)
    for j in i_keys:
        temp_data = train_data.copy(deep=True)
        temp_data[i]=j
        auuc_pd[str(i)+"_"+str(j)]=xgb.predict(temp_data[feature_columns])
        if j!=0:
            auuc_pd[str(i_values[j])+"--"+str(i_values[0])]=auuc_pd[str(i)+"_"+str(j)]-auuc_pd[str(i)+"_"+str(0)]
            lalla=auuc_pd[outcome_column+[i]+[str(i_values[j])+"--"+str(i_values[0])]]
            
            print(i,j,i_values[j])
            lalla = lalla[lalla[i].isin([0,j])]
            lalla[i].replace(j,1,inplace=True)
            print(get_auuc(lalla,outcome_column[0],i))
            print("\n\n")


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/

['no', 'unknown', 'yes'] [0, 1, 2]
V_10 1 unknown
       Learner      auuc      Lift
0  unknown--no  0.606296  0.204453
1       Random  0.503379  0.000000



V_10 2 yes



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


   Learner      auuc      Lift
0  yes--no -0.299113 -0.402522
1   Random -0.500625  0.000000



[0, 1, 2] [0, 1, 2]
treatment 1 1



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/

  Learner      auuc      Lift
0    1--0 -0.058511 -0.882667
1  Random -0.498675  0.000000



treatment 2 2



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


  Learner      auuc      Lift
0    2--0  0.665040  0.332924
1  Random  0.498933  0.000000





In [None]:
train_data