In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import preprocessing
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
import xgboost as xgb
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, classification_report, confusion_matrix
from sklearn.metrics import roc_curve, auc, roc_auc_score
from sklearn.model_selection import StratifiedKFold

In [2]:
pd.set_option('display.max_rows', 1000)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

In [3]:
#Merged User info, activity log and training and testing data on user-merchant pairs
df = pd.read_csv("/Users/Vishal/Project_1/final.csv")

#Replace null values in columns with corresponding values

df['age_range'] = df['age_range'].fillna(0)
df['label'] = df['label'].fillna(2)
df['gender'] = df['gender'].fillna(2)
df['brand_id'] = df['brand_id'].fillna(0)

# Convert time stamp to Year-Month-Date format
df['time_stamp'] = pd.to_datetime(df['time_stamp'], format='%m%d', errors='coerce')

In [4]:
#Training data from format 1
df_train = pd.read_csv("/Users/Vishal/Project_1/data_format1/train_format1.csv")

In [5]:
# One hot encoding of Action Type

one_hot = pd.get_dummies(df['action_type'])

df = pd.concat([df, one_hot], axis=1)

In [6]:
df_train = pd.merge(df_train, df[['user_id','age_range','gender']].drop_duplicates(), on = 'user_id', how = 'inner')


# User Level Features

# Activity of Users

In [7]:
def useractivity (x, y):
    
    feat = x.groupby(['user_id'])[0,1,2,3].sum().reset_index()
    
    y =  pd.merge(y, feat, on = 'user_id', how = 'left') 
    
    del feat
    return y


In [8]:
# Count of each action type by User before 1111
df_combined = useractivity(df[df.time_stamp != '1900-11-11'], df_train)

In [9]:
df_combined = df_combined.rename(columns={0: "userbeforesale0",1: "userbeforesale1",
                                          2: "userbeforesale2",3: "userbeforesale3"})

In [10]:
# Count of each action type by User on 1111
df_combined = useractivity(df[df.time_stamp == '1900-11-11'], df_combined)

In [11]:
df_combined = df_combined.rename(columns={0: "useronsale0",1: "useronsale1",
                                          2: "useronsale2",3: "useronsale3"})

In [12]:
# ratio of clicks to purchase, and fav to purchase

df_combined['user_ratio_0_to_2_before_sale'] = df_combined['userbeforesale0']/df_combined['userbeforesale2']
df_combined['user_ratio_3_to_2_before_sale'] = df_combined['userbeforesale3']/df_combined['userbeforesale2']

df_combined['user_ratio_0_to_2_on_sale'] = df_combined['useronsale0']/df_combined['useronsale2']
df_combined['user_ratio_3_to_2_on_sale'] = df_combined['useronsale3']/df_combined['useronsale2']


In [13]:
df_combined = df_combined.replace(np.inf, np.nan)
df_combined = df_combined.fillna(0)

# Active and Purchase Days

In [14]:
# Active days

temp = df[['user_id','time_stamp']].drop_duplicates().groupby(['user_id'])['time_stamp'].count()\
        .reset_index().rename(columns = {'time_stamp':'active_days'})

temp['active_days'] -= 1

df_combined = pd.merge(df_combined, temp, on = 'user_id', how = 'left')

In [15]:
# Purchase Days

temp = df[df.action_type == 2][['user_id','time_stamp']].drop_duplicates().groupby(['user_id'])['time_stamp']\
        .count().reset_index().rename(columns = {'time_stamp':'purchase_days'})

temp['purchase_days'] -= 1

df_combined = pd.merge(df_combined, temp, on = 'user_id', how = 'left')

# Monthwise Count

In [16]:
def usermonthwisecount_norm (x , y):
    
    #Monthwise activity count
    temp = x[['user_id','time_stamp']].groupby(['user_id',x['time_stamp'].dt.month]).count()\
           .rename(columns = {'time_stamp':'count'}) 
    
    temp = pd.pivot_table(temp, index = 'user_id', columns = 'time_stamp', values = 'count').reset_index().fillna(0)
    
    temp['total'] = temp[5]+temp[6]+temp[7]+temp[8]+temp[9]+temp[10]+temp[11]
    
    #Normalised activity count
    for i in range(5,12):
        temp[i] = temp[i]/temp['total']
    
    temp = temp.drop('total', 1)
    
    y = pd.merge(y, temp, on = 'user_id', how = 'left' )
    
    del temp
    return y

In [17]:
#Normalised count of monthwise activity
df_combined = usermonthwisecount_norm (df, df_combined)

In [18]:
df_combined = df_combined.rename(columns = {5:'useractivity5',6:'useractivity6',7:'useractivity7'
                                           ,8:'useractivity8',9:'useractivity9',10:'useractivity10'
                                           ,11:'useractivity11'})


In [19]:
#Normalised count of monthwise purchase
df_combined = usermonthwisecount_norm (df[df.action_type == 2], df_combined)

In [20]:
df_combined = df_combined.rename(columns = {5:'userpurchase5',6:'userpurchase6',7:'userpurchase7'
                                           ,8:'userpurchase8',9:'userpurchase9',10:'userpurchase10'
                                           ,11:'userpurchase11'})

In [21]:
def usermonthwisecount_avg (x , y):
    
    # Calculate activity monthwise
    temp = x[['user_id','time_stamp']].groupby(['user_id',x['time_stamp'].dt.month]).count()\
           .rename(columns = {'time_stamp':'count'}) 
    
    temp = pd.pivot_table(temp, index = 'user_id', columns = 'time_stamp', values = 'count').reset_index().fillna(0)
    
    
    # Calculate active days monthwise
    temp1 = x[['user_id','time_stamp']].drop_duplicates().groupby(['user_id',x['time_stamp'].dt.month]).count()\
             .rename(columns = {'time_stamp':'active_days_count'})

    temp1 = pd.pivot_table(temp1, index = 'user_id', columns = 'time_stamp', values = 'active_days_count' ).reset_index().fillna(0)

    
    # For average activity monthwise
    for i in range(5,12):
        temp[i] = temp[i]/temp1[i]
        
    temp = temp.fillna(0)
    
    y = pd.merge(y, temp, on = 'user_id', how = 'left' )
    
    del temp
    del temp1
    return y


In [22]:
#Average monthwise activity
df_combined = usermonthwisecount_avg ( df, df_combined )

In [23]:
df_combined = df_combined.rename(columns = {5:'useravgactivity5',6:'useravgactivity6',7:'useravgactivity7'
                                           ,8:'useravgactivity8',9:'useravgactivity9',10:'useravgactivity10'
                                           ,11:'useravgactivity11'})

In [24]:
#Average monthwise purchase
df_combined = usermonthwisecount_avg ( df[df.action_type == 2] , df_combined )

In [25]:
df_combined = df_combined.rename(columns = {5:'useravgpurchase5',6:'useravgpurchase6',7:'useravgpurchase7'
                                           ,8:'useravgpurchase8',9:'useravgpurchase9',10:'useravgpurchase10'
                                           ,11:'useravgpurchase11'})


# Number of Unique Brands/ Categories/ Items/ Merchant clicked and purchased

In [26]:
#Number of unique brands/categories/items that a user has an activity on before 1111

temp = df[df.time_stamp != '1900-11-11'][['user_id','brand_id','cat_id','item_id','seller_id']]\
        .groupby('user_id').agg({'brand_id':'nunique','cat_id':'nunique','item_id':'nunique','seller_id':'nunique'})\
        .reset_index().rename(columns = {'brand_id':'user_unique_brands_before_sale',
                                         'cat_id':'user_unique_categories_before_sale',
                                         'item_id':'user_unique_items_before_sale',
                                         'seller_id':'user_unique_sellers_before_sale'})

df_combined = pd.merge(df_combined, temp, on = 'user_id', how = 'left')


#Number of unique brands/categories/items that a user has purchased before 1111

temp = df[(df.time_stamp != '1900-11-11')&(df.action_type == 2)][['user_id','brand_id','cat_id','item_id','seller_id']]\
        .groupby('user_id').agg({'brand_id':'nunique','cat_id':'nunique','item_id':'nunique','seller_id':'nunique'})\
        .reset_index().rename(columns = {'brand_id':'user_unique_brands_purchased_before_sale',
                                         'cat_id':'user_unique_categories_purchased_before_sale',
                                         'item_id':'user_unique_items_purchased_before_sale',
                                         'seller_id':'user_unique_sellers_purchased_before_sale'})

df_combined = pd.merge(df_combined, temp, on = 'user_id', how = 'left')


#Number of unique brands/categories/items that a user has an activity on  1111

temp = df[df.time_stamp == '1900-11-11'][['user_id','brand_id','cat_id','item_id','seller_id']]\
        .groupby('user_id').agg({'brand_id':'nunique','cat_id':'nunique','item_id':'nunique','seller_id':'nunique'})\
        .reset_index().rename(columns = {'brand_id':'user_unique_brands_on_sale',
                                         'cat_id':'user_unique_categories_on_sale',
                                         'item_id':'user_unique_items_on_sale',
                                         'seller_id':'user_unique_sellers_on_sale'})
 
df_combined = pd.merge(df_combined, temp, on = 'user_id', how = 'left')


#Number of unique brands/categories/items that a user has purchased on 1111

temp = df[(df.time_stamp == '1900-11-11')&(df.action_type == 2)][['user_id','brand_id','cat_id','item_id','seller_id']]\
        .groupby('user_id').agg({'brand_id':'nunique','cat_id':'nunique','item_id':'nunique','seller_id':'nunique'})\
        .reset_index().rename(columns = {'brand_id':'user_unique_brands_purchased_on_sale',
                                         'cat_id':'user_unique_categories_purchased_on_sale',
                                         'item_id':'user_unique_items_purchased_on_sale',
                                         'seller_id':'user_unique_sellers_purchased_on_sale'})

df_combined = pd.merge(df_combined, temp, on = 'user_id', how = 'left')



# User all month purchase, only in 11

In [27]:
temp = df[(df.action_type == 2)][['user_id','time_stamp']].groupby(['user_id',df['time_stamp'].dt.month]).count()\
        .rename(columns = {'time_stamp':'count'})
    
temp = pd.pivot_table(temp, index = 'user_id', columns = 'time_stamp', values = 'count').reset_index().fillna(0)
    

temp1 = temp[(temp[5]>0)&(temp[6]>0)&(temp[7]>0)&(temp[8]>0)&(temp[9]>0)&(temp[10]>0)&(temp[11]>0)]

temp1['user_all_month_purchase'] = 1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [28]:
df_combined = pd.merge(df_combined, temp1[['user_id','user_all_month_purchase']], on = 'user_id', how = 'left')

In [29]:
temp2 = temp[(temp[5]==0)&(temp[6]==0)&(temp[7]==0)&(temp[8]==0)&(temp[9]==0)&(temp[10]==0)&(temp[11]>0)]

temp2 = temp2.drop([ 5,6,7,8,9,10], 1)

temp2[11] = temp2[11]/temp2[11].max()

temp2 = temp2.rename(columns = {11: 'user_purchase_only_11'})

In [30]:
df_combined = pd.merge(df_combined, temp2[['user_id','user_purchase_only_11']], on = 'user_id', how = 'left')

In [31]:
df_combined = df_combined.fillna(0)

# Count of each activity on Merchant

In [32]:
def selleractivity (x, y):
    
    feat = x.groupby(['seller_id'])[0,1,2,3].sum().reset_index()
    
    y =  pd.merge(y, feat, on = 'seller_id', how = 'left') 
    
    del feat
    return y

In [33]:
# Count of each action type on seller before 1111
df_combined = selleractivity(df[df.time_stamp != '1900-11-11'], df_combined)

In [34]:
df_combined = df_combined.rename(columns={0: "sellerbeforesale0",1: "sellerbeforesale1",
                                          2: "sellerbeforesale2",3: "sellerbeforesale3"})

In [35]:
# Count of each action type on seller on 1111
df_combined = selleractivity(df[df.time_stamp == '1900-11-11'], df_combined)

In [36]:
df_combined = df_combined.rename(columns={0: "selleronsale0",1: "selleronsale1",
                                          2: "selleronsale2",3: "selleronsale3"})

# Count of each action type by user on each merchant

In [37]:
# Count of each action type by user on each merchant

def userselleractivity (y):
    feat = df.groupby(['user_id','seller_id'])[0,1,2,3].sum().reset_index()
    feat = feat.rename(columns={0: "userseller0",1: "userseller1",2: "userseller2",3: "userseller3"})
    
    x =  pd.merge(y, feat, on = ['user_id','seller_id'])
    del feat
    return x


In [38]:
df_combined = userselleractivity(df_combined)

In [39]:
df_combined = df_combined.fillna(0)

# Age,gender related features

# Age

In [40]:
age = df.groupby(['age_range'])['action_type'].count().reset_index()\
        .rename(columns = {'action_type':'total_activity'})

temp = df[df.action_type == 2].groupby(['age_range'])['action_type'].count().reset_index()\
                              .rename(columns = {'action_type':'total_purchase'})

age = pd.merge(age, temp, on = 'age_range', how = 'left')


In [41]:
temp = df.groupby(['age_range'])['user_id'].nunique().reset_index()\
         .rename(columns = {'user_id':'unique_users_age'})

age = pd.merge(age, temp, on = 'age_range', how = 'left')

In [42]:
age['age_avg_activity'] = age['total_activity'] / age['unique_users_age']
age['age_avg_purchase'] = age['total_purchase'] / age['unique_users_age']

age['unique_users_age_norm'] = age['unique_users_age']/age['unique_users_age'].max()

In [43]:
age = age.drop(['unique_users_age','total_activity','total_purchase'] ,1)

In [44]:
age

Unnamed: 0,age_range,age_avg_activity,age_avg_purchase,unique_users_age_norm
0,0.0,105.74245,6.229536,0.852016
1,1.0,71.708333,4.0,0.000215
2,2.0,101.852055,5.584574,0.473525
3,3.0,132.987954,7.75305,1.0
4,4.0,147.542249,9.620345,0.716419
5,5.0,152.046497,9.728107,0.365209
6,6.0,152.653846,8.705109,0.317624
7,7.0,150.495566,7.713387,0.062622
8,8.0,128.383886,7.607425,0.011339


In [45]:
df_combined = pd.merge(df_combined, age, on = 'age_range', how = 'left')


# Gender

In [46]:
gender = df.groupby(['gender'])['action_type'].count().reset_index()\
        .rename(columns = {'action_type':'total_activity'})

temp = df[df.action_type == 2].groupby(['gender'])['action_type'].count().reset_index()\
                              .rename(columns = {'action_type':'total_purchase'})

gender = pd.merge(gender, temp, on = 'gender', how = 'left')

In [47]:
temp = df.groupby(['gender'])['user_id'].nunique().reset_index()\
         .rename(columns = {'user_id':'unique_users_gender'})

gender = pd.merge(gender, temp, on = 'gender', how = 'left')

In [48]:
gender['gender_avg_activity'] = gender['total_activity'] / gender['unique_users_gender']
gender['gender_avg_purchase'] = gender['total_purchase'] / gender['unique_users_gender']

gender['unique_users_gender_norm'] = gender['unique_users_gender']/gender['unique_users_gender'].max()

In [49]:
gender = gender.drop(['unique_users_gender','total_activity','total_purchase'] ,1)

In [50]:
df_combined = pd.merge(df_combined, gender, on = 'gender', how = 'left')


# Age-Gender Combined

In [51]:
#Make a new column with user-merchant pairs together as a key
df['age_gender'] = df.age_range.astype(str).str.cat(df.gender.astype(str), sep='_')


#Make a new column with user-merchant pairs together as a key
df_combined['age_gender'] = df_combined.age_range.astype(str).str.cat(df_combined.gender.astype(str), sep='_')


In [52]:
age_gender = df.groupby(['age_gender'])['action_type'].count().reset_index()\
               .rename(columns = {'action_type':'total_activity'})

temp = df[df.action_type == 2].groupby(['age_gender'])['action_type'].count().reset_index()\
                              .rename(columns = {'action_type':'total_purchase'})

age_gender = pd.merge(age_gender, temp, on = 'age_gender', how = 'left')

In [53]:
temp = df.groupby(['age_gender'])['user_id'].nunique().reset_index()\
         .rename(columns = {'user_id':'unique_users_age_gender'})

age_gender = pd.merge(age_gender, temp, on = 'age_gender', how = 'left')

In [54]:
age_gender['age_gender_avg_activity'] = age_gender['total_activity'] / age_gender['unique_users_age_gender']
age_gender['age_gender_avg_purchase'] = age_gender['total_purchase'] / age_gender['unique_users_age_gender']

age_gender['unique_users_age_gender_norm'] = age_gender['unique_users_age_gender']/age_gender['unique_users_age_gender'].max()

In [55]:
age_gender = age_gender.drop(['unique_users_age_gender','total_activity','total_purchase'] ,1)

In [56]:
df_combined = pd.merge(df_combined, age_gender, on = 'age_gender', how = 'left')


In [57]:
# Total active days for all users in each group

temp = df[['age_gender','time_stamp']].groupby(['age_gender'])['time_stamp'].count()\
        .reset_index().rename(columns = {'time_stamp':'age_gender_active_days_norm'})

temp['age_gender_active_days_norm'] = temp['age_gender_active_days_norm']/temp['age_gender_active_days_norm'].max()

df_combined = pd.merge(df_combined, temp, on = 'age_gender', how = 'left')

In [58]:
# Total purchase days for all users in each group

temp = df[df.action_type == 2][['age_gender','time_stamp']].groupby(['age_gender'])['time_stamp'].count()\
        .reset_index().rename(columns = {'time_stamp':'age_gender_purchase_days_norm'})

temp['age_gender_purchase_days_norm'] = temp['age_gender_purchase_days_norm']/temp['age_gender_purchase_days_norm'].max()

df_combined = pd.merge(df_combined, temp, on = 'age_gender', how = 'left')


# Merchant Level Features

In [59]:
# ratio of clicks to purchase, and fav to purchase

df_combined['seller_ratio_0_to_2_before_sale'] = (df_combined['sellerbeforesale0']/df_combined['sellerbeforesale2'])
df_combined['seller_ratio_3_to_2_before_sale'] = (df_combined['sellerbeforesale3']/df_combined['sellerbeforesale2'])

df_combined['seller_ratio_0_to_2_on_sale'] = (df_combined['selleronsale0']/df_combined['selleronsale2'])
df_combined['seller_ratio_3_to_2_on_sale'] = (df_combined['selleronsale3']/df_combined['selleronsale2'])


# Monthwise Count

In [60]:
def sellermonthwisecount_norm (x , y):
    
    #Monthwise activity count
    temp = x[['seller_id','time_stamp']].groupby(['seller_id',x['time_stamp'].dt.month]).count()\
           .rename(columns = {'time_stamp':'count'})
    
    temp = pd.pivot_table(temp, index = 'seller_id', columns = 'time_stamp', values = 'count').reset_index().fillna(0)
    
    temp['total'] = temp[5]+temp[6]+temp[7]+temp[8]+temp[9]+temp[10]+temp[11]
    
    #Normalised activity count
    for i in range(5,12):
        temp[i] = temp[i]/temp['total']
    
    temp = temp.drop('total', 1)
    
    y = pd.merge(y, temp, on = 'seller_id', how = 'left' )
    
    del temp
    return y

In [61]:
#Normalised count of monthwise activity
df_combined = sellermonthwisecount_norm (df, df_combined)

In [62]:
df_combined = df_combined.rename(columns = {5:'selleractivity5',6:'selleractivity6',7:'selleractivity7'
                                           ,8:'selleractivity8',9:'selleractivity9',10:'selleractivity10'
                                           ,11:'selleractivity11'})

In [63]:
#Normalised count of monthwise purchase
df_combined = sellermonthwisecount_norm (df[df.action_type == 2], df_combined)

In [64]:
df_combined = df_combined.rename(columns = {5:'sellerpurchase5',6:'sellerpurchase6',7:'sellerpurchase7'
                                           ,8:'sellerpurchase8',9:'sellerpurchase9',10:'sellerpurchase10'
                                           ,11:'sellerpurchase11'})

In [65]:
def sellermonthwisecount_avg (x , y):
    
    # Calculate activity monthwise
    temp = x[['seller_id','time_stamp']].groupby(['seller_id',x['time_stamp'].dt.month]).count()\
           .rename(columns = {'time_stamp':'count'})
    
    temp = pd.pivot_table(temp, index = 'seller_id', columns = 'time_stamp', values = 'count').reset_index().fillna(0)
    
    
    # Calculate active days monthwise
    temp1 = x[['seller_id','time_stamp']].drop_duplicates().groupby(['seller_id',x['time_stamp'].dt.month]).count()\
             .rename(columns = {'time_stamp':'active_days_count'})

    temp1 = pd.pivot_table(temp1, index = 'seller_id', columns = 'time_stamp', values = 'active_days_count' ).reset_index().fillna(0)

    
    # For average activity
    for i in range(5,12):
        temp[i] = temp[i]/temp1[i]
        
    temp = temp.fillna(0)
    
    y = pd.merge(y, temp, on = 'seller_id', how = 'left' )
    
    del temp
    del temp1
    return y


In [66]:
#Average monthwise activity
df_combined = sellermonthwisecount_avg (df, df_combined)

In [67]:
df_combined = df_combined.rename(columns = {5:'selleravgactivity5',6:'selleravgactivity6',7:'selleravgactivity7'
                                           ,8:'selleravgactivity8',9:'selleravgactivity9',10:'selleravgactivity10'
                                           ,11:'selleravgactivity11'})

In [68]:
#Average monthwise purchase
df_combined = sellermonthwisecount_avg (df[df.action_type == 2], df_combined)

In [69]:
df_combined = df_combined.rename(columns = {5:'selleravgpurchase5',6:'selleravgpurchase6',7:'selleravgpurchase7'
                                           ,8:'selleravgpurchase8',9:'selleravgpurchase9',10:'selleravgpurchase10'
                                           ,11:'selleravgpurchase11'})

# Number of Unique Brands/ Categories/ Items/ Users clicked and purchased for merchants


In [70]:
#Number of unique brands/categories/items that a merchant has an activity on before 1111

temp = df[df.time_stamp != '1900-11-11'][['seller_id','brand_id','cat_id','item_id','user_id']]\
        .groupby('seller_id').agg({'brand_id':'nunique','cat_id':'nunique','item_id':'nunique','user_id':'nunique'})\
        .reset_index().rename(columns = {'brand_id':'seller_unique_brands_before_sale',
                                         'cat_id':'seller_unique_categories_before_sale',
                                         'item_id':'seller_unique_items_before_sale',
                                         'user_id':'seller_unique_users_before_sale'})

df_combined = pd.merge(df_combined, temp, on = 'seller_id', how = 'left')


#Number of unique brands/categories/items that a merchant has sold before 1111

temp = df[(df.time_stamp != '1900-11-11')&(df.action_type == 2)][['seller_id','brand_id','cat_id','item_id','user_id']]\
        .groupby('seller_id').agg({'brand_id':'nunique','cat_id':'nunique','item_id':'nunique','user_id':'nunique'})\
        .reset_index().rename(columns = {'brand_id':'seller_unique_brands_purchased_before_sale',
                                         'cat_id':'seller_unique_categories_purchased_before_sale',
                                         'item_id':'seller_unique_items_purchased_before_sale',
                                         'user_id':'seller_unique_users_purchased_before_sale'})

df_combined = pd.merge(df_combined, temp, on = 'seller_id', how = 'left')


#Number of unique brands/categories/items that a merchant has an activity on  1111

temp = df[df.time_stamp == '1900-11-11'][['seller_id','brand_id','cat_id','item_id','user_id']]\
        .groupby('seller_id').agg({'brand_id':'nunique','cat_id':'nunique','item_id':'nunique','user_id':'nunique'})\
        .reset_index().rename(columns = {'brand_id':'seller_unique_brands_on_sale',
                                         'cat_id':'seller_unique_categories_on_sale',
                                         'item_id':'seller_unique_items_on_sale',
                                         'user_id':'seller_unique_users_on_sale'})

df_combined = pd.merge(df_combined, temp, on = 'seller_id', how = 'left')


#Number of unique brands/categories/items that a merchant has sold on 1111

temp = df[(df.time_stamp == '1900-11-11')&(df.action_type == 2)][['seller_id','brand_id','cat_id','item_id','user_id']]\
        .groupby('seller_id').agg({'brand_id':'nunique','cat_id':'nunique','item_id':'nunique','user_id':'nunique'})\
        .reset_index().rename(columns = {'brand_id':'seller_unique_brands_purchased_on_sale',
                                         'cat_id':'seller_unique_categories_purchased_on_sale',
                                         'item_id':'seller_unique_items_purchased_on_sale',
                                         'user_id':'seller_unique_users_purchased_on_sale'})

df_combined = pd.merge(df_combined, temp, on = 'seller_id', how = 'left')



In [71]:
df_combined = df_combined.fillna(0)

# User-Merchant Features

In [72]:
def userselleractivity (x, y):
    
    feat = x.groupby(['user_merchant_pair'])[0,1,2,3].sum().reset_index()
    
    y =  pd.merge(y, feat, on = 'user_merchant_pair', how = 'left') 
    
    del feat
    return y


In [73]:
#Make a new column with user-merchant pairs together as a key
df['user_merchant_pair'] = df.user_id.astype(str).str.cat(df.seller_id.astype(str), sep='_')


In [74]:
#Make a new column with user-merchant pairs together as a key
df_combined['user_merchant_pair'] = df_combined.user_id.astype(str).str.cat(df_combined.seller_id.astype(str), sep='_')


In [75]:
# Count of each action type by user on seller before 1111
df_combined = userselleractivity(df[df.time_stamp != '1900-11-11'], df_combined)


In [76]:
df_combined = df_combined.rename(columns={0: "usersellerbeforesale0",1: "usersellerbeforesale1",
                                          2: "usersellerbeforesale2",3: "usersellerbeforesale3"})

df_combined = df_combined.drop('usersellerbeforesale2', axis = 1)


In [77]:
# Count of each action type on seller on 1111
df_combined = userselleractivity(df[df.time_stamp == '1900-11-11'], df_combined)


In [78]:
df_combined = df_combined.rename(columns={0: "userselleronsale0",1: "userselleronsale1",
                                          2: "userselleronsale2",3: "userselleronsale3"})


In [79]:
# Unique categories and items 

In [80]:
temp = df[(df.time_stamp == '1900-11-11')].groupby(['user_merchant_pair'])\
        .agg({'brand_id':'nunique', 'cat_id':'nunique','item_id':'nunique'}).reset_index()\
        .rename(columns = {'brand_id':'um_unique_brands_on_sale',
                           'cat_id':'um_unique_categories_on_sale',
                           'item_id':'um_unique_items_on_sale'}) 


In [81]:
df_combined = pd.merge(df_combined, temp, on = 'user_merchant_pair', how = 'left')

In [82]:
df_combined

Unnamed: 0,user_id,seller_id,label,age_range,gender,userbeforesale0,userbeforesale1,userbeforesale2,userbeforesale3,useronsale0,useronsale1,useronsale2,useronsale3,user_ratio_0_to_2_before_sale,user_ratio_3_to_2_before_sale,user_ratio_0_to_2_on_sale,user_ratio_3_to_2_on_sale,active_days,purchase_days,useractivity5,useractivity6,useractivity7,useractivity8,useractivity9,useractivity10,useractivity11,userpurchase5,userpurchase6,userpurchase7,userpurchase8,userpurchase9,userpurchase10,userpurchase11,useravgactivity5,useravgactivity6,useravgactivity7,useravgactivity8,useravgactivity9,useravgactivity10,useravgactivity11,useravgpurchase5,useravgpurchase6,useravgpurchase7,useravgpurchase8,useravgpurchase9,useravgpurchase10,useravgpurchase11,user_unique_brands_before_sale,user_unique_categories_before_sale,user_unique_items_before_sale,user_unique_sellers_before_sale,user_unique_brands_purchased_before_sale,user_unique_categories_purchased_before_sale,user_unique_items_purchased_before_sale,user_unique_sellers_purchased_before_sale,user_unique_brands_on_sale,user_unique_categories_on_sale,user_unique_items_on_sale,user_unique_sellers_on_sale,user_unique_brands_purchased_on_sale,user_unique_categories_purchased_on_sale,user_unique_items_purchased_on_sale,user_unique_sellers_purchased_on_sale,user_all_month_purchase,user_purchase_only_11,sellerbeforesale0,sellerbeforesale1,sellerbeforesale2,sellerbeforesale3,selleronsale0,selleronsale1,selleronsale2,selleronsale3,userseller0,userseller1,userseller2,userseller3,age_avg_activity,age_avg_purchase,unique_users_age_norm,gender_avg_activity,gender_avg_purchase,unique_users_gender_norm,age_gender,age_gender_avg_activity,age_gender_avg_purchase,unique_users_age_gender_norm,age_gender_active_days_norm,age_gender_purchase_days_norm,seller_ratio_0_to_2_before_sale,seller_ratio_3_to_2_before_sale,seller_ratio_0_to_2_on_sale,seller_ratio_3_to_2_on_sale,selleractivity5,selleractivity6,selleractivity7,selleractivity8,selleractivity9,selleractivity10,selleractivity11,sellerpurchase5,sellerpurchase6,sellerpurchase7,sellerpurchase8,sellerpurchase9,sellerpurchase10,sellerpurchase11,selleravgactivity5,selleravgactivity6,selleravgactivity7,selleravgactivity8,selleravgactivity9,selleravgactivity10,selleravgactivity11,selleravgpurchase5,selleravgpurchase6,selleravgpurchase7,selleravgpurchase8,selleravgpurchase9,selleravgpurchase10,selleravgpurchase11,seller_unique_brands_before_sale,seller_unique_categories_before_sale,seller_unique_items_before_sale,seller_unique_users_before_sale,seller_unique_brands_purchased_before_sale,seller_unique_categories_purchased_before_sale,seller_unique_items_purchased_before_sale,seller_unique_users_purchased_before_sale,seller_unique_brands_on_sale,seller_unique_categories_on_sale,seller_unique_items_on_sale,seller_unique_users_on_sale,seller_unique_brands_purchased_on_sale,seller_unique_categories_purchased_on_sale,seller_unique_items_purchased_on_sale,seller_unique_users_purchased_on_sale,user_merchant_pair,usersellerbeforesale0,usersellerbeforesale1,usersellerbeforesale3,userselleronsale0,userselleronsale1,userselleronsale2,userselleronsale3,um_unique_brands_on_sale,um_unique_categories_on_sale,um_unique_items_on_sale
0,34176,3906,0,6.0,0.0,386.0,0.0,23.0,6.0,24.0,0.0,11.0,1.0,16.782609,0.260870,2.181818,0.090909,46,16,0.10643,0.090909,0.068736,0.097561,0.042129,0.323725,0.270510,0.147059,0.117647,0.029412,0.029412,0.058824,0.176471,0.441176,12.0,6.833333,10.333333,8.800000,3.800000,8.111111,20.333333,1.666667,1.333333,1.0,1.0,1.0,1.2,7.5,106,44,251,107,17.0,16.0,19.0,17.0,8,10,11,8,6,6,7,6,1.0,0.0,13203.0,26.0,233.0,937.0,1667.0,2.0,177.0,24.0,36.0,0.0,1.0,2.0,152.653846,8.705109,0.317624,141.133806,8.156852,1.000000,6.0_0.0,170.208267,9.331121,0.337225,0.396320,0.386876,56.665236,4.021459,9.418079,0.135593,0.082918,0.163563,0.086545,0.097117,0.157908,0.153544,0.258406,0.121951,0.151220,0.034146,0.039024,0.148780,0.070732,0.434146,64.238095,88.700000,45.419355,50.967742,85.633333,80.580645,382.181818,3.571429,2.695652,1.555556,2.285714,3.588235,1.705882,89.000000,2,20,307,5330,1.0,14.0,68.0,167.0,2,17,189,672,1,13,54,145,34176_3906,33.0,0.0,2.0,3.0,0.0,1.0,0.0,1,1,1
1,34176,121,0,6.0,0.0,386.0,0.0,23.0,6.0,24.0,0.0,11.0,1.0,16.782609,0.260870,2.181818,0.090909,46,16,0.10643,0.090909,0.068736,0.097561,0.042129,0.323725,0.270510,0.147059,0.117647,0.029412,0.029412,0.058824,0.176471,0.441176,12.0,6.833333,10.333333,8.800000,3.800000,8.111111,20.333333,1.666667,1.333333,1.0,1.0,1.0,1.2,7.5,106,44,251,107,17.0,16.0,19.0,17.0,8,10,11,8,6,6,7,6,1.0,0.0,48956.0,90.0,1565.0,2441.0,23309.0,31.0,3215.0,258.0,13.0,0.0,1.0,0.0,152.653846,8.705109,0.317624,141.133806,8.156852,1.000000,6.0_0.0,170.208267,9.331121,0.337225,0.396320,0.386876,31.281789,1.559744,7.250078,0.080249,0.055569,0.036349,0.065110,0.056057,0.130558,0.082564,0.573793,0.047071,0.016109,0.062552,0.034310,0.093933,0.055858,0.690167,211.333333,96.766667,167.741935,144.419355,347.566667,212.709677,4166.000000,10.714286,4.052632,12.458333,6.074074,14.966667,15.705882,412.375000,2,26,1170,8655,1.0,19.0,378.0,958.0,2,18,369,3942,1,16,276,1884,34176_121,8.0,0.0,0.0,5.0,0.0,1.0,0.0,1,1,1
2,34176,4356,1,6.0,0.0,386.0,0.0,23.0,6.0,24.0,0.0,11.0,1.0,16.782609,0.260870,2.181818,0.090909,46,16,0.10643,0.090909,0.068736,0.097561,0.042129,0.323725,0.270510,0.147059,0.117647,0.029412,0.029412,0.058824,0.176471,0.441176,12.0,6.833333,10.333333,8.800000,3.800000,8.111111,20.333333,1.666667,1.333333,1.0,1.0,1.0,1.2,7.5,106,44,251,107,17.0,16.0,19.0,17.0,8,10,11,8,6,6,7,6,1.0,0.0,5724.0,16.0,858.0,188.0,370.0,0.0,105.0,8.0,12.0,0.0,6.0,0.0,152.653846,8.705109,0.317624,141.133806,8.156852,1.000000,6.0_0.0,170.208267,9.331121,0.337225,0.396320,0.386876,6.671329,0.219114,3.523810,0.076190,0.000550,0.020911,0.394690,0.180355,0.126840,0.111570,0.165085,0.000000,0.026999,0.406023,0.154725,0.127726,0.111111,0.173416,1.333333,8.000000,95.633333,42.290323,30.733333,26.161290,109.090909,0.000000,2.888889,21.722222,8.277778,5.590909,4.863636,15.181818,2,14,62,2137,1.0,8.0,26.0,611.0,1,13,42,185,1,7,21,64,34176_4356,6.0,0.0,0.0,6.0,0.0,6.0,0.0,1,1,2
3,34176,2217,0,6.0,0.0,386.0,0.0,23.0,6.0,24.0,0.0,11.0,1.0,16.782609,0.260870,2.181818,0.090909,46,16,0.10643,0.090909,0.068736,0.097561,0.042129,0.323725,0.270510,0.147059,0.117647,0.029412,0.029412,0.058824,0.176471,0.441176,12.0,6.833333,10.333333,8.800000,3.800000,8.111111,20.333333,1.666667,1.333333,1.0,1.0,1.0,1.2,7.5,106,44,251,107,17.0,16.0,19.0,17.0,8,10,11,8,6,6,7,6,1.0,0.0,39620.0,67.0,1018.0,3787.0,12610.0,34.0,2703.0,363.0,1.0,0.0,1.0,0.0,152.653846,8.705109,0.317624,141.133806,8.156852,1.000000,6.0_0.0,170.208267,9.331121,0.337225,0.396320,0.386876,38.919450,3.720039,4.665187,0.134295,0.033570,0.085130,0.072672,0.053869,0.111059,0.145842,0.497857,0.026874,0.041924,0.034668,0.021768,0.045418,0.099704,0.729643,96.238095,170.833333,141.129032,104.612903,222.866667,283.225806,2724.727273,4.761905,5.379310,4.777778,4.764706,7.041667,14.269231,452.500000,2,5,377,14199,1.0,5.0,158.0,913.0,2,5,174,4319,1,4,114,2428,34176_2217,,,,1.0,0.0,1.0,0.0,1,1,1
4,230784,4818,0,0.0,0.0,46.0,0.0,6.0,0.0,1.0,0.0,1.0,0.0,7.666667,0.000000,1.000000,0.000000,15,5,0.00000,0.518519,0.092593,0.000000,0.074074,0.037037,0.277778,0.000000,0.571429,0.142857,0.000000,0.000000,0.142857,0.142857,0.0,4.000000,5.000000,0.000000,1.333333,2.000000,3.750000,0.000000,1.333333,1.0,0.0,0.0,1.0,1.0,19,17,31,20,5.0,6.0,6.0,5.0,1,1,1,1,1,1,1,1,0.0,0.0,27211.0,103.0,583.0,1770.0,16057.0,26.0,2150.0,189.0,7.0,0.0,1.0,0.0,105.742450,6.229536,0.852016,141.133806,8.156852,1.000000,0.0_0.0,117.407391,6.637107,0.895509,0.725957,0.730747,46.674099,3.036021,7.468372,0.087907,0.027574,0.060430,0.040987,0.040321,0.048972,0.114143,0.667575,0.035492,0.047567,0.032565,0.024881,0.031467,0.037322,0.790706,63.142857,96.866667,63.580645,62.548387,78.500000,177.064516,2918.454545,4.619048,5.000000,3.708333,2.833333,3.185185,3.923077,360.166667,2,27,461,5569,1.0,17.0,126.0,332.0,2,25,302,3052,1,23,199,1285,230784_4818,6.0,0.0,0.0,1.0,0.0,1.0,0.0,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
260859,359807,4325,0,4.0,1.0,58.0,0.0,4.0,1.0,49.0,0.0,5.0,0.0,14.500000,0.250000,9.800000,0.000000,11,4,0.00000,0.000000,0.000000,0.000000,0.085470,0.196581,0.717949,0.000000,0.000000,0.000000,0.000000,0.111111,0.222222,0.666667,0.0,0.000000,0.000000,0.000000,3.333333,4.600000,21.000000,0.000000,0.000000,0.0,0.0,1.0,1.0,3.0,22,16,31,21,4.0,4.0,4.0,4.0,11,11,19,13,2,3,3,2,0.0,0.0,11616.0,29.0,1124.0,848.0,5095.0,18.0,996.0,63.0,18.0,0.0,2.0,0.0,147.542249,9.620345,0.716419,99.741350,6.668382,0.425959,4.0_1.0,124.098490,8.449806,0.317763,0.272280,0.330118,10.334520,0.754448,5.115462,0.063253,0.023599,0.054828,0.075092,0.078175,0.124817,0.191369,0.452120,0.030189,0.047642,0.079717,0.079245,0.108019,0.157075,0.498113,22.238095,36.166667,47.935484,49.903226,82.333333,122.161290,813.363636,3.047619,3.740741,5.451613,5.600000,7.633333,10.741935,96.000000,2,18,75,4412,1.0,15.0,41.0,849.0,2,14,38,1421,1,11,29,779,359807_4325,,,,18.0,0.0,2.0,0.0,1,2,6
260860,294527,3971,0,0.0,1.0,121.0,0.0,1.0,24.0,41.0,0.0,4.0,7.0,121.000000,24.000000,10.250000,1.750000,5,1,0.00000,0.000000,0.000000,0.000000,0.101010,0.000000,0.898990,0.000000,0.000000,0.000000,0.000000,0.200000,0.000000,0.800000,0.0,0.000000,0.000000,0.000000,20.000000,0.000000,35.600000,0.000000,0.000000,0.0,0.0,1.0,0.0,4.0,22,16,73,23,1.0,1.0,1.0,1.0,20,10,22,20,4,4,4,4,0.0,0.0,17573.0,75.0,871.0,1414.0,7029.0,19.0,1737.0,174.0,13.0,0.0,1.0,3.0,105.742450,6.229536,0.852016,99.741350,6.668382,0.425959,0.0_1.0,77.416906,5.293887,0.303650,0.162313,0.197636,20.175660,1.623421,4.046632,0.100173,0.017825,0.073204,0.072200,0.066524,0.123944,0.133636,0.512668,0.020706,0.053298,0.049463,0.046396,0.084739,0.067868,0.677531,24.523810,70.500000,67.290323,62.000000,119.366667,124.548387,1346.545455,3.375000,5.560000,4.448276,4.033333,7.620690,5.709677,176.700000,2,7,271,5720,1.0,6.0,98.0,577.0,2,6,129,2669,1,6,79,1432,294527_3971,8.0,0.0,3.0,5.0,0.0,1.0,0.0,1,1,2
260861,294527,152,0,0.0,1.0,121.0,0.0,1.0,24.0,41.0,0.0,4.0,7.0,121.000000,24.000000,10.250000,1.750000,5,1,0.00000,0.000000,0.000000,0.000000,0.101010,0.000000,0.898990,0.000000,0.000000,0.000000,0.000000,0.200000,0.000000,0.800000,0.0,0.000000,0.000000,0.000000,20.000000,0.000000,35.600000,0.000000,0.000000,0.0,0.0,1.0,0.0,4.0,22,16,73,23,1.0,1.0,1.0,1.0,20,10,22,20,4,4,4,4,0.0,0.0,4303.0,8.0,219.0,458.0,2526.0,4.0,544.0,45.0,7.0,0.0,1.0,1.0,105.742450,6.229536,0.852016,99.741350,6.668382,0.425959,0.0_1.0,77.416906,5.293887,0.303650,0.162313,0.197636,19.648402,2.091324,4.643382,0.082721,0.002344,0.004811,0.026027,0.125941,0.096336,0.068583,0.675959,0.001311,0.002621,0.000000,0.171691,0.095675,0.011796,0.716907,1.900000,2.294118,11.722222,46.409091,26.931034,17.935484,498.181818,1.000000,1.000000,0.000000,26.200000,14.600000,2.250000,136.750000,2,13,162,1698,1.0,5.0,16.0,176.0,2,9,99,978,1,5,40,474,294527_152,,,,7.0,0.0,1.0,1.0,1,1,1
260862,294527,2537,0,0.0,1.0,121.0,0.0,1.0,24.0,41.0,0.0,4.0,7.0,121.000000,24.000000,10.250000,1.750000,5,1,0.00000,0.000000,0.000000,0.000000,0.101010,0.000000,0.898990,0.000000,0.000000,0.000000,0.000000,0.200000,0.000000,0.800000,0.0,0.000000,0.000000,0.000000,20.000000,0.000000,35.600000,0.000000,0.000000,0.0,0.0,1.0,0.0,4.0,22,16,73,23,1.0,1.0,1.0,1.0,20,10,22,20,4,4,4,4,0.0,0.0,26137.0,85.0,1829.0,2038.0,13456.0,38.0,4231.0,280.0,0.0,0.0,1.0,0.0,105.742450,6.229536,0.852016,99.741350,6.668382,0.425959,0.0_1.0,77.416906,5.293887,0.303650,0.162313,0.197636,14.290323,1.114270,3.180336,0.066178,0.012496,0.089450,0.030461,0.072358,0.111594,0.105793,0.577848,0.015017,0.045710,0.018977,0.048515,0.093069,0.057426,0.721287,28.619048,143.400000,47.258065,112.258065,178.900000,164.129032,2526.454545,4.789474,10.653846,3.965517,9.483871,18.800000,11.225806,485.666667,2,20,221,8269,1.0,15.0,136.0,1414.0,2,18,144,3980,1,15,127,2935,294527_2537,,,,0.0,0.0,1.0,0.0,1,1,1


In [83]:
temp = df[(df.action_type == 2)&(df.time_stamp == '1900-11-11')].groupby(['user_merchant_pair'])\
        .agg({'brand_id':'nunique', 'cat_id':'nunique','item_id':'nunique'}).reset_index()\
        .rename(columns = {'brand_id':'um_unique_brands_purchased_on_sale',
                           'cat_id':'um_unique_categories_purchased_on_sale',
                           'item_id':'um_unique_items_purchased_on_sale'}) 


In [84]:
df_combined = pd.merge(df_combined, temp, on = 'user_merchant_pair', how = 'left')

In [85]:
temp = df[df.time_stamp != '1900-11-11'].groupby(['user_merchant_pair'])['action_type'].count().reset_index()\
        .rename(columns = {'action_type':'um_activity_before_sale'})


In [86]:
df_combined = pd.merge(df_combined, temp, on = 'user_merchant_pair', how = 'left')

In [87]:
df_combined = df_combined.fillna(0)

# Days difference between first and last transaction

# Activity

In [88]:
temp = df[['user_id','time_stamp']].drop_duplicates().sort_values(['user_id','time_stamp'])\
        .groupby(['user_id'])['time_stamp'].first().reset_index()\
        .rename(columns = {'time_stamp':'first_date_activity'})

temp['last_date_activity'] = '1900-11-11'

temp['last_date_activity'] = pd.to_datetime(temp['last_date_activity'])

temp['activity_diff_from_1111'] = (temp['last_date_activity'] - temp['first_date_activity']).dt.days


In [89]:
temp1 = df[(df.time_stamp != '1900-11-11')&(df.time_stamp != '1900-11-12')][['user_id','time_stamp']]\
        .drop_duplicates().sort_values(['user_id','time_stamp']).groupby(['user_id'])['time_stamp'].last()\
        .reset_index().rename(columns = {'time_stamp':'last_date_before_1111'})

temp = pd.merge(temp, temp1, on = 'user_id', how = 'left')

In [90]:
temp['activity_diff_from_second_last'] = (temp['last_date_before_1111'] - temp['first_date_activity']).dt.days

In [91]:
temp['activity_diff_from_1111'] = temp['activity_diff_from_1111']/temp['activity_diff_from_1111'].max()

In [92]:
temp['activity_diff_from_second_last'] = temp['activity_diff_from_second_last']/temp['activity_diff_from_second_last'].max()



In [93]:
temp

Unnamed: 0,user_id,first_date_activity,last_date_activity,activity_diff_from_1111,last_date_before_1111,activity_diff_from_second_last
0,1,1900-10-09,1900-11-11,0.179348,1900-10-21,0.065574
1,2,1900-05-27,1900-11-11,0.913043,1900-11-08,0.901639
2,3,1900-05-16,1900-11-11,0.972826,1900-11-09,0.967213
3,4,1900-05-27,1900-11-11,0.913043,1900-11-10,0.912568
4,5,1900-05-19,1900-11-11,0.956522,1900-10-31,0.901639
...,...,...,...,...,...,...
424165,424166,1900-05-14,1900-11-11,0.983696,1900-11-09,0.978142
424166,424167,1900-05-29,1900-11-11,0.902174,1900-11-08,0.890710
424167,424168,1900-05-23,1900-11-11,0.934783,1900-11-08,0.923497
424168,424169,1900-05-11,1900-11-11,1.000000,1900-11-10,1.000000


In [94]:
df_combined = pd.merge(df_combined, temp[['user_id','activity_diff_from_1111','activity_diff_from_second_last']],
                       on = 'user_id', how = 'left')



# Purchase

In [95]:
temp = df[df.action_type == 2][['user_id','time_stamp']].drop_duplicates().sort_values(['user_id','time_stamp'])\
        .groupby(['user_id'])['time_stamp'].first().reset_index()\
        .rename(columns = {'time_stamp':'first_date_purchase'})

temp['last_date_purchase'] = '1900-11-11'

temp['last_date_purchase'] = pd.to_datetime(temp['last_date_purchase'])

temp['purchase_diff_from_1111'] = (temp['last_date_purchase'] - temp['first_date_purchase']).dt.days


In [96]:
temp1 = df[df.action_type == 2][(df.time_stamp != '1900-11-11')&(df.time_stamp != '1900-11-12')][['user_id','time_stamp']]\
        .drop_duplicates().sort_values(['user_id','time_stamp']).groupby(['user_id'])['time_stamp'].last()\
        .reset_index().rename(columns = {'time_stamp':'purchase_last_date_before_1111'})

temp = pd.merge(temp, temp1, on = 'user_id', how = 'left')

  """Entry point for launching an IPython kernel.


In [97]:
temp['purchase_diff_from_second_last'] = (temp['purchase_last_date_before_1111'] - temp['first_date_purchase']).dt.days


In [98]:
temp['purchase_diff_from_1111'] = temp['purchase_diff_from_1111']/temp['purchase_diff_from_1111'].max()

temp['purchase_diff_from_second_last'] = temp['purchase_diff_from_second_last']/temp['purchase_diff_from_second_last'].max()


In [99]:
temp = temp.fillna(0)

In [100]:
df_combined = pd.merge(df_combined, temp[['user_id','purchase_diff_from_1111','purchase_diff_from_second_last']],
                       on = 'user_id', how = 'left')



In [101]:
df_combined

Unnamed: 0,user_id,seller_id,label,age_range,gender,userbeforesale0,userbeforesale1,userbeforesale2,userbeforesale3,useronsale0,useronsale1,useronsale2,useronsale3,user_ratio_0_to_2_before_sale,user_ratio_3_to_2_before_sale,user_ratio_0_to_2_on_sale,user_ratio_3_to_2_on_sale,active_days,purchase_days,useractivity5,useractivity6,useractivity7,useractivity8,useractivity9,useractivity10,useractivity11,userpurchase5,userpurchase6,userpurchase7,userpurchase8,userpurchase9,userpurchase10,userpurchase11,useravgactivity5,useravgactivity6,useravgactivity7,useravgactivity8,useravgactivity9,useravgactivity10,useravgactivity11,useravgpurchase5,useravgpurchase6,useravgpurchase7,useravgpurchase8,useravgpurchase9,useravgpurchase10,useravgpurchase11,user_unique_brands_before_sale,user_unique_categories_before_sale,user_unique_items_before_sale,user_unique_sellers_before_sale,user_unique_brands_purchased_before_sale,user_unique_categories_purchased_before_sale,user_unique_items_purchased_before_sale,user_unique_sellers_purchased_before_sale,user_unique_brands_on_sale,user_unique_categories_on_sale,user_unique_items_on_sale,user_unique_sellers_on_sale,user_unique_brands_purchased_on_sale,user_unique_categories_purchased_on_sale,user_unique_items_purchased_on_sale,user_unique_sellers_purchased_on_sale,user_all_month_purchase,user_purchase_only_11,sellerbeforesale0,sellerbeforesale1,sellerbeforesale2,sellerbeforesale3,selleronsale0,selleronsale1,selleronsale2,selleronsale3,userseller0,userseller1,userseller2,userseller3,age_avg_activity,age_avg_purchase,unique_users_age_norm,gender_avg_activity,gender_avg_purchase,unique_users_gender_norm,age_gender,age_gender_avg_activity,age_gender_avg_purchase,unique_users_age_gender_norm,age_gender_active_days_norm,age_gender_purchase_days_norm,seller_ratio_0_to_2_before_sale,seller_ratio_3_to_2_before_sale,seller_ratio_0_to_2_on_sale,seller_ratio_3_to_2_on_sale,selleractivity5,selleractivity6,selleractivity7,selleractivity8,selleractivity9,selleractivity10,selleractivity11,sellerpurchase5,sellerpurchase6,sellerpurchase7,sellerpurchase8,sellerpurchase9,sellerpurchase10,sellerpurchase11,selleravgactivity5,selleravgactivity6,selleravgactivity7,selleravgactivity8,selleravgactivity9,selleravgactivity10,selleravgactivity11,selleravgpurchase5,selleravgpurchase6,selleravgpurchase7,selleravgpurchase8,selleravgpurchase9,selleravgpurchase10,selleravgpurchase11,seller_unique_brands_before_sale,seller_unique_categories_before_sale,seller_unique_items_before_sale,seller_unique_users_before_sale,seller_unique_brands_purchased_before_sale,seller_unique_categories_purchased_before_sale,seller_unique_items_purchased_before_sale,seller_unique_users_purchased_before_sale,seller_unique_brands_on_sale,seller_unique_categories_on_sale,seller_unique_items_on_sale,seller_unique_users_on_sale,seller_unique_brands_purchased_on_sale,seller_unique_categories_purchased_on_sale,seller_unique_items_purchased_on_sale,seller_unique_users_purchased_on_sale,user_merchant_pair,usersellerbeforesale0,usersellerbeforesale1,usersellerbeforesale3,userselleronsale0,userselleronsale1,userselleronsale2,userselleronsale3,um_unique_brands_on_sale,um_unique_categories_on_sale,um_unique_items_on_sale,um_unique_brands_purchased_on_sale,um_unique_categories_purchased_on_sale,um_unique_items_purchased_on_sale,um_activity_before_sale,activity_diff_from_1111,activity_diff_from_second_last,purchase_diff_from_1111,purchase_diff_from_second_last
0,34176,3906,0,6.0,0.0,386.0,0.0,23.0,6.0,24.0,0.0,11.0,1.0,16.782609,0.260870,2.181818,0.090909,46,16,0.10643,0.090909,0.068736,0.097561,0.042129,0.323725,0.270510,0.147059,0.117647,0.029412,0.029412,0.058824,0.176471,0.441176,12.0,6.833333,10.333333,8.800000,3.800000,8.111111,20.333333,1.666667,1.333333,1.0,1.0,1.0,1.2,7.5,106,44,251,107,17.0,16.0,19.0,17.0,8,10,11,8,6,6,7,6,1.0,0.0,13203.0,26.0,233.0,937.0,1667.0,2.0,177.0,24.0,36.0,0.0,1.0,2.0,152.653846,8.705109,0.317624,141.133806,8.156852,1.000000,6.0_0.0,170.208267,9.331121,0.337225,0.396320,0.386876,56.665236,4.021459,9.418079,0.135593,0.082918,0.163563,0.086545,0.097117,0.157908,0.153544,0.258406,0.121951,0.151220,0.034146,0.039024,0.148780,0.070732,0.434146,64.238095,88.700000,45.419355,50.967742,85.633333,80.580645,382.181818,3.571429,2.695652,1.555556,2.285714,3.588235,1.705882,89.000000,2,20,307,5330,1.0,14.0,68.0,167.0,2,17,189,672,1,13,54,145,34176_3906,33.0,0.0,2.0,3.0,0.0,1.0,0.0,1,1,1,1,1,1,35.0,0.945652,0.945355,0.945652,0.934426
1,34176,121,0,6.0,0.0,386.0,0.0,23.0,6.0,24.0,0.0,11.0,1.0,16.782609,0.260870,2.181818,0.090909,46,16,0.10643,0.090909,0.068736,0.097561,0.042129,0.323725,0.270510,0.147059,0.117647,0.029412,0.029412,0.058824,0.176471,0.441176,12.0,6.833333,10.333333,8.800000,3.800000,8.111111,20.333333,1.666667,1.333333,1.0,1.0,1.0,1.2,7.5,106,44,251,107,17.0,16.0,19.0,17.0,8,10,11,8,6,6,7,6,1.0,0.0,48956.0,90.0,1565.0,2441.0,23309.0,31.0,3215.0,258.0,13.0,0.0,1.0,0.0,152.653846,8.705109,0.317624,141.133806,8.156852,1.000000,6.0_0.0,170.208267,9.331121,0.337225,0.396320,0.386876,31.281789,1.559744,7.250078,0.080249,0.055569,0.036349,0.065110,0.056057,0.130558,0.082564,0.573793,0.047071,0.016109,0.062552,0.034310,0.093933,0.055858,0.690167,211.333333,96.766667,167.741935,144.419355,347.566667,212.709677,4166.000000,10.714286,4.052632,12.458333,6.074074,14.966667,15.705882,412.375000,2,26,1170,8655,1.0,19.0,378.0,958.0,2,18,369,3942,1,16,276,1884,34176_121,8.0,0.0,0.0,5.0,0.0,1.0,0.0,1,1,1,1,1,1,8.0,0.945652,0.945355,0.945652,0.934426
2,34176,4356,1,6.0,0.0,386.0,0.0,23.0,6.0,24.0,0.0,11.0,1.0,16.782609,0.260870,2.181818,0.090909,46,16,0.10643,0.090909,0.068736,0.097561,0.042129,0.323725,0.270510,0.147059,0.117647,0.029412,0.029412,0.058824,0.176471,0.441176,12.0,6.833333,10.333333,8.800000,3.800000,8.111111,20.333333,1.666667,1.333333,1.0,1.0,1.0,1.2,7.5,106,44,251,107,17.0,16.0,19.0,17.0,8,10,11,8,6,6,7,6,1.0,0.0,5724.0,16.0,858.0,188.0,370.0,0.0,105.0,8.0,12.0,0.0,6.0,0.0,152.653846,8.705109,0.317624,141.133806,8.156852,1.000000,6.0_0.0,170.208267,9.331121,0.337225,0.396320,0.386876,6.671329,0.219114,3.523810,0.076190,0.000550,0.020911,0.394690,0.180355,0.126840,0.111570,0.165085,0.000000,0.026999,0.406023,0.154725,0.127726,0.111111,0.173416,1.333333,8.000000,95.633333,42.290323,30.733333,26.161290,109.090909,0.000000,2.888889,21.722222,8.277778,5.590909,4.863636,15.181818,2,14,62,2137,1.0,8.0,26.0,611.0,1,13,42,185,1,7,21,64,34176_4356,6.0,0.0,0.0,6.0,0.0,6.0,0.0,1,1,2,1,1,2,6.0,0.945652,0.945355,0.945652,0.934426
3,34176,2217,0,6.0,0.0,386.0,0.0,23.0,6.0,24.0,0.0,11.0,1.0,16.782609,0.260870,2.181818,0.090909,46,16,0.10643,0.090909,0.068736,0.097561,0.042129,0.323725,0.270510,0.147059,0.117647,0.029412,0.029412,0.058824,0.176471,0.441176,12.0,6.833333,10.333333,8.800000,3.800000,8.111111,20.333333,1.666667,1.333333,1.0,1.0,1.0,1.2,7.5,106,44,251,107,17.0,16.0,19.0,17.0,8,10,11,8,6,6,7,6,1.0,0.0,39620.0,67.0,1018.0,3787.0,12610.0,34.0,2703.0,363.0,1.0,0.0,1.0,0.0,152.653846,8.705109,0.317624,141.133806,8.156852,1.000000,6.0_0.0,170.208267,9.331121,0.337225,0.396320,0.386876,38.919450,3.720039,4.665187,0.134295,0.033570,0.085130,0.072672,0.053869,0.111059,0.145842,0.497857,0.026874,0.041924,0.034668,0.021768,0.045418,0.099704,0.729643,96.238095,170.833333,141.129032,104.612903,222.866667,283.225806,2724.727273,4.761905,5.379310,4.777778,4.764706,7.041667,14.269231,452.500000,2,5,377,14199,1.0,5.0,158.0,913.0,2,5,174,4319,1,4,114,2428,34176_2217,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1,1,1,1,1,1,0.0,0.945652,0.945355,0.945652,0.934426
4,230784,4818,0,0.0,0.0,46.0,0.0,6.0,0.0,1.0,0.0,1.0,0.0,7.666667,0.000000,1.000000,0.000000,15,5,0.00000,0.518519,0.092593,0.000000,0.074074,0.037037,0.277778,0.000000,0.571429,0.142857,0.000000,0.000000,0.142857,0.142857,0.0,4.000000,5.000000,0.000000,1.333333,2.000000,3.750000,0.000000,1.333333,1.0,0.0,0.0,1.0,1.0,19,17,31,20,5.0,6.0,6.0,5.0,1,1,1,1,1,1,1,1,0.0,0.0,27211.0,103.0,583.0,1770.0,16057.0,26.0,2150.0,189.0,7.0,0.0,1.0,0.0,105.742450,6.229536,0.852016,141.133806,8.156852,1.000000,0.0_0.0,117.407391,6.637107,0.895509,0.725957,0.730747,46.674099,3.036021,7.468372,0.087907,0.027574,0.060430,0.040987,0.040321,0.048972,0.114143,0.667575,0.035492,0.047567,0.032565,0.024881,0.031467,0.037322,0.790706,63.142857,96.866667,63.580645,62.548387,78.500000,177.064516,2918.454545,4.619048,5.000000,3.708333,2.833333,3.185185,3.923077,360.166667,2,27,461,5569,1.0,17.0,126.0,332.0,2,25,302,3052,1,23,199,1285,230784_4818,6.0,0.0,0.0,1.0,0.0,1.0,0.0,1,1,1,1,1,1,6.0,0.885870,0.885246,0.885870,0.743169
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
260859,359807,4325,0,4.0,1.0,58.0,0.0,4.0,1.0,49.0,0.0,5.0,0.0,14.500000,0.250000,9.800000,0.000000,11,4,0.00000,0.000000,0.000000,0.000000,0.085470,0.196581,0.717949,0.000000,0.000000,0.000000,0.000000,0.111111,0.222222,0.666667,0.0,0.000000,0.000000,0.000000,3.333333,4.600000,21.000000,0.000000,0.000000,0.0,0.0,1.0,1.0,3.0,22,16,31,21,4.0,4.0,4.0,4.0,11,11,19,13,2,3,3,2,0.0,0.0,11616.0,29.0,1124.0,848.0,5095.0,18.0,996.0,63.0,18.0,0.0,2.0,0.0,147.542249,9.620345,0.716419,99.741350,6.668382,0.425959,4.0_1.0,124.098490,8.449806,0.317763,0.272280,0.330118,10.334520,0.754448,5.115462,0.063253,0.023599,0.054828,0.075092,0.078175,0.124817,0.191369,0.452120,0.030189,0.047642,0.079717,0.079245,0.108019,0.157075,0.498113,22.238095,36.166667,47.935484,49.903226,82.333333,122.161290,813.363636,3.047619,3.740741,5.451613,5.600000,7.633333,10.741935,96.000000,2,18,75,4412,1.0,15.0,41.0,849.0,2,14,38,1421,1,11,29,779,359807_4325,0.0,0.0,0.0,18.0,0.0,2.0,0.0,1,2,6,1,1,1,0.0,0.282609,0.273224,0.239130,0.202186
260860,294527,3971,0,0.0,1.0,121.0,0.0,1.0,24.0,41.0,0.0,4.0,7.0,121.000000,24.000000,10.250000,1.750000,5,1,0.00000,0.000000,0.000000,0.000000,0.101010,0.000000,0.898990,0.000000,0.000000,0.000000,0.000000,0.200000,0.000000,0.800000,0.0,0.000000,0.000000,0.000000,20.000000,0.000000,35.600000,0.000000,0.000000,0.0,0.0,1.0,0.0,4.0,22,16,73,23,1.0,1.0,1.0,1.0,20,10,22,20,4,4,4,4,0.0,0.0,17573.0,75.0,871.0,1414.0,7029.0,19.0,1737.0,174.0,13.0,0.0,1.0,3.0,105.742450,6.229536,0.852016,99.741350,6.668382,0.425959,0.0_1.0,77.416906,5.293887,0.303650,0.162313,0.197636,20.175660,1.623421,4.046632,0.100173,0.017825,0.073204,0.072200,0.066524,0.123944,0.133636,0.512668,0.020706,0.053298,0.049463,0.046396,0.084739,0.067868,0.677531,24.523810,70.500000,67.290323,62.000000,119.366667,124.548387,1346.545455,3.375000,5.560000,4.448276,4.033333,7.620690,5.709677,176.700000,2,7,271,5720,1.0,6.0,98.0,577.0,2,6,129,2669,1,6,79,1432,294527_3971,8.0,0.0,3.0,5.0,0.0,1.0,0.0,1,1,2,1,1,1,11.0,0.255435,0.251366,0.255435,0.000000
260861,294527,152,0,0.0,1.0,121.0,0.0,1.0,24.0,41.0,0.0,4.0,7.0,121.000000,24.000000,10.250000,1.750000,5,1,0.00000,0.000000,0.000000,0.000000,0.101010,0.000000,0.898990,0.000000,0.000000,0.000000,0.000000,0.200000,0.000000,0.800000,0.0,0.000000,0.000000,0.000000,20.000000,0.000000,35.600000,0.000000,0.000000,0.0,0.0,1.0,0.0,4.0,22,16,73,23,1.0,1.0,1.0,1.0,20,10,22,20,4,4,4,4,0.0,0.0,4303.0,8.0,219.0,458.0,2526.0,4.0,544.0,45.0,7.0,0.0,1.0,1.0,105.742450,6.229536,0.852016,99.741350,6.668382,0.425959,0.0_1.0,77.416906,5.293887,0.303650,0.162313,0.197636,19.648402,2.091324,4.643382,0.082721,0.002344,0.004811,0.026027,0.125941,0.096336,0.068583,0.675959,0.001311,0.002621,0.000000,0.171691,0.095675,0.011796,0.716907,1.900000,2.294118,11.722222,46.409091,26.931034,17.935484,498.181818,1.000000,1.000000,0.000000,26.200000,14.600000,2.250000,136.750000,2,13,162,1698,1.0,5.0,16.0,176.0,2,9,99,978,1,5,40,474,294527_152,0.0,0.0,0.0,7.0,0.0,1.0,1.0,1,1,1,1,1,1,0.0,0.255435,0.251366,0.255435,0.000000
260862,294527,2537,0,0.0,1.0,121.0,0.0,1.0,24.0,41.0,0.0,4.0,7.0,121.000000,24.000000,10.250000,1.750000,5,1,0.00000,0.000000,0.000000,0.000000,0.101010,0.000000,0.898990,0.000000,0.000000,0.000000,0.000000,0.200000,0.000000,0.800000,0.0,0.000000,0.000000,0.000000,20.000000,0.000000,35.600000,0.000000,0.000000,0.0,0.0,1.0,0.0,4.0,22,16,73,23,1.0,1.0,1.0,1.0,20,10,22,20,4,4,4,4,0.0,0.0,26137.0,85.0,1829.0,2038.0,13456.0,38.0,4231.0,280.0,0.0,0.0,1.0,0.0,105.742450,6.229536,0.852016,99.741350,6.668382,0.425959,0.0_1.0,77.416906,5.293887,0.303650,0.162313,0.197636,14.290323,1.114270,3.180336,0.066178,0.012496,0.089450,0.030461,0.072358,0.111594,0.105793,0.577848,0.015017,0.045710,0.018977,0.048515,0.093069,0.057426,0.721287,28.619048,143.400000,47.258065,112.258065,178.900000,164.129032,2526.454545,4.789474,10.653846,3.965517,9.483871,18.800000,11.225806,485.666667,2,20,221,8269,1.0,15.0,136.0,1414.0,2,18,144,3980,1,15,127,2935,294527_2537,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1,1,1,1,1,1,0.0,0.255435,0.251366,0.255435,0.000000


# Merchant-Repeat

In [102]:
# Number of repeat buyers

In [103]:
temp = df[(df.action_type == 2)&(df.time_stamp != '1900-11-11')].groupby(['seller_id','user_id'])['action_type']\
        .count().reset_index().rename(columns = {'action_type':'count_of_purchase'})

In [104]:
temp1 = temp[temp.count_of_purchase > 1].groupby(['seller_id'])['user_id'].nunique().reset_index()\
        .rename(columns = {'user_id':'count_of_repeat_users_for_merchant'})

In [105]:
temp2 = temp[temp.count_of_purchase == 1].groupby(['seller_id'])['user_id'].nunique().reset_index()\
        .rename(columns = {'user_id':'count_of_single_time_users_for_merchant'})

In [106]:
temp = pd.merge(temp1, temp2, on = 'seller_id', how = 'outer')

In [107]:
temp['ratio_of_repeat_to_single_time_users_for_merchant'] = temp['count_of_repeat_users_for_merchant']/temp['count_of_single_time_users_for_merchant']


In [108]:
temp = temp.fillna(0)

In [109]:
# df_combined = pd.merge(df_combined, temp, on = 'seller_id', how = 'left')

In [110]:
temp = df[(df.action_type == 2)&(df.label == -1)].groupby(['seller_id'])['user_id'].nunique().reset_index()\
        .rename(columns = {'user_id':'seller_count_of_label-1'})

In [111]:
temp

Unnamed: 0,seller_id,seller_count_of_label-1
0,1,7666
1,2,57
2,3,65
3,4,201
4,5,120
...,...,...
4988,4991,12
4989,4992,387
4990,4993,398
4991,4994,142


In [112]:
df_combined = pd.merge(df_combined, temp, on = 'seller_id', how = 'left')

In [113]:
df_combined

Unnamed: 0,user_id,seller_id,label,age_range,gender,userbeforesale0,userbeforesale1,userbeforesale2,userbeforesale3,useronsale0,useronsale1,useronsale2,useronsale3,user_ratio_0_to_2_before_sale,user_ratio_3_to_2_before_sale,user_ratio_0_to_2_on_sale,user_ratio_3_to_2_on_sale,active_days,purchase_days,useractivity5,useractivity6,useractivity7,useractivity8,useractivity9,useractivity10,useractivity11,userpurchase5,userpurchase6,userpurchase7,userpurchase8,userpurchase9,userpurchase10,userpurchase11,useravgactivity5,useravgactivity6,useravgactivity7,useravgactivity8,useravgactivity9,useravgactivity10,useravgactivity11,useravgpurchase5,useravgpurchase6,useravgpurchase7,useravgpurchase8,useravgpurchase9,useravgpurchase10,useravgpurchase11,user_unique_brands_before_sale,user_unique_categories_before_sale,user_unique_items_before_sale,user_unique_sellers_before_sale,user_unique_brands_purchased_before_sale,user_unique_categories_purchased_before_sale,user_unique_items_purchased_before_sale,user_unique_sellers_purchased_before_sale,user_unique_brands_on_sale,user_unique_categories_on_sale,user_unique_items_on_sale,user_unique_sellers_on_sale,user_unique_brands_purchased_on_sale,user_unique_categories_purchased_on_sale,user_unique_items_purchased_on_sale,user_unique_sellers_purchased_on_sale,user_all_month_purchase,user_purchase_only_11,sellerbeforesale0,sellerbeforesale1,sellerbeforesale2,sellerbeforesale3,selleronsale0,selleronsale1,selleronsale2,selleronsale3,userseller0,userseller1,userseller2,userseller3,age_avg_activity,age_avg_purchase,unique_users_age_norm,gender_avg_activity,gender_avg_purchase,unique_users_gender_norm,age_gender,age_gender_avg_activity,age_gender_avg_purchase,unique_users_age_gender_norm,age_gender_active_days_norm,age_gender_purchase_days_norm,seller_ratio_0_to_2_before_sale,seller_ratio_3_to_2_before_sale,seller_ratio_0_to_2_on_sale,seller_ratio_3_to_2_on_sale,selleractivity5,selleractivity6,selleractivity7,selleractivity8,selleractivity9,selleractivity10,selleractivity11,sellerpurchase5,sellerpurchase6,sellerpurchase7,sellerpurchase8,sellerpurchase9,sellerpurchase10,sellerpurchase11,selleravgactivity5,selleravgactivity6,selleravgactivity7,selleravgactivity8,selleravgactivity9,selleravgactivity10,selleravgactivity11,selleravgpurchase5,selleravgpurchase6,selleravgpurchase7,selleravgpurchase8,selleravgpurchase9,selleravgpurchase10,selleravgpurchase11,seller_unique_brands_before_sale,seller_unique_categories_before_sale,seller_unique_items_before_sale,seller_unique_users_before_sale,seller_unique_brands_purchased_before_sale,seller_unique_categories_purchased_before_sale,seller_unique_items_purchased_before_sale,seller_unique_users_purchased_before_sale,seller_unique_brands_on_sale,seller_unique_categories_on_sale,seller_unique_items_on_sale,seller_unique_users_on_sale,seller_unique_brands_purchased_on_sale,seller_unique_categories_purchased_on_sale,seller_unique_items_purchased_on_sale,seller_unique_users_purchased_on_sale,user_merchant_pair,usersellerbeforesale0,usersellerbeforesale1,usersellerbeforesale3,userselleronsale0,userselleronsale1,userselleronsale2,userselleronsale3,um_unique_brands_on_sale,um_unique_categories_on_sale,um_unique_items_on_sale,um_unique_brands_purchased_on_sale,um_unique_categories_purchased_on_sale,um_unique_items_purchased_on_sale,um_activity_before_sale,activity_diff_from_1111,activity_diff_from_second_last,purchase_diff_from_1111,purchase_diff_from_second_last,seller_count_of_label-1
0,34176,3906,0,6.0,0.0,386.0,0.0,23.0,6.0,24.0,0.0,11.0,1.0,16.782609,0.260870,2.181818,0.090909,46,16,0.10643,0.090909,0.068736,0.097561,0.042129,0.323725,0.270510,0.147059,0.117647,0.029412,0.029412,0.058824,0.176471,0.441176,12.0,6.833333,10.333333,8.800000,3.800000,8.111111,20.333333,1.666667,1.333333,1.0,1.0,1.0,1.2,7.5,106,44,251,107,17.0,16.0,19.0,17.0,8,10,11,8,6,6,7,6,1.0,0.0,13203.0,26.0,233.0,937.0,1667.0,2.0,177.0,24.0,36.0,0.0,1.0,2.0,152.653846,8.705109,0.317624,141.133806,8.156852,1.000000,6.0_0.0,170.208267,9.331121,0.337225,0.396320,0.386876,56.665236,4.021459,9.418079,0.135593,0.082918,0.163563,0.086545,0.097117,0.157908,0.153544,0.258406,0.121951,0.151220,0.034146,0.039024,0.148780,0.070732,0.434146,64.238095,88.700000,45.419355,50.967742,85.633333,80.580645,382.181818,3.571429,2.695652,1.555556,2.285714,3.588235,1.705882,89.000000,2,20,307,5330,1.0,14.0,68.0,167.0,2,17,189,672,1,13,54,145,34176_3906,33.0,0.0,2.0,3.0,0.0,1.0,0.0,1,1,1,1,1,1,35.0,0.945652,0.945355,0.945652,0.934426,167.0
1,34176,121,0,6.0,0.0,386.0,0.0,23.0,6.0,24.0,0.0,11.0,1.0,16.782609,0.260870,2.181818,0.090909,46,16,0.10643,0.090909,0.068736,0.097561,0.042129,0.323725,0.270510,0.147059,0.117647,0.029412,0.029412,0.058824,0.176471,0.441176,12.0,6.833333,10.333333,8.800000,3.800000,8.111111,20.333333,1.666667,1.333333,1.0,1.0,1.0,1.2,7.5,106,44,251,107,17.0,16.0,19.0,17.0,8,10,11,8,6,6,7,6,1.0,0.0,48956.0,90.0,1565.0,2441.0,23309.0,31.0,3215.0,258.0,13.0,0.0,1.0,0.0,152.653846,8.705109,0.317624,141.133806,8.156852,1.000000,6.0_0.0,170.208267,9.331121,0.337225,0.396320,0.386876,31.281789,1.559744,7.250078,0.080249,0.055569,0.036349,0.065110,0.056057,0.130558,0.082564,0.573793,0.047071,0.016109,0.062552,0.034310,0.093933,0.055858,0.690167,211.333333,96.766667,167.741935,144.419355,347.566667,212.709677,4166.000000,10.714286,4.052632,12.458333,6.074074,14.966667,15.705882,412.375000,2,26,1170,8655,1.0,19.0,378.0,958.0,2,18,369,3942,1,16,276,1884,34176_121,8.0,0.0,0.0,5.0,0.0,1.0,0.0,1,1,1,1,1,1,8.0,0.945652,0.945355,0.945652,0.934426,958.0
2,34176,4356,1,6.0,0.0,386.0,0.0,23.0,6.0,24.0,0.0,11.0,1.0,16.782609,0.260870,2.181818,0.090909,46,16,0.10643,0.090909,0.068736,0.097561,0.042129,0.323725,0.270510,0.147059,0.117647,0.029412,0.029412,0.058824,0.176471,0.441176,12.0,6.833333,10.333333,8.800000,3.800000,8.111111,20.333333,1.666667,1.333333,1.0,1.0,1.0,1.2,7.5,106,44,251,107,17.0,16.0,19.0,17.0,8,10,11,8,6,6,7,6,1.0,0.0,5724.0,16.0,858.0,188.0,370.0,0.0,105.0,8.0,12.0,0.0,6.0,0.0,152.653846,8.705109,0.317624,141.133806,8.156852,1.000000,6.0_0.0,170.208267,9.331121,0.337225,0.396320,0.386876,6.671329,0.219114,3.523810,0.076190,0.000550,0.020911,0.394690,0.180355,0.126840,0.111570,0.165085,0.000000,0.026999,0.406023,0.154725,0.127726,0.111111,0.173416,1.333333,8.000000,95.633333,42.290323,30.733333,26.161290,109.090909,0.000000,2.888889,21.722222,8.277778,5.590909,4.863636,15.181818,2,14,62,2137,1.0,8.0,26.0,611.0,1,13,42,185,1,7,21,64,34176_4356,6.0,0.0,0.0,6.0,0.0,6.0,0.0,1,1,2,1,1,2,6.0,0.945652,0.945355,0.945652,0.934426,611.0
3,34176,2217,0,6.0,0.0,386.0,0.0,23.0,6.0,24.0,0.0,11.0,1.0,16.782609,0.260870,2.181818,0.090909,46,16,0.10643,0.090909,0.068736,0.097561,0.042129,0.323725,0.270510,0.147059,0.117647,0.029412,0.029412,0.058824,0.176471,0.441176,12.0,6.833333,10.333333,8.800000,3.800000,8.111111,20.333333,1.666667,1.333333,1.0,1.0,1.0,1.2,7.5,106,44,251,107,17.0,16.0,19.0,17.0,8,10,11,8,6,6,7,6,1.0,0.0,39620.0,67.0,1018.0,3787.0,12610.0,34.0,2703.0,363.0,1.0,0.0,1.0,0.0,152.653846,8.705109,0.317624,141.133806,8.156852,1.000000,6.0_0.0,170.208267,9.331121,0.337225,0.396320,0.386876,38.919450,3.720039,4.665187,0.134295,0.033570,0.085130,0.072672,0.053869,0.111059,0.145842,0.497857,0.026874,0.041924,0.034668,0.021768,0.045418,0.099704,0.729643,96.238095,170.833333,141.129032,104.612903,222.866667,283.225806,2724.727273,4.761905,5.379310,4.777778,4.764706,7.041667,14.269231,452.500000,2,5,377,14199,1.0,5.0,158.0,913.0,2,5,174,4319,1,4,114,2428,34176_2217,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1,1,1,1,1,1,0.0,0.945652,0.945355,0.945652,0.934426,913.0
4,230784,4818,0,0.0,0.0,46.0,0.0,6.0,0.0,1.0,0.0,1.0,0.0,7.666667,0.000000,1.000000,0.000000,15,5,0.00000,0.518519,0.092593,0.000000,0.074074,0.037037,0.277778,0.000000,0.571429,0.142857,0.000000,0.000000,0.142857,0.142857,0.0,4.000000,5.000000,0.000000,1.333333,2.000000,3.750000,0.000000,1.333333,1.0,0.0,0.0,1.0,1.0,19,17,31,20,5.0,6.0,6.0,5.0,1,1,1,1,1,1,1,1,0.0,0.0,27211.0,103.0,583.0,1770.0,16057.0,26.0,2150.0,189.0,7.0,0.0,1.0,0.0,105.742450,6.229536,0.852016,141.133806,8.156852,1.000000,0.0_0.0,117.407391,6.637107,0.895509,0.725957,0.730747,46.674099,3.036021,7.468372,0.087907,0.027574,0.060430,0.040987,0.040321,0.048972,0.114143,0.667575,0.035492,0.047567,0.032565,0.024881,0.031467,0.037322,0.790706,63.142857,96.866667,63.580645,62.548387,78.500000,177.064516,2918.454545,4.619048,5.000000,3.708333,2.833333,3.185185,3.923077,360.166667,2,27,461,5569,1.0,17.0,126.0,332.0,2,25,302,3052,1,23,199,1285,230784_4818,6.0,0.0,0.0,1.0,0.0,1.0,0.0,1,1,1,1,1,1,6.0,0.885870,0.885246,0.885870,0.743169,332.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
260859,359807,4325,0,4.0,1.0,58.0,0.0,4.0,1.0,49.0,0.0,5.0,0.0,14.500000,0.250000,9.800000,0.000000,11,4,0.00000,0.000000,0.000000,0.000000,0.085470,0.196581,0.717949,0.000000,0.000000,0.000000,0.000000,0.111111,0.222222,0.666667,0.0,0.000000,0.000000,0.000000,3.333333,4.600000,21.000000,0.000000,0.000000,0.0,0.0,1.0,1.0,3.0,22,16,31,21,4.0,4.0,4.0,4.0,11,11,19,13,2,3,3,2,0.0,0.0,11616.0,29.0,1124.0,848.0,5095.0,18.0,996.0,63.0,18.0,0.0,2.0,0.0,147.542249,9.620345,0.716419,99.741350,6.668382,0.425959,4.0_1.0,124.098490,8.449806,0.317763,0.272280,0.330118,10.334520,0.754448,5.115462,0.063253,0.023599,0.054828,0.075092,0.078175,0.124817,0.191369,0.452120,0.030189,0.047642,0.079717,0.079245,0.108019,0.157075,0.498113,22.238095,36.166667,47.935484,49.903226,82.333333,122.161290,813.363636,3.047619,3.740741,5.451613,5.600000,7.633333,10.741935,96.000000,2,18,75,4412,1.0,15.0,41.0,849.0,2,14,38,1421,1,11,29,779,359807_4325,0.0,0.0,0.0,18.0,0.0,2.0,0.0,1,2,6,1,1,1,0.0,0.282609,0.273224,0.239130,0.202186,849.0
260860,294527,3971,0,0.0,1.0,121.0,0.0,1.0,24.0,41.0,0.0,4.0,7.0,121.000000,24.000000,10.250000,1.750000,5,1,0.00000,0.000000,0.000000,0.000000,0.101010,0.000000,0.898990,0.000000,0.000000,0.000000,0.000000,0.200000,0.000000,0.800000,0.0,0.000000,0.000000,0.000000,20.000000,0.000000,35.600000,0.000000,0.000000,0.0,0.0,1.0,0.0,4.0,22,16,73,23,1.0,1.0,1.0,1.0,20,10,22,20,4,4,4,4,0.0,0.0,17573.0,75.0,871.0,1414.0,7029.0,19.0,1737.0,174.0,13.0,0.0,1.0,3.0,105.742450,6.229536,0.852016,99.741350,6.668382,0.425959,0.0_1.0,77.416906,5.293887,0.303650,0.162313,0.197636,20.175660,1.623421,4.046632,0.100173,0.017825,0.073204,0.072200,0.066524,0.123944,0.133636,0.512668,0.020706,0.053298,0.049463,0.046396,0.084739,0.067868,0.677531,24.523810,70.500000,67.290323,62.000000,119.366667,124.548387,1346.545455,3.375000,5.560000,4.448276,4.033333,7.620690,5.709677,176.700000,2,7,271,5720,1.0,6.0,98.0,577.0,2,6,129,2669,1,6,79,1432,294527_3971,8.0,0.0,3.0,5.0,0.0,1.0,0.0,1,1,2,1,1,1,11.0,0.255435,0.251366,0.255435,0.000000,577.0
260861,294527,152,0,0.0,1.0,121.0,0.0,1.0,24.0,41.0,0.0,4.0,7.0,121.000000,24.000000,10.250000,1.750000,5,1,0.00000,0.000000,0.000000,0.000000,0.101010,0.000000,0.898990,0.000000,0.000000,0.000000,0.000000,0.200000,0.000000,0.800000,0.0,0.000000,0.000000,0.000000,20.000000,0.000000,35.600000,0.000000,0.000000,0.0,0.0,1.0,0.0,4.0,22,16,73,23,1.0,1.0,1.0,1.0,20,10,22,20,4,4,4,4,0.0,0.0,4303.0,8.0,219.0,458.0,2526.0,4.0,544.0,45.0,7.0,0.0,1.0,1.0,105.742450,6.229536,0.852016,99.741350,6.668382,0.425959,0.0_1.0,77.416906,5.293887,0.303650,0.162313,0.197636,19.648402,2.091324,4.643382,0.082721,0.002344,0.004811,0.026027,0.125941,0.096336,0.068583,0.675959,0.001311,0.002621,0.000000,0.171691,0.095675,0.011796,0.716907,1.900000,2.294118,11.722222,46.409091,26.931034,17.935484,498.181818,1.000000,1.000000,0.000000,26.200000,14.600000,2.250000,136.750000,2,13,162,1698,1.0,5.0,16.0,176.0,2,9,99,978,1,5,40,474,294527_152,0.0,0.0,0.0,7.0,0.0,1.0,1.0,1,1,1,1,1,1,0.0,0.255435,0.251366,0.255435,0.000000,176.0
260862,294527,2537,0,0.0,1.0,121.0,0.0,1.0,24.0,41.0,0.0,4.0,7.0,121.000000,24.000000,10.250000,1.750000,5,1,0.00000,0.000000,0.000000,0.000000,0.101010,0.000000,0.898990,0.000000,0.000000,0.000000,0.000000,0.200000,0.000000,0.800000,0.0,0.000000,0.000000,0.000000,20.000000,0.000000,35.600000,0.000000,0.000000,0.0,0.0,1.0,0.0,4.0,22,16,73,23,1.0,1.0,1.0,1.0,20,10,22,20,4,4,4,4,0.0,0.0,26137.0,85.0,1829.0,2038.0,13456.0,38.0,4231.0,280.0,0.0,0.0,1.0,0.0,105.742450,6.229536,0.852016,99.741350,6.668382,0.425959,0.0_1.0,77.416906,5.293887,0.303650,0.162313,0.197636,14.290323,1.114270,3.180336,0.066178,0.012496,0.089450,0.030461,0.072358,0.111594,0.105793,0.577848,0.015017,0.045710,0.018977,0.048515,0.093069,0.057426,0.721287,28.619048,143.400000,47.258065,112.258065,178.900000,164.129032,2526.454545,4.789474,10.653846,3.965517,9.483871,18.800000,11.225806,485.666667,2,20,221,8269,1.0,15.0,136.0,1414.0,2,18,144,3980,1,15,127,2935,294527_2537,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1,1,1,1,1,1,0.0,0.255435,0.251366,0.255435,0.000000,1414.0


# Age Gender Combined

In [114]:
df_combined_backup = df_combined.copy(deep = True)

# User-Merchant Similarity

# User-Brand

In [115]:
#brands bought by user before sale

temp = df[((df.action_type == 2)&(df.time_stamp != '1900-11-11'))].groupby(['user_id'])['brand_id']\
         .unique().reset_index()

temp = temp.rename(columns = {'brand_id':'user_brand_before_sale'})


#brands bought by user on sale

temp1 = df[((df.action_type == 2)&(df.time_stamp == '1900-11-11'))].groupby(['user_id'])['brand_id']\
         .unique().reset_index()

temp1 = temp1.rename(columns = {'brand_id':'user_brand_on_sale'})


user_brand = pd.merge(df_train, temp, on = 'user_id', how = 'left')

user_brand = pd.merge(user_brand, temp1, on = 'user_id', how = 'left')

In [116]:
temp.head(2)

Unnamed: 0,user_id,user_brand_before_sale
0,1,"[1469.0, 7402.0]"
1,2,"[4953.0, 8321.0, 2276.0, 1283.0]"


In [117]:
temp1.head(2)

Unnamed: 0,user_id,user_brand_on_sale
0,1,[6805.0]
1,2,"[7931.0, 8120.0, 7936.0, 856.0]"


In [118]:
#categories sold by merchant 

temp = df[df.action_type == 2].groupby(['seller_id'])['brand_id'].unique().reset_index()

temp = temp.rename(columns = {'brand_id':'seller_brand'})


user_brand = pd.merge(user_brand, temp, on = 'seller_id', how = 'left')


In [119]:
temp.head(2)

Unnamed: 0,seller_id,seller_brand
0,1,"[1662.0, 1104.0]"
1,2,[2921.0]


In [120]:
user_brand['index'] = user_brand.index

In [121]:
user_brand

Unnamed: 0,user_id,seller_id,label,age_range,gender,user_brand_before_sale,user_brand_on_sale,seller_brand,index
0,34176,3906,0,6.0,0.0,"[3589.0, 2714.0, 4094.0, 3489.0, 2276.0, 1954....","[4094.0, 6268.0, 2276.0, 2337.0, 3012.0, 7686.0]",[6268.0],0
1,34176,121,0,6.0,0.0,"[3589.0, 2714.0, 4094.0, 3489.0, 2276.0, 1954....","[4094.0, 6268.0, 2276.0, 2337.0, 3012.0, 7686.0]",[2337.0],1
2,34176,4356,1,6.0,0.0,"[3589.0, 2714.0, 4094.0, 3489.0, 2276.0, 1954....","[4094.0, 6268.0, 2276.0, 2337.0, 3012.0, 7686.0]",[2276.0],2
3,34176,2217,0,6.0,0.0,"[3589.0, 2714.0, 4094.0, 3489.0, 2276.0, 1954....","[4094.0, 6268.0, 2276.0, 2337.0, 3012.0, 7686.0]",[3012.0],3
4,230784,4818,0,0.0,0.0,"[1236.0, 2196.0, 2142.0, 3700.0, 5860.0]",[7606.0],[7606.0],4
...,...,...,...,...,...,...,...,...,...
260859,359807,4325,0,4.0,1.0,"[7447.0, 924.0, 7319.0, 653.0]","[2276.0, 8317.0]",[8317.0],260859
260860,294527,3971,0,0.0,1.0,[362.0],"[2142.0, 2773.0, 1921.0, 6066.0]",[2142.0],260860
260861,294527,152,0,0.0,1.0,[362.0],"[2142.0, 2773.0, 1921.0, 6066.0]",[2773.0],260861
260862,294527,2537,0,0.0,1.0,[362.0],"[2142.0, 2773.0, 1921.0, 6066.0]",[6066.0],260862


In [122]:
#Make a new column with user-merchant pairs together as a key
user_brand['user_merchant_pair'] = user_brand.user_id.astype(str).str.cat(user_brand.seller_id.astype(str), sep='_')


In [123]:
temp = user_brand.dropna().apply(lambda row: list ( set(row['user_brand_before_sale'])  & 
                                                    set(row['user_brand_on_sale']) ), axis=1)\
                 .reset_index().rename(columns = {0 : 'brands_intersection_user_before_on_sale'})


In [124]:
temp.head(2)

Unnamed: 0,index,brands_intersection_user_before_on_sale
0,0,"[2276.0, 4094.0]"
1,1,"[2276.0, 4094.0]"


In [125]:
user_brand = pd.merge(user_brand, temp, on = 'index', how = 'left')

In [126]:
temp = user_brand[(user_brand.brands_intersection_user_before_on_sale.str.len() > 0)]

temp['brand_intersection_user_before_on_sale'] = 1

temp = temp[['user_id', 'brand_intersection_user_before_on_sale']]

temp = temp.drop_duplicates()

df_combined = pd.merge(df_combined, temp, on = 'user_id', how = 'left')

df_combined = df_combined.fillna(0)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [127]:
temp.head(2)

Unnamed: 0,user_id,brand_intersection_user_before_on_sale
0,34176,1
12,101760,1


In [128]:
temp = user_brand.dropna().apply(lambda row: list( set(row['user_brand_before_sale'])  | 
                                                 set(row['user_brand_on_sale']) ), axis=1)\
               .reset_index().rename(columns = {0 : 'brands_union_user_before_on_sale'})


In [129]:
temp.head(2)

Unnamed: 0,index,brands_union_user_before_on_sale
0,0,"[3012.0, 3589.0, 7686.0, 6279.0, 4874.0, 1422...."
1,1,"[3012.0, 3589.0, 7686.0, 6279.0, 4874.0, 1422...."


In [130]:
user_brand = pd.merge(user_brand, temp, on = 'index', how = 'left')

In [131]:
temp = user_brand.dropna().apply(lambda row: list ( set(row['user_brand_before_sale'])  & 
                                                   set(row['seller_brand']) ), axis=1)\
               .reset_index().rename(columns = {0 : 'brands_intersection_user_before_and_seller'})



In [132]:
temp.head(2)

Unnamed: 0,index,brands_intersection_user_before_and_seller
0,0,[]
1,1,[]


In [133]:
user_brand = pd.merge(user_brand, temp, on = 'index', how = 'left')

In [134]:
user_brand

Unnamed: 0,user_id,seller_id,label,age_range,gender,user_brand_before_sale,user_brand_on_sale,seller_brand,index,user_merchant_pair,brands_intersection_user_before_on_sale,brands_union_user_before_on_sale,brands_intersection_user_before_and_seller
0,34176,3906,0,6.0,0.0,"[3589.0, 2714.0, 4094.0, 3489.0, 2276.0, 1954....","[4094.0, 6268.0, 2276.0, 2337.0, 3012.0, 7686.0]",[6268.0],0,34176_3906,"[2276.0, 4094.0]","[3012.0, 3589.0, 7686.0, 6279.0, 4874.0, 1422....",[]
1,34176,121,0,6.0,0.0,"[3589.0, 2714.0, 4094.0, 3489.0, 2276.0, 1954....","[4094.0, 6268.0, 2276.0, 2337.0, 3012.0, 7686.0]",[2337.0],1,34176_121,"[2276.0, 4094.0]","[3012.0, 3589.0, 7686.0, 6279.0, 4874.0, 1422....",[]
2,34176,4356,1,6.0,0.0,"[3589.0, 2714.0, 4094.0, 3489.0, 2276.0, 1954....","[4094.0, 6268.0, 2276.0, 2337.0, 3012.0, 7686.0]",[2276.0],2,34176_4356,"[2276.0, 4094.0]","[3012.0, 3589.0, 7686.0, 6279.0, 4874.0, 1422....",[2276.0]
3,34176,2217,0,6.0,0.0,"[3589.0, 2714.0, 4094.0, 3489.0, 2276.0, 1954....","[4094.0, 6268.0, 2276.0, 2337.0, 3012.0, 7686.0]",[3012.0],3,34176_2217,"[2276.0, 4094.0]","[3012.0, 3589.0, 7686.0, 6279.0, 4874.0, 1422....",[]
4,230784,4818,0,0.0,0.0,"[1236.0, 2196.0, 2142.0, 3700.0, 5860.0]",[7606.0],[7606.0],4,230784_4818,[],"[5860.0, 3700.0, 2196.0, 1236.0, 7606.0, 2142.0]",[]
...,...,...,...,...,...,...,...,...,...,...,...,...,...
260859,359807,4325,0,4.0,1.0,"[7447.0, 924.0, 7319.0, 653.0]","[2276.0, 8317.0]",[8317.0],260859,359807_4325,[],"[7447.0, 2276.0, 8317.0, 7319.0, 924.0, 653.0]",[]
260860,294527,3971,0,0.0,1.0,[362.0],"[2142.0, 2773.0, 1921.0, 6066.0]",[2142.0],260860,294527_3971,[],"[1921.0, 6066.0, 2773.0, 362.0, 2142.0]",[]
260861,294527,152,0,0.0,1.0,[362.0],"[2142.0, 2773.0, 1921.0, 6066.0]",[2773.0],260861,294527_152,[],"[1921.0, 6066.0, 2773.0, 362.0, 2142.0]",[]
260862,294527,2537,0,0.0,1.0,[362.0],"[2142.0, 2773.0, 1921.0, 6066.0]",[6066.0],260862,294527_2537,[],"[1921.0, 6066.0, 2773.0, 362.0, 2142.0]",[]


In [135]:
temp = user_brand[user_brand.brands_intersection_user_before_and_seller.str.len() > 0]

temp['brand_intersection_user_before_and_seller'] = 1

temp = temp[['user_merchant_pair', 'brand_intersection_user_before_and_seller']]

temp = temp.drop_duplicates()

df_combined = pd.merge(df_combined, temp, on = 'user_merchant_pair', how = 'left')

df_combined = df_combined.fillna(0)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [136]:
temp.head(2)

Unnamed: 0,user_merchant_pair,brand_intersection_user_before_and_seller
2,34176_4356,1
79,311424_1703,1


In [137]:
temp = user_brand.dropna().apply(lambda row: list ( set(row['user_brand_before_sale'])  | 
                                                   set(row['seller_brand']) ), axis=1)\
                .reset_index().rename(columns = {0 : 'brands_union_user_before_and_seller'})



In [138]:
temp.head(2)

Unnamed: 0,index,brands_union_user_before_and_seller
0,0,"[3589.0, 6279.0, 4874.0, 1422.0, 2197.0, 8473...."
1,1,"[3589.0, 6279.0, 4874.0, 1422.0, 2197.0, 8473...."


In [139]:
user_brand = pd.merge(user_brand, temp, on = 'index', how = 'left')

In [140]:
user_brand['brands_intersection_user_before_on_sale_len'] = user_brand.brands_intersection_user_before_on_sale.str.len()
                                                                       

user_brand['brands_union_user_before_on_sale_len'] = user_brand.brands_union_user_before_on_sale.str.len()
                                                                       

user_brand['brands_intersection_user_before_and_seller_len'] = user_brand.brands_intersection_user_before_and_seller.str.len()
                                                                       

user_brand['brands_union_user_before_and_seller_len'] = user_brand.brands_union_user_before_and_seller.str.len()
                                                                       


In [141]:
temp = user_brand[['user_id','seller_id','label','user_merchant_pair','brands_intersection_user_before_on_sale_len',
                'brands_union_user_before_on_sale_len','brands_intersection_user_before_and_seller_len',
                'brands_union_user_before_and_seller_len']]

temp = temp.fillna(0)

temp['ratio_brand_int_union_user_before_on_sale'] = temp['brands_intersection_user_before_on_sale_len']/temp['brands_union_user_before_on_sale_len']


temp['ratio_brand_int_union_user_before_seller'] = temp['brands_intersection_user_before_and_seller_len']/temp['brands_union_user_before_and_seller_len']

temp = temp.fillna(0)


In [142]:
temp

Unnamed: 0,user_id,seller_id,label,user_merchant_pair,brands_intersection_user_before_on_sale_len,brands_union_user_before_on_sale_len,brands_intersection_user_before_and_seller_len,brands_union_user_before_and_seller_len,ratio_brand_int_union_user_before_on_sale,ratio_brand_int_union_user_before_seller
0,34176,3906,0,34176_3906,2.0,21.0,0.0,18.0,0.095238,0.000000
1,34176,121,0,34176_121,2.0,21.0,0.0,18.0,0.095238,0.000000
2,34176,4356,1,34176_4356,2.0,21.0,1.0,17.0,0.095238,0.058824
3,34176,2217,0,34176_2217,2.0,21.0,0.0,18.0,0.095238,0.000000
4,230784,4818,0,230784_4818,0.0,6.0,0.0,6.0,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...
260859,359807,4325,0,359807_4325,0.0,6.0,0.0,5.0,0.000000,0.000000
260860,294527,3971,0,294527_3971,0.0,5.0,0.0,2.0,0.000000,0.000000
260861,294527,152,0,294527_152,0.0,5.0,0.0,2.0,0.000000,0.000000
260862,294527,2537,0,294527_2537,0.0,5.0,0.0,2.0,0.000000,0.000000


In [143]:
df_combined.head(2)

Unnamed: 0,user_id,seller_id,label,age_range,gender,userbeforesale0,userbeforesale1,userbeforesale2,userbeforesale3,useronsale0,useronsale1,useronsale2,useronsale3,user_ratio_0_to_2_before_sale,user_ratio_3_to_2_before_sale,user_ratio_0_to_2_on_sale,user_ratio_3_to_2_on_sale,active_days,purchase_days,useractivity5,useractivity6,useractivity7,useractivity8,useractivity9,useractivity10,useractivity11,userpurchase5,userpurchase6,userpurchase7,userpurchase8,userpurchase9,userpurchase10,userpurchase11,useravgactivity5,useravgactivity6,useravgactivity7,useravgactivity8,useravgactivity9,useravgactivity10,useravgactivity11,useravgpurchase5,useravgpurchase6,useravgpurchase7,useravgpurchase8,useravgpurchase9,useravgpurchase10,useravgpurchase11,user_unique_brands_before_sale,user_unique_categories_before_sale,user_unique_items_before_sale,user_unique_sellers_before_sale,user_unique_brands_purchased_before_sale,user_unique_categories_purchased_before_sale,user_unique_items_purchased_before_sale,user_unique_sellers_purchased_before_sale,user_unique_brands_on_sale,user_unique_categories_on_sale,user_unique_items_on_sale,user_unique_sellers_on_sale,user_unique_brands_purchased_on_sale,user_unique_categories_purchased_on_sale,user_unique_items_purchased_on_sale,user_unique_sellers_purchased_on_sale,user_all_month_purchase,user_purchase_only_11,sellerbeforesale0,sellerbeforesale1,sellerbeforesale2,sellerbeforesale3,selleronsale0,selleronsale1,selleronsale2,selleronsale3,userseller0,userseller1,userseller2,userseller3,age_avg_activity,age_avg_purchase,unique_users_age_norm,gender_avg_activity,gender_avg_purchase,unique_users_gender_norm,age_gender,age_gender_avg_activity,age_gender_avg_purchase,unique_users_age_gender_norm,age_gender_active_days_norm,age_gender_purchase_days_norm,seller_ratio_0_to_2_before_sale,seller_ratio_3_to_2_before_sale,seller_ratio_0_to_2_on_sale,seller_ratio_3_to_2_on_sale,selleractivity5,selleractivity6,selleractivity7,selleractivity8,selleractivity9,selleractivity10,selleractivity11,sellerpurchase5,sellerpurchase6,sellerpurchase7,sellerpurchase8,sellerpurchase9,sellerpurchase10,sellerpurchase11,selleravgactivity5,selleravgactivity6,selleravgactivity7,selleravgactivity8,selleravgactivity9,selleravgactivity10,selleravgactivity11,selleravgpurchase5,selleravgpurchase6,selleravgpurchase7,selleravgpurchase8,selleravgpurchase9,selleravgpurchase10,selleravgpurchase11,seller_unique_brands_before_sale,seller_unique_categories_before_sale,seller_unique_items_before_sale,seller_unique_users_before_sale,seller_unique_brands_purchased_before_sale,seller_unique_categories_purchased_before_sale,seller_unique_items_purchased_before_sale,seller_unique_users_purchased_before_sale,seller_unique_brands_on_sale,seller_unique_categories_on_sale,seller_unique_items_on_sale,seller_unique_users_on_sale,seller_unique_brands_purchased_on_sale,seller_unique_categories_purchased_on_sale,seller_unique_items_purchased_on_sale,seller_unique_users_purchased_on_sale,user_merchant_pair,usersellerbeforesale0,usersellerbeforesale1,usersellerbeforesale3,userselleronsale0,userselleronsale1,userselleronsale2,userselleronsale3,um_unique_brands_on_sale,um_unique_categories_on_sale,um_unique_items_on_sale,um_unique_brands_purchased_on_sale,um_unique_categories_purchased_on_sale,um_unique_items_purchased_on_sale,um_activity_before_sale,activity_diff_from_1111,activity_diff_from_second_last,purchase_diff_from_1111,purchase_diff_from_second_last,seller_count_of_label-1,brand_intersection_user_before_on_sale,brand_intersection_user_before_and_seller
0,34176,3906,0,6.0,0.0,386.0,0.0,23.0,6.0,24.0,0.0,11.0,1.0,16.782609,0.26087,2.181818,0.090909,46,16,0.10643,0.090909,0.068736,0.097561,0.042129,0.323725,0.27051,0.147059,0.117647,0.029412,0.029412,0.058824,0.176471,0.441176,12.0,6.833333,10.333333,8.8,3.8,8.111111,20.333333,1.666667,1.333333,1.0,1.0,1.0,1.2,7.5,106,44,251,107,17.0,16.0,19.0,17.0,8,10,11,8,6,6,7,6,1.0,0.0,13203.0,26.0,233.0,937.0,1667.0,2.0,177.0,24.0,36.0,0.0,1.0,2.0,152.653846,8.705109,0.317624,141.133806,8.156852,1.0,6.0_0.0,170.208267,9.331121,0.337225,0.39632,0.386876,56.665236,4.021459,9.418079,0.135593,0.082918,0.163563,0.086545,0.097117,0.157908,0.153544,0.258406,0.121951,0.15122,0.034146,0.039024,0.14878,0.070732,0.434146,64.238095,88.7,45.419355,50.967742,85.633333,80.580645,382.181818,3.571429,2.695652,1.555556,2.285714,3.588235,1.705882,89.0,2,20,307,5330,1.0,14.0,68.0,167.0,2,17,189,672,1,13,54,145,34176_3906,33.0,0.0,2.0,3.0,0.0,1.0,0.0,1,1,1,1,1,1,35.0,0.945652,0.945355,0.945652,0.934426,167.0,1.0,0.0
1,34176,121,0,6.0,0.0,386.0,0.0,23.0,6.0,24.0,0.0,11.0,1.0,16.782609,0.26087,2.181818,0.090909,46,16,0.10643,0.090909,0.068736,0.097561,0.042129,0.323725,0.27051,0.147059,0.117647,0.029412,0.029412,0.058824,0.176471,0.441176,12.0,6.833333,10.333333,8.8,3.8,8.111111,20.333333,1.666667,1.333333,1.0,1.0,1.0,1.2,7.5,106,44,251,107,17.0,16.0,19.0,17.0,8,10,11,8,6,6,7,6,1.0,0.0,48956.0,90.0,1565.0,2441.0,23309.0,31.0,3215.0,258.0,13.0,0.0,1.0,0.0,152.653846,8.705109,0.317624,141.133806,8.156852,1.0,6.0_0.0,170.208267,9.331121,0.337225,0.39632,0.386876,31.281789,1.559744,7.250078,0.080249,0.055569,0.036349,0.06511,0.056057,0.130558,0.082564,0.573793,0.047071,0.016109,0.062552,0.03431,0.093933,0.055858,0.690167,211.333333,96.766667,167.741935,144.419355,347.566667,212.709677,4166.0,10.714286,4.052632,12.458333,6.074074,14.966667,15.705882,412.375,2,26,1170,8655,1.0,19.0,378.0,958.0,2,18,369,3942,1,16,276,1884,34176_121,8.0,0.0,0.0,5.0,0.0,1.0,0.0,1,1,1,1,1,1,8.0,0.945652,0.945355,0.945652,0.934426,958.0,1.0,0.0


In [144]:
df_combined = pd.merge(df_combined, temp[['user_id','ratio_brand_int_union_user_before_on_sale']],
                       on = 'user_id', how = 'left')

In [145]:
df_combined = pd.merge(df_combined, temp[['user_merchant_pair','ratio_brand_int_union_user_before_seller']],
                      on = 'user_merchant_pair', how = 'left')

In [146]:
df_combined = df_combined.drop_duplicates()

In [147]:
df_combined

Unnamed: 0,user_id,seller_id,label,age_range,gender,userbeforesale0,userbeforesale1,userbeforesale2,userbeforesale3,useronsale0,useronsale1,useronsale2,useronsale3,user_ratio_0_to_2_before_sale,user_ratio_3_to_2_before_sale,user_ratio_0_to_2_on_sale,user_ratio_3_to_2_on_sale,active_days,purchase_days,useractivity5,useractivity6,useractivity7,useractivity8,useractivity9,useractivity10,useractivity11,userpurchase5,userpurchase6,userpurchase7,userpurchase8,userpurchase9,userpurchase10,userpurchase11,useravgactivity5,useravgactivity6,useravgactivity7,useravgactivity8,useravgactivity9,useravgactivity10,useravgactivity11,useravgpurchase5,useravgpurchase6,useravgpurchase7,useravgpurchase8,useravgpurchase9,useravgpurchase10,useravgpurchase11,user_unique_brands_before_sale,user_unique_categories_before_sale,user_unique_items_before_sale,user_unique_sellers_before_sale,user_unique_brands_purchased_before_sale,user_unique_categories_purchased_before_sale,user_unique_items_purchased_before_sale,user_unique_sellers_purchased_before_sale,user_unique_brands_on_sale,user_unique_categories_on_sale,user_unique_items_on_sale,user_unique_sellers_on_sale,user_unique_brands_purchased_on_sale,user_unique_categories_purchased_on_sale,user_unique_items_purchased_on_sale,user_unique_sellers_purchased_on_sale,user_all_month_purchase,user_purchase_only_11,sellerbeforesale0,sellerbeforesale1,sellerbeforesale2,sellerbeforesale3,selleronsale0,selleronsale1,selleronsale2,selleronsale3,userseller0,userseller1,userseller2,userseller3,age_avg_activity,age_avg_purchase,unique_users_age_norm,gender_avg_activity,gender_avg_purchase,unique_users_gender_norm,age_gender,age_gender_avg_activity,age_gender_avg_purchase,unique_users_age_gender_norm,age_gender_active_days_norm,age_gender_purchase_days_norm,seller_ratio_0_to_2_before_sale,seller_ratio_3_to_2_before_sale,seller_ratio_0_to_2_on_sale,seller_ratio_3_to_2_on_sale,selleractivity5,selleractivity6,selleractivity7,selleractivity8,selleractivity9,selleractivity10,selleractivity11,sellerpurchase5,sellerpurchase6,sellerpurchase7,sellerpurchase8,sellerpurchase9,sellerpurchase10,sellerpurchase11,selleravgactivity5,selleravgactivity6,selleravgactivity7,selleravgactivity8,selleravgactivity9,selleravgactivity10,selleravgactivity11,selleravgpurchase5,selleravgpurchase6,selleravgpurchase7,selleravgpurchase8,selleravgpurchase9,selleravgpurchase10,selleravgpurchase11,seller_unique_brands_before_sale,seller_unique_categories_before_sale,seller_unique_items_before_sale,seller_unique_users_before_sale,seller_unique_brands_purchased_before_sale,seller_unique_categories_purchased_before_sale,seller_unique_items_purchased_before_sale,seller_unique_users_purchased_before_sale,seller_unique_brands_on_sale,seller_unique_categories_on_sale,seller_unique_items_on_sale,seller_unique_users_on_sale,seller_unique_brands_purchased_on_sale,seller_unique_categories_purchased_on_sale,seller_unique_items_purchased_on_sale,seller_unique_users_purchased_on_sale,user_merchant_pair,usersellerbeforesale0,usersellerbeforesale1,usersellerbeforesale3,userselleronsale0,userselleronsale1,userselleronsale2,userselleronsale3,um_unique_brands_on_sale,um_unique_categories_on_sale,um_unique_items_on_sale,um_unique_brands_purchased_on_sale,um_unique_categories_purchased_on_sale,um_unique_items_purchased_on_sale,um_activity_before_sale,activity_diff_from_1111,activity_diff_from_second_last,purchase_diff_from_1111,purchase_diff_from_second_last,seller_count_of_label-1,brand_intersection_user_before_on_sale,brand_intersection_user_before_and_seller,ratio_brand_int_union_user_before_on_sale,ratio_brand_int_union_user_before_seller
0,34176,3906,0,6.0,0.0,386.0,0.0,23.0,6.0,24.0,0.0,11.0,1.0,16.782609,0.260870,2.181818,0.090909,46,16,0.10643,0.090909,0.068736,0.097561,0.042129,0.323725,0.270510,0.147059,0.117647,0.029412,0.029412,0.058824,0.176471,0.441176,12.0,6.833333,10.333333,8.800000,3.800000,8.111111,20.333333,1.666667,1.333333,1.0,1.0,1.0,1.2,7.5,106,44,251,107,17.0,16.0,19.0,17.0,8,10,11,8,6,6,7,6,1.0,0.0,13203.0,26.0,233.0,937.0,1667.0,2.0,177.0,24.0,36.0,0.0,1.0,2.0,152.653846,8.705109,0.317624,141.133806,8.156852,1.000000,6.0_0.0,170.208267,9.331121,0.337225,0.396320,0.386876,56.665236,4.021459,9.418079,0.135593,0.082918,0.163563,0.086545,0.097117,0.157908,0.153544,0.258406,0.121951,0.151220,0.034146,0.039024,0.148780,0.070732,0.434146,64.238095,88.700000,45.419355,50.967742,85.633333,80.580645,382.181818,3.571429,2.695652,1.555556,2.285714,3.588235,1.705882,89.000000,2,20,307,5330,1.0,14.0,68.0,167.0,2,17,189,672,1,13,54,145,34176_3906,33.0,0.0,2.0,3.0,0.0,1.0,0.0,1,1,1,1,1,1,35.0,0.945652,0.945355,0.945652,0.934426,167.0,1.0,0.0,0.095238,0.000000
4,34176,121,0,6.0,0.0,386.0,0.0,23.0,6.0,24.0,0.0,11.0,1.0,16.782609,0.260870,2.181818,0.090909,46,16,0.10643,0.090909,0.068736,0.097561,0.042129,0.323725,0.270510,0.147059,0.117647,0.029412,0.029412,0.058824,0.176471,0.441176,12.0,6.833333,10.333333,8.800000,3.800000,8.111111,20.333333,1.666667,1.333333,1.0,1.0,1.0,1.2,7.5,106,44,251,107,17.0,16.0,19.0,17.0,8,10,11,8,6,6,7,6,1.0,0.0,48956.0,90.0,1565.0,2441.0,23309.0,31.0,3215.0,258.0,13.0,0.0,1.0,0.0,152.653846,8.705109,0.317624,141.133806,8.156852,1.000000,6.0_0.0,170.208267,9.331121,0.337225,0.396320,0.386876,31.281789,1.559744,7.250078,0.080249,0.055569,0.036349,0.065110,0.056057,0.130558,0.082564,0.573793,0.047071,0.016109,0.062552,0.034310,0.093933,0.055858,0.690167,211.333333,96.766667,167.741935,144.419355,347.566667,212.709677,4166.000000,10.714286,4.052632,12.458333,6.074074,14.966667,15.705882,412.375000,2,26,1170,8655,1.0,19.0,378.0,958.0,2,18,369,3942,1,16,276,1884,34176_121,8.0,0.0,0.0,5.0,0.0,1.0,0.0,1,1,1,1,1,1,8.0,0.945652,0.945355,0.945652,0.934426,958.0,1.0,0.0,0.095238,0.000000
8,34176,4356,1,6.0,0.0,386.0,0.0,23.0,6.0,24.0,0.0,11.0,1.0,16.782609,0.260870,2.181818,0.090909,46,16,0.10643,0.090909,0.068736,0.097561,0.042129,0.323725,0.270510,0.147059,0.117647,0.029412,0.029412,0.058824,0.176471,0.441176,12.0,6.833333,10.333333,8.800000,3.800000,8.111111,20.333333,1.666667,1.333333,1.0,1.0,1.0,1.2,7.5,106,44,251,107,17.0,16.0,19.0,17.0,8,10,11,8,6,6,7,6,1.0,0.0,5724.0,16.0,858.0,188.0,370.0,0.0,105.0,8.0,12.0,0.0,6.0,0.0,152.653846,8.705109,0.317624,141.133806,8.156852,1.000000,6.0_0.0,170.208267,9.331121,0.337225,0.396320,0.386876,6.671329,0.219114,3.523810,0.076190,0.000550,0.020911,0.394690,0.180355,0.126840,0.111570,0.165085,0.000000,0.026999,0.406023,0.154725,0.127726,0.111111,0.173416,1.333333,8.000000,95.633333,42.290323,30.733333,26.161290,109.090909,0.000000,2.888889,21.722222,8.277778,5.590909,4.863636,15.181818,2,14,62,2137,1.0,8.0,26.0,611.0,1,13,42,185,1,7,21,64,34176_4356,6.0,0.0,0.0,6.0,0.0,6.0,0.0,1,1,2,1,1,2,6.0,0.945652,0.945355,0.945652,0.934426,611.0,1.0,1.0,0.095238,0.058824
12,34176,2217,0,6.0,0.0,386.0,0.0,23.0,6.0,24.0,0.0,11.0,1.0,16.782609,0.260870,2.181818,0.090909,46,16,0.10643,0.090909,0.068736,0.097561,0.042129,0.323725,0.270510,0.147059,0.117647,0.029412,0.029412,0.058824,0.176471,0.441176,12.0,6.833333,10.333333,8.800000,3.800000,8.111111,20.333333,1.666667,1.333333,1.0,1.0,1.0,1.2,7.5,106,44,251,107,17.0,16.0,19.0,17.0,8,10,11,8,6,6,7,6,1.0,0.0,39620.0,67.0,1018.0,3787.0,12610.0,34.0,2703.0,363.0,1.0,0.0,1.0,0.0,152.653846,8.705109,0.317624,141.133806,8.156852,1.000000,6.0_0.0,170.208267,9.331121,0.337225,0.396320,0.386876,38.919450,3.720039,4.665187,0.134295,0.033570,0.085130,0.072672,0.053869,0.111059,0.145842,0.497857,0.026874,0.041924,0.034668,0.021768,0.045418,0.099704,0.729643,96.238095,170.833333,141.129032,104.612903,222.866667,283.225806,2724.727273,4.761905,5.379310,4.777778,4.764706,7.041667,14.269231,452.500000,2,5,377,14199,1.0,5.0,158.0,913.0,2,5,174,4319,1,4,114,2428,34176_2217,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1,1,1,1,1,1,0.0,0.945652,0.945355,0.945652,0.934426,913.0,1.0,0.0,0.095238,0.000000
16,230784,4818,0,0.0,0.0,46.0,0.0,6.0,0.0,1.0,0.0,1.0,0.0,7.666667,0.000000,1.000000,0.000000,15,5,0.00000,0.518519,0.092593,0.000000,0.074074,0.037037,0.277778,0.000000,0.571429,0.142857,0.000000,0.000000,0.142857,0.142857,0.0,4.000000,5.000000,0.000000,1.333333,2.000000,3.750000,0.000000,1.333333,1.0,0.0,0.0,1.0,1.0,19,17,31,20,5.0,6.0,6.0,5.0,1,1,1,1,1,1,1,1,0.0,0.0,27211.0,103.0,583.0,1770.0,16057.0,26.0,2150.0,189.0,7.0,0.0,1.0,0.0,105.742450,6.229536,0.852016,141.133806,8.156852,1.000000,0.0_0.0,117.407391,6.637107,0.895509,0.725957,0.730747,46.674099,3.036021,7.468372,0.087907,0.027574,0.060430,0.040987,0.040321,0.048972,0.114143,0.667575,0.035492,0.047567,0.032565,0.024881,0.031467,0.037322,0.790706,63.142857,96.866667,63.580645,62.548387,78.500000,177.064516,2918.454545,4.619048,5.000000,3.708333,2.833333,3.185185,3.923077,360.166667,2,27,461,5569,1.0,17.0,126.0,332.0,2,25,302,3052,1,23,199,1285,230784_4818,6.0,0.0,0.0,1.0,0.0,1.0,0.0,1,1,1,1,1,1,6.0,0.885870,0.885246,0.885870,0.743169,332.0,0.0,0.0,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
387647,359807,4325,0,4.0,1.0,58.0,0.0,4.0,1.0,49.0,0.0,5.0,0.0,14.500000,0.250000,9.800000,0.000000,11,4,0.00000,0.000000,0.000000,0.000000,0.085470,0.196581,0.717949,0.000000,0.000000,0.000000,0.000000,0.111111,0.222222,0.666667,0.0,0.000000,0.000000,0.000000,3.333333,4.600000,21.000000,0.000000,0.000000,0.0,0.0,1.0,1.0,3.0,22,16,31,21,4.0,4.0,4.0,4.0,11,11,19,13,2,3,3,2,0.0,0.0,11616.0,29.0,1124.0,848.0,5095.0,18.0,996.0,63.0,18.0,0.0,2.0,0.0,147.542249,9.620345,0.716419,99.741350,6.668382,0.425959,4.0_1.0,124.098490,8.449806,0.317763,0.272280,0.330118,10.334520,0.754448,5.115462,0.063253,0.023599,0.054828,0.075092,0.078175,0.124817,0.191369,0.452120,0.030189,0.047642,0.079717,0.079245,0.108019,0.157075,0.498113,22.238095,36.166667,47.935484,49.903226,82.333333,122.161290,813.363636,3.047619,3.740741,5.451613,5.600000,7.633333,10.741935,96.000000,2,18,75,4412,1.0,15.0,41.0,849.0,2,14,38,1421,1,11,29,779,359807_4325,0.0,0.0,0.0,18.0,0.0,2.0,0.0,1,2,6,1,1,1,0.0,0.282609,0.273224,0.239130,0.202186,849.0,0.0,0.0,0.000000,0.000000
387648,294527,3971,0,0.0,1.0,121.0,0.0,1.0,24.0,41.0,0.0,4.0,7.0,121.000000,24.000000,10.250000,1.750000,5,1,0.00000,0.000000,0.000000,0.000000,0.101010,0.000000,0.898990,0.000000,0.000000,0.000000,0.000000,0.200000,0.000000,0.800000,0.0,0.000000,0.000000,0.000000,20.000000,0.000000,35.600000,0.000000,0.000000,0.0,0.0,1.0,0.0,4.0,22,16,73,23,1.0,1.0,1.0,1.0,20,10,22,20,4,4,4,4,0.0,0.0,17573.0,75.0,871.0,1414.0,7029.0,19.0,1737.0,174.0,13.0,0.0,1.0,3.0,105.742450,6.229536,0.852016,99.741350,6.668382,0.425959,0.0_1.0,77.416906,5.293887,0.303650,0.162313,0.197636,20.175660,1.623421,4.046632,0.100173,0.017825,0.073204,0.072200,0.066524,0.123944,0.133636,0.512668,0.020706,0.053298,0.049463,0.046396,0.084739,0.067868,0.677531,24.523810,70.500000,67.290323,62.000000,119.366667,124.548387,1346.545455,3.375000,5.560000,4.448276,4.033333,7.620690,5.709677,176.700000,2,7,271,5720,1.0,6.0,98.0,577.0,2,6,129,2669,1,6,79,1432,294527_3971,8.0,0.0,3.0,5.0,0.0,1.0,0.0,1,1,2,1,1,1,11.0,0.255435,0.251366,0.255435,0.000000,577.0,0.0,0.0,0.000000,0.000000
387651,294527,152,0,0.0,1.0,121.0,0.0,1.0,24.0,41.0,0.0,4.0,7.0,121.000000,24.000000,10.250000,1.750000,5,1,0.00000,0.000000,0.000000,0.000000,0.101010,0.000000,0.898990,0.000000,0.000000,0.000000,0.000000,0.200000,0.000000,0.800000,0.0,0.000000,0.000000,0.000000,20.000000,0.000000,35.600000,0.000000,0.000000,0.0,0.0,1.0,0.0,4.0,22,16,73,23,1.0,1.0,1.0,1.0,20,10,22,20,4,4,4,4,0.0,0.0,4303.0,8.0,219.0,458.0,2526.0,4.0,544.0,45.0,7.0,0.0,1.0,1.0,105.742450,6.229536,0.852016,99.741350,6.668382,0.425959,0.0_1.0,77.416906,5.293887,0.303650,0.162313,0.197636,19.648402,2.091324,4.643382,0.082721,0.002344,0.004811,0.026027,0.125941,0.096336,0.068583,0.675959,0.001311,0.002621,0.000000,0.171691,0.095675,0.011796,0.716907,1.900000,2.294118,11.722222,46.409091,26.931034,17.935484,498.181818,1.000000,1.000000,0.000000,26.200000,14.600000,2.250000,136.750000,2,13,162,1698,1.0,5.0,16.0,176.0,2,9,99,978,1,5,40,474,294527_152,0.0,0.0,0.0,7.0,0.0,1.0,1.0,1,1,1,1,1,1,0.0,0.255435,0.251366,0.255435,0.000000,176.0,0.0,0.0,0.000000,0.000000
387654,294527,2537,0,0.0,1.0,121.0,0.0,1.0,24.0,41.0,0.0,4.0,7.0,121.000000,24.000000,10.250000,1.750000,5,1,0.00000,0.000000,0.000000,0.000000,0.101010,0.000000,0.898990,0.000000,0.000000,0.000000,0.000000,0.200000,0.000000,0.800000,0.0,0.000000,0.000000,0.000000,20.000000,0.000000,35.600000,0.000000,0.000000,0.0,0.0,1.0,0.0,4.0,22,16,73,23,1.0,1.0,1.0,1.0,20,10,22,20,4,4,4,4,0.0,0.0,26137.0,85.0,1829.0,2038.0,13456.0,38.0,4231.0,280.0,0.0,0.0,1.0,0.0,105.742450,6.229536,0.852016,99.741350,6.668382,0.425959,0.0_1.0,77.416906,5.293887,0.303650,0.162313,0.197636,14.290323,1.114270,3.180336,0.066178,0.012496,0.089450,0.030461,0.072358,0.111594,0.105793,0.577848,0.015017,0.045710,0.018977,0.048515,0.093069,0.057426,0.721287,28.619048,143.400000,47.258065,112.258065,178.900000,164.129032,2526.454545,4.789474,10.653846,3.965517,9.483871,18.800000,11.225806,485.666667,2,20,221,8269,1.0,15.0,136.0,1414.0,2,18,144,3980,1,15,127,2935,294527_2537,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1,1,1,1,1,1,0.0,0.255435,0.251366,0.255435,0.000000,1414.0,0.0,0.0,0.000000,0.000000


# User-Category

In [148]:
#categories bought by user before sale

temp = df[((df.action_type == 2)&(df.time_stamp != '1900-11-11'))].groupby(['user_id'])['cat_id']\
         .unique().reset_index()

temp = temp.rename(columns = {'cat_id':'user_cat_before_sale'})


#categories bought by user on sale

temp1 = df[((df.action_type == 2)&(df.time_stamp == '1900-11-11'))].groupby(['user_id'])['cat_id']\
         .unique().reset_index()

temp1 = temp1.rename(columns = {'cat_id':'user_cat_on_sale'})


user_cat = pd.merge(df_train, temp, on = 'user_id', how = 'left')

user_cat = pd.merge(user_cat, temp1, on = 'user_id', how = 'left')

In [149]:
#categories sold by merchant 

temp = df[df.action_type == 2].groupby(['seller_id'])['cat_id'].unique().reset_index()

temp = temp.rename(columns = {'cat_id':'seller_cat'})


user_cat = pd.merge(user_cat, temp, on = 'seller_id', how = 'left')


In [150]:
user_cat['index'] = user_cat.index

In [151]:
#Make a new column with user-merchant pairs together as a key
user_cat['user_merchant_pair'] = user_cat.user_id.astype(str).str.cat(user_cat.seller_id.astype(str), sep='_')


In [152]:
temp = user_cat.dropna().apply(lambda row: list( set(row['user_cat_before_sale'])  & 
                                                 set(row['user_cat_on_sale']) ), axis=1)\
               .reset_index().rename(columns = {0 : 'categories_intersection_user_before_on_sale'})


In [153]:
user_cat = pd.merge(user_cat, temp, on = 'index', how = 'left')

In [154]:
temp = user_cat[user_cat.categories_intersection_user_before_on_sale.str.len() > 0]

temp['cat_intersection_user_before_on_sale'] = 1

temp = temp[['user_id', 'cat_intersection_user_before_on_sale']]

temp = temp.drop_duplicates()

df_combined = pd.merge(df_combined, temp, on = 'user_id', how = 'left')

df_combined = df_combined.fillna(0)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [155]:
temp = user_cat.dropna().apply(lambda row: list( set(row['user_cat_before_sale'])  | 
                                                 set(row['user_cat_on_sale']) ), axis=1)\
               .reset_index().rename(columns = {0 : 'categories_union_user_before_on_sale'})


In [156]:
user_cat = pd.merge(user_cat, temp, on = 'index', how = 'left')

In [157]:
temp = user_cat.dropna().apply(lambda row: list ( set(row['user_cat_before_sale'])  & 
                                                   set(row['seller_cat']) ), axis=1)\
               .reset_index().rename(columns = {0 : 'categories_intersection_user_before_and_seller'})



In [158]:
user_cat = pd.merge(user_cat, temp, on = 'index', how = 'left')

In [159]:
temp = user_cat[user_cat.categories_intersection_user_before_and_seller.str.len() > 0]

temp['cat_intersection_user_before_and_seller'] = 1

temp = temp[['user_merchant_pair','cat_intersection_user_before_and_seller']]

temp = temp.drop_duplicates()

df_combined = pd.merge(df_combined, temp, on = 'user_merchant_pair', how = 'left')

df_combined = df_combined.fillna(0)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [160]:
temp = user_cat.dropna().apply(lambda row: list ( set(row['user_cat_before_sale'])  | 
                                                   set(row['seller_cat']) ), axis=1)\
                .reset_index().rename(columns = {0 : 'categories_union_user_before_and_seller'})



In [161]:
user_cat = pd.merge(user_cat, temp, on = 'index', how = 'left')

In [162]:
user_cat.iloc[0,5]

array([1467,  629,  420, 1553, 1504,  662, 1208,  615, 1271,  267,  602,
       1505, 1252,  955,  664,  302])

In [163]:
user_cat.iloc[0,6]

array([1238,  821, 1208, 1028,  656,  602])

In [164]:
user_cat.iloc[0,7]

array([ 821,  302, 1577, 1397,  662, 1095, 1238, 1142,  154,  748,  612,
       1505, 1389, 1604,  384, 1553,  387,  992])

In [165]:
temp = user_cat.dropna().apply(lambda row: list ( set(row['user_cat_before_sale']) |
                                                 set(row['user_cat_on_sale']) |
                                                   set(row['seller_cat']) ), axis=1)\
                .reset_index().rename(columns = {0 : 'categories_union_user_and_seller'})



In [166]:
user_cat = pd.merge(user_cat, temp, on = 'index', how = 'left')

In [167]:
temp = user_cat.dropna().apply(lambda row: list ( (set(row['user_cat_before_sale']) |
                                                 set(row['user_cat_on_sale'])) &
                                                   set(row['seller_cat']) ), axis=1)\
                .reset_index().rename(columns = {0 : 'categories_int_user_and_seller'})



In [168]:
user_cat = pd.merge(user_cat, temp, on = 'index', how = 'left')

In [169]:
user_cat.head(1)

Unnamed: 0,user_id,seller_id,label,age_range,gender,user_cat_before_sale,user_cat_on_sale,seller_cat,index,user_merchant_pair,categories_intersection_user_before_on_sale,categories_union_user_before_on_sale,categories_intersection_user_before_and_seller,categories_union_user_before_and_seller,categories_union_user_and_seller,categories_int_user_and_seller
0,34176,3906,0,6.0,0.0,"[1467, 629, 420, 1553, 1504, 662, 1208, 615, 1...","[1238, 821, 1208, 1028, 656, 602]","[821, 302, 1577, 1397, 662, 1095, 1238, 1142, ...",0,34176_3906,"[1208, 602]","[1028, 267, 656, 1553, 1467, 662, 1238, 664, 6...","[1553, 1505, 662, 302]","[384, 387, 1604, 1095, 267, 1553, 1467, 662, 1...","[384, 387, 1028, 267, 656, 1553, 662, 664, 154...","[1505, 302, 1553, 821, 662, 1238]"


In [170]:
user_cat['categories_intersection_user_before_on_sale_len'] = user_cat.categories_intersection_user_before_on_sale.str.len()
                                                                       

user_cat['categories_union_user_before_on_sale_len'] = user_cat.categories_union_user_before_on_sale.str.len()
                                                                       

user_cat['categories_intersection_user_before_and_seller_len'] = user_cat.categories_intersection_user_before_and_seller.str.len()
                                                                       

user_cat['categories_union_user_before_and_seller_len'] = user_cat.categories_union_user_before_and_seller.str.len()
                                                                       
user_cat['categories_union_user_and_seller_len'] = user_cat.categories_union_user_and_seller.str.len()
 
user_cat['categories_int_user_and_seller_len'] = user_cat.categories_int_user_and_seller.str.len()
    
    

In [171]:
temp = user_cat[['user_id','seller_id','label','user_merchant_pair','categories_intersection_user_before_on_sale_len',
                'categories_union_user_before_on_sale_len','categories_intersection_user_before_and_seller_len',
                'categories_union_user_before_and_seller_len','categories_union_user_and_seller_len',
                'categories_int_user_and_seller_len']]

temp = temp.fillna(0)

temp['ratio_cat_int_union_user_before_on_sale'] = temp['categories_intersection_user_before_on_sale_len']/temp['categories_union_user_before_on_sale_len']


temp['ratio_cat_int_union_user_before_seller'] = temp['categories_intersection_user_before_and_seller_len']/temp['categories_union_user_before_and_seller_len']

temp['ratio_cat_int_union_user_seller'] = temp['categories_int_user_and_seller_len']/temp['categories_union_user_and_seller_len']


temp = temp.fillna(0)


In [172]:
temp1

Unnamed: 0,user_id,user_cat_on_sale
0,1,[992]
1,2,"[1130, 500, 420, 1142]"
2,3,[606]
3,4,[276]
4,5,[407]
...,...,...
424165,424166,[252]
424166,424167,[1208]
424167,424168,[1603]
424168,424169,[1401]


In [173]:
df_combined = pd.merge(df_combined, temp[['user_id','ratio_cat_int_union_user_before_on_sale']],
                      on = 'user_id', how = 'left')

In [174]:
df_combined = pd.merge(df_combined, temp[['user_merchant_pair','ratio_cat_int_union_user_before_seller']],
                      on = 'user_merchant_pair', how = 'left')

In [175]:
df_combined = pd.merge(df_combined, temp[['user_merchant_pair','ratio_cat_int_union_user_seller']],
                      on = 'user_merchant_pair', how = 'left')

In [176]:
df_combined = df_combined.drop_duplicates()

In [177]:
df_combined

Unnamed: 0,user_id,seller_id,label,age_range,gender,userbeforesale0,userbeforesale1,userbeforesale2,userbeforesale3,useronsale0,useronsale1,useronsale2,useronsale3,user_ratio_0_to_2_before_sale,user_ratio_3_to_2_before_sale,user_ratio_0_to_2_on_sale,user_ratio_3_to_2_on_sale,active_days,purchase_days,useractivity5,useractivity6,useractivity7,useractivity8,useractivity9,useractivity10,useractivity11,userpurchase5,userpurchase6,userpurchase7,userpurchase8,userpurchase9,userpurchase10,userpurchase11,useravgactivity5,useravgactivity6,useravgactivity7,useravgactivity8,useravgactivity9,useravgactivity10,useravgactivity11,useravgpurchase5,useravgpurchase6,useravgpurchase7,useravgpurchase8,useravgpurchase9,useravgpurchase10,useravgpurchase11,user_unique_brands_before_sale,user_unique_categories_before_sale,user_unique_items_before_sale,user_unique_sellers_before_sale,user_unique_brands_purchased_before_sale,user_unique_categories_purchased_before_sale,user_unique_items_purchased_before_sale,user_unique_sellers_purchased_before_sale,user_unique_brands_on_sale,user_unique_categories_on_sale,user_unique_items_on_sale,user_unique_sellers_on_sale,user_unique_brands_purchased_on_sale,user_unique_categories_purchased_on_sale,user_unique_items_purchased_on_sale,user_unique_sellers_purchased_on_sale,user_all_month_purchase,user_purchase_only_11,sellerbeforesale0,sellerbeforesale1,sellerbeforesale2,sellerbeforesale3,selleronsale0,selleronsale1,selleronsale2,selleronsale3,userseller0,userseller1,userseller2,userseller3,age_avg_activity,age_avg_purchase,unique_users_age_norm,gender_avg_activity,gender_avg_purchase,unique_users_gender_norm,age_gender,age_gender_avg_activity,age_gender_avg_purchase,unique_users_age_gender_norm,age_gender_active_days_norm,age_gender_purchase_days_norm,seller_ratio_0_to_2_before_sale,seller_ratio_3_to_2_before_sale,seller_ratio_0_to_2_on_sale,seller_ratio_3_to_2_on_sale,selleractivity5,selleractivity6,selleractivity7,selleractivity8,selleractivity9,selleractivity10,selleractivity11,sellerpurchase5,sellerpurchase6,sellerpurchase7,sellerpurchase8,sellerpurchase9,sellerpurchase10,sellerpurchase11,selleravgactivity5,selleravgactivity6,selleravgactivity7,selleravgactivity8,selleravgactivity9,selleravgactivity10,selleravgactivity11,selleravgpurchase5,selleravgpurchase6,selleravgpurchase7,selleravgpurchase8,selleravgpurchase9,selleravgpurchase10,selleravgpurchase11,seller_unique_brands_before_sale,seller_unique_categories_before_sale,seller_unique_items_before_sale,seller_unique_users_before_sale,seller_unique_brands_purchased_before_sale,seller_unique_categories_purchased_before_sale,seller_unique_items_purchased_before_sale,seller_unique_users_purchased_before_sale,seller_unique_brands_on_sale,seller_unique_categories_on_sale,seller_unique_items_on_sale,seller_unique_users_on_sale,seller_unique_brands_purchased_on_sale,seller_unique_categories_purchased_on_sale,seller_unique_items_purchased_on_sale,seller_unique_users_purchased_on_sale,user_merchant_pair,usersellerbeforesale0,usersellerbeforesale1,usersellerbeforesale3,userselleronsale0,userselleronsale1,userselleronsale2,userselleronsale3,um_unique_brands_on_sale,um_unique_categories_on_sale,um_unique_items_on_sale,um_unique_brands_purchased_on_sale,um_unique_categories_purchased_on_sale,um_unique_items_purchased_on_sale,um_activity_before_sale,activity_diff_from_1111,activity_diff_from_second_last,purchase_diff_from_1111,purchase_diff_from_second_last,seller_count_of_label-1,brand_intersection_user_before_on_sale,brand_intersection_user_before_and_seller,ratio_brand_int_union_user_before_on_sale,ratio_brand_int_union_user_before_seller,cat_intersection_user_before_on_sale,cat_intersection_user_before_and_seller,ratio_cat_int_union_user_before_on_sale,ratio_cat_int_union_user_before_seller,ratio_cat_int_union_user_seller
0,34176,3906,0,6.0,0.0,386.0,0.0,23.0,6.0,24.0,0.0,11.0,1.0,16.782609,0.260870,2.181818,0.090909,46,16,0.10643,0.090909,0.068736,0.097561,0.042129,0.323725,0.270510,0.147059,0.117647,0.029412,0.029412,0.058824,0.176471,0.441176,12.0,6.833333,10.333333,8.800000,3.800000,8.111111,20.333333,1.666667,1.333333,1.0,1.0,1.0,1.2,7.5,106,44,251,107,17.0,16.0,19.0,17.0,8,10,11,8,6,6,7,6,1.0,0.0,13203.0,26.0,233.0,937.0,1667.0,2.0,177.0,24.0,36.0,0.0,1.0,2.0,152.653846,8.705109,0.317624,141.133806,8.156852,1.000000,6.0_0.0,170.208267,9.331121,0.337225,0.396320,0.386876,56.665236,4.021459,9.418079,0.135593,0.082918,0.163563,0.086545,0.097117,0.157908,0.153544,0.258406,0.121951,0.151220,0.034146,0.039024,0.148780,0.070732,0.434146,64.238095,88.700000,45.419355,50.967742,85.633333,80.580645,382.181818,3.571429,2.695652,1.555556,2.285714,3.588235,1.705882,89.000000,2,20,307,5330,1.0,14.0,68.0,167.0,2,17,189,672,1,13,54,145,34176_3906,33.0,0.0,2.0,3.0,0.0,1.0,0.0,1,1,1,1,1,1,35.0,0.945652,0.945355,0.945652,0.934426,167.0,1.0,0.0,0.095238,0.000000,1.0,1.0,0.100000,0.133333,0.187500
4,34176,121,0,6.0,0.0,386.0,0.0,23.0,6.0,24.0,0.0,11.0,1.0,16.782609,0.260870,2.181818,0.090909,46,16,0.10643,0.090909,0.068736,0.097561,0.042129,0.323725,0.270510,0.147059,0.117647,0.029412,0.029412,0.058824,0.176471,0.441176,12.0,6.833333,10.333333,8.800000,3.800000,8.111111,20.333333,1.666667,1.333333,1.0,1.0,1.0,1.2,7.5,106,44,251,107,17.0,16.0,19.0,17.0,8,10,11,8,6,6,7,6,1.0,0.0,48956.0,90.0,1565.0,2441.0,23309.0,31.0,3215.0,258.0,13.0,0.0,1.0,0.0,152.653846,8.705109,0.317624,141.133806,8.156852,1.000000,6.0_0.0,170.208267,9.331121,0.337225,0.396320,0.386876,31.281789,1.559744,7.250078,0.080249,0.055569,0.036349,0.065110,0.056057,0.130558,0.082564,0.573793,0.047071,0.016109,0.062552,0.034310,0.093933,0.055858,0.690167,211.333333,96.766667,167.741935,144.419355,347.566667,212.709677,4166.000000,10.714286,4.052632,12.458333,6.074074,14.966667,15.705882,412.375000,2,26,1170,8655,1.0,19.0,378.0,958.0,2,18,369,3942,1,16,276,1884,34176_121,8.0,0.0,0.0,5.0,0.0,1.0,0.0,1,1,1,1,1,1,8.0,0.945652,0.945355,0.945652,0.934426,958.0,1.0,0.0,0.095238,0.000000,1.0,1.0,0.100000,0.055556,0.076923
8,34176,4356,1,6.0,0.0,386.0,0.0,23.0,6.0,24.0,0.0,11.0,1.0,16.782609,0.260870,2.181818,0.090909,46,16,0.10643,0.090909,0.068736,0.097561,0.042129,0.323725,0.270510,0.147059,0.117647,0.029412,0.029412,0.058824,0.176471,0.441176,12.0,6.833333,10.333333,8.800000,3.800000,8.111111,20.333333,1.666667,1.333333,1.0,1.0,1.0,1.2,7.5,106,44,251,107,17.0,16.0,19.0,17.0,8,10,11,8,6,6,7,6,1.0,0.0,5724.0,16.0,858.0,188.0,370.0,0.0,105.0,8.0,12.0,0.0,6.0,0.0,152.653846,8.705109,0.317624,141.133806,8.156852,1.000000,6.0_0.0,170.208267,9.331121,0.337225,0.396320,0.386876,6.671329,0.219114,3.523810,0.076190,0.000550,0.020911,0.394690,0.180355,0.126840,0.111570,0.165085,0.000000,0.026999,0.406023,0.154725,0.127726,0.111111,0.173416,1.333333,8.000000,95.633333,42.290323,30.733333,26.161290,109.090909,0.000000,2.888889,21.722222,8.277778,5.590909,4.863636,15.181818,2,14,62,2137,1.0,8.0,26.0,611.0,1,13,42,185,1,7,21,64,34176_4356,6.0,0.0,0.0,6.0,0.0,6.0,0.0,1,1,2,1,1,2,6.0,0.945652,0.945355,0.945652,0.934426,611.0,1.0,1.0,0.095238,0.058824,1.0,1.0,0.100000,0.136364,0.160000
12,34176,2217,0,6.0,0.0,386.0,0.0,23.0,6.0,24.0,0.0,11.0,1.0,16.782609,0.260870,2.181818,0.090909,46,16,0.10643,0.090909,0.068736,0.097561,0.042129,0.323725,0.270510,0.147059,0.117647,0.029412,0.029412,0.058824,0.176471,0.441176,12.0,6.833333,10.333333,8.800000,3.800000,8.111111,20.333333,1.666667,1.333333,1.0,1.0,1.0,1.2,7.5,106,44,251,107,17.0,16.0,19.0,17.0,8,10,11,8,6,6,7,6,1.0,0.0,39620.0,67.0,1018.0,3787.0,12610.0,34.0,2703.0,363.0,1.0,0.0,1.0,0.0,152.653846,8.705109,0.317624,141.133806,8.156852,1.000000,6.0_0.0,170.208267,9.331121,0.337225,0.396320,0.386876,38.919450,3.720039,4.665187,0.134295,0.033570,0.085130,0.072672,0.053869,0.111059,0.145842,0.497857,0.026874,0.041924,0.034668,0.021768,0.045418,0.099704,0.729643,96.238095,170.833333,141.129032,104.612903,222.866667,283.225806,2724.727273,4.761905,5.379310,4.777778,4.764706,7.041667,14.269231,452.500000,2,5,377,14199,1.0,5.0,158.0,913.0,2,5,174,4319,1,4,114,2428,34176_2217,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1,1,1,1,1,1,0.0,0.945652,0.945355,0.945652,0.934426,913.0,1.0,0.0,0.095238,0.000000,1.0,1.0,0.100000,0.105263,0.136364
16,230784,4818,0,0.0,0.0,46.0,0.0,6.0,0.0,1.0,0.0,1.0,0.0,7.666667,0.000000,1.000000,0.000000,15,5,0.00000,0.518519,0.092593,0.000000,0.074074,0.037037,0.277778,0.000000,0.571429,0.142857,0.000000,0.000000,0.142857,0.142857,0.0,4.000000,5.000000,0.000000,1.333333,2.000000,3.750000,0.000000,1.333333,1.0,0.0,0.0,1.0,1.0,19,17,31,20,5.0,6.0,6.0,5.0,1,1,1,1,1,1,1,1,0.0,0.0,27211.0,103.0,583.0,1770.0,16057.0,26.0,2150.0,189.0,7.0,0.0,1.0,0.0,105.742450,6.229536,0.852016,141.133806,8.156852,1.000000,0.0_0.0,117.407391,6.637107,0.895509,0.725957,0.730747,46.674099,3.036021,7.468372,0.087907,0.027574,0.060430,0.040987,0.040321,0.048972,0.114143,0.667575,0.035492,0.047567,0.032565,0.024881,0.031467,0.037322,0.790706,63.142857,96.866667,63.580645,62.548387,78.500000,177.064516,2918.454545,4.619048,5.000000,3.708333,2.833333,3.185185,3.923077,360.166667,2,27,461,5569,1.0,17.0,126.0,332.0,2,25,302,3052,1,23,199,1285,230784_4818,6.0,0.0,0.0,1.0,0.0,1.0,0.0,1,1,1,1,1,1,6.0,0.885870,0.885246,0.885870,0.743169,332.0,0.0,0.0,0.000000,0.000000,0.0,1.0,0.000000,0.153846,0.192308
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
387647,359807,4325,0,4.0,1.0,58.0,0.0,4.0,1.0,49.0,0.0,5.0,0.0,14.500000,0.250000,9.800000,0.000000,11,4,0.00000,0.000000,0.000000,0.000000,0.085470,0.196581,0.717949,0.000000,0.000000,0.000000,0.000000,0.111111,0.222222,0.666667,0.0,0.000000,0.000000,0.000000,3.333333,4.600000,21.000000,0.000000,0.000000,0.0,0.0,1.0,1.0,3.0,22,16,31,21,4.0,4.0,4.0,4.0,11,11,19,13,2,3,3,2,0.0,0.0,11616.0,29.0,1124.0,848.0,5095.0,18.0,996.0,63.0,18.0,0.0,2.0,0.0,147.542249,9.620345,0.716419,99.741350,6.668382,0.425959,4.0_1.0,124.098490,8.449806,0.317763,0.272280,0.330118,10.334520,0.754448,5.115462,0.063253,0.023599,0.054828,0.075092,0.078175,0.124817,0.191369,0.452120,0.030189,0.047642,0.079717,0.079245,0.108019,0.157075,0.498113,22.238095,36.166667,47.935484,49.903226,82.333333,122.161290,813.363636,3.047619,3.740741,5.451613,5.600000,7.633333,10.741935,96.000000,2,18,75,4412,1.0,15.0,41.0,849.0,2,14,38,1421,1,11,29,779,359807_4325,0.0,0.0,0.0,18.0,0.0,2.0,0.0,1,2,6,1,1,1,0.0,0.282609,0.273224,0.239130,0.202186,849.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.095238
387648,294527,3971,0,0.0,1.0,121.0,0.0,1.0,24.0,41.0,0.0,4.0,7.0,121.000000,24.000000,10.250000,1.750000,5,1,0.00000,0.000000,0.000000,0.000000,0.101010,0.000000,0.898990,0.000000,0.000000,0.000000,0.000000,0.200000,0.000000,0.800000,0.0,0.000000,0.000000,0.000000,20.000000,0.000000,35.600000,0.000000,0.000000,0.0,0.0,1.0,0.0,4.0,22,16,73,23,1.0,1.0,1.0,1.0,20,10,22,20,4,4,4,4,0.0,0.0,17573.0,75.0,871.0,1414.0,7029.0,19.0,1737.0,174.0,13.0,0.0,1.0,3.0,105.742450,6.229536,0.852016,99.741350,6.668382,0.425959,0.0_1.0,77.416906,5.293887,0.303650,0.162313,0.197636,20.175660,1.623421,4.046632,0.100173,0.017825,0.073204,0.072200,0.066524,0.123944,0.133636,0.512668,0.020706,0.053298,0.049463,0.046396,0.084739,0.067868,0.677531,24.523810,70.500000,67.290323,62.000000,119.366667,124.548387,1346.545455,3.375000,5.560000,4.448276,4.033333,7.620690,5.709677,176.700000,2,7,271,5720,1.0,6.0,98.0,577.0,2,6,129,2669,1,6,79,1432,294527_3971,8.0,0.0,3.0,5.0,0.0,1.0,0.0,1,1,2,1,1,1,11.0,0.255435,0.251366,0.255435,0.000000,577.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.100000
387651,294527,152,0,0.0,1.0,121.0,0.0,1.0,24.0,41.0,0.0,4.0,7.0,121.000000,24.000000,10.250000,1.750000,5,1,0.00000,0.000000,0.000000,0.000000,0.101010,0.000000,0.898990,0.000000,0.000000,0.000000,0.000000,0.200000,0.000000,0.800000,0.0,0.000000,0.000000,0.000000,20.000000,0.000000,35.600000,0.000000,0.000000,0.0,0.0,1.0,0.0,4.0,22,16,73,23,1.0,1.0,1.0,1.0,20,10,22,20,4,4,4,4,0.0,0.0,4303.0,8.0,219.0,458.0,2526.0,4.0,544.0,45.0,7.0,0.0,1.0,1.0,105.742450,6.229536,0.852016,99.741350,6.668382,0.425959,0.0_1.0,77.416906,5.293887,0.303650,0.162313,0.197636,19.648402,2.091324,4.643382,0.082721,0.002344,0.004811,0.026027,0.125941,0.096336,0.068583,0.675959,0.001311,0.002621,0.000000,0.171691,0.095675,0.011796,0.716907,1.900000,2.294118,11.722222,46.409091,26.931034,17.935484,498.181818,1.000000,1.000000,0.000000,26.200000,14.600000,2.250000,136.750000,2,13,162,1698,1.0,5.0,16.0,176.0,2,9,99,978,1,5,40,474,294527_152,0.0,0.0,0.0,7.0,0.0,1.0,1.0,1,1,1,1,1,1,0.0,0.255435,0.251366,0.255435,0.000000,176.0,0.0,0.0,0.000000,0.000000,0.0,1.0,0.000000,0.200000,0.428571
387654,294527,2537,0,0.0,1.0,121.0,0.0,1.0,24.0,41.0,0.0,4.0,7.0,121.000000,24.000000,10.250000,1.750000,5,1,0.00000,0.000000,0.000000,0.000000,0.101010,0.000000,0.898990,0.000000,0.000000,0.000000,0.000000,0.200000,0.000000,0.800000,0.0,0.000000,0.000000,0.000000,20.000000,0.000000,35.600000,0.000000,0.000000,0.0,0.0,1.0,0.0,4.0,22,16,73,23,1.0,1.0,1.0,1.0,20,10,22,20,4,4,4,4,0.0,0.0,26137.0,85.0,1829.0,2038.0,13456.0,38.0,4231.0,280.0,0.0,0.0,1.0,0.0,105.742450,6.229536,0.852016,99.741350,6.668382,0.425959,0.0_1.0,77.416906,5.293887,0.303650,0.162313,0.197636,14.290323,1.114270,3.180336,0.066178,0.012496,0.089450,0.030461,0.072358,0.111594,0.105793,0.577848,0.015017,0.045710,0.018977,0.048515,0.093069,0.057426,0.721287,28.619048,143.400000,47.258065,112.258065,178.900000,164.129032,2526.454545,4.789474,10.653846,3.965517,9.483871,18.800000,11.225806,485.666667,2,20,221,8269,1.0,15.0,136.0,1414.0,2,18,144,3980,1,15,127,2935,294527_2537,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1,1,1,1,1,1,0.0,0.255435,0.251366,0.255435,0.000000,1414.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.050000


# User-Item

In [178]:
#items bought by user before sale

temp = df[((df.action_type == 2)&(df.time_stamp != '1900-11-11'))].groupby(['user_id'])['item_id']\
         .unique().reset_index()

temp = temp.rename(columns = {'item_id':'user_item_before_sale'})


#items bought by user on sale

temp1 = df[((df.action_type == 2)&(df.time_stamp == '1900-11-11'))].groupby(['user_id'])['item_id']\
         .unique().reset_index()

temp1 = temp1.rename(columns = {'item_id':'user_item_on_sale'})


user_item = pd.merge(df_train, temp, on = 'user_id', how = 'left')

user_item = pd.merge(user_item, temp1, on = 'user_id', how = 'left')

#items sold by merchant 

temp = df[df.action_type == 2].groupby(['seller_id'])['item_id'].unique().reset_index()

temp = temp.rename(columns = {'item_id':'seller_item'})


user_item = pd.merge(user_item, temp, on = 'seller_id', how = 'left')


user_item['index'] = user_item.index

#Make a new column with user-merchant pairs together as a key
user_item['user_merchant_pair'] = user_item.user_id.astype(str).str.cat(user_item.seller_id.astype(str), sep='_')


temp = user_item.dropna().apply(lambda row: list( set(row['user_item_before_sale'])  & 
                                                 set(row['user_item_on_sale']) ), axis=1)\
               .reset_index().rename(columns = {0 : 'items_intersection_user_before_on_sale'})


user_item = pd.merge(user_item, temp, on = 'index', how = 'left')


In [179]:
temp = user_item[user_item.items_intersection_user_before_on_sale.str.len() > 0]

temp['item_intersection_user_before_on_sale'] = 1

temp = temp[['user_id', 'item_intersection_user_before_on_sale']]

temp = temp.drop_duplicates()

df_combined = pd.merge(df_combined, temp, on = 'user_id', how = 'left')

df_combined = df_combined.fillna(0)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [180]:
temp = user_item.dropna().apply(lambda row: list( set(row['user_item_before_sale'])  | 
                                                 set(row['user_item_on_sale']) ), axis=1)\
               .reset_index().rename(columns = {0 : 'items_union_user_before_on_sale'})


user_item = pd.merge(user_item, temp, on = 'index', how = 'left')

temp = user_item.dropna().apply(lambda row: list ( set(row['user_item_before_sale'])  & 
                                                   set(row['seller_item']) ), axis=1)\
               .reset_index().rename(columns = {0 : 'items_intersection_user_before_and_seller'})

user_item = pd.merge(user_item, temp, on = 'index', how = 'left')

In [181]:
temp = user_item[user_item.items_intersection_user_before_and_seller.str.len() > 0]

temp['item_intersection_user_before_and_seller'] = 1

temp = temp[['user_merchant_pair' ,'item_intersection_user_before_and_seller']]

temp = temp.drop_duplicates()

df_combined = pd.merge(df_combined, temp, on = 'user_merchant_pair', how = 'left')

df_combined = df_combined.fillna(0)


In [182]:
temp = user_item.dropna().apply(lambda row: list ( set(row['user_item_before_sale'])  | 
                                                   set(row['seller_item']) ), axis=1)\
                .reset_index().rename(columns = {0 : 'items_union_user_before_and_seller'})


user_item = pd.merge(user_item, temp, on = 'index', how = 'left')


In [183]:
user_item['items_intersection_user_before_on_sale_len'] = user_item.items_intersection_user_before_on_sale.str.len()
                                                                       

user_item['items_union_user_before_on_sale_len'] = user_item.items_union_user_before_on_sale.str.len()
                                                                       

user_item['items_intersection_user_before_and_seller_len'] = user_item.items_intersection_user_before_and_seller.str.len()
                                                                       

user_item['items_union_user_before_and_seller_len'] = user_item.items_union_user_before_and_seller.str.len()


In [184]:
temp = user_item[['user_id','seller_id','label','user_merchant_pair','items_intersection_user_before_on_sale_len',
                'items_union_user_before_on_sale_len','items_intersection_user_before_and_seller_len',
                'items_union_user_before_and_seller_len']]

temp = temp.fillna(0)

temp['ratio_item_int_union_user_before_on_sale'] = temp['items_intersection_user_before_on_sale_len']/temp['items_union_user_before_on_sale_len']


temp['ratio_item_int_union_user_before_seller'] = temp['items_intersection_user_before_and_seller_len']/temp['items_union_user_before_and_seller_len']

temp = temp.fillna(0)

In [185]:
df_combined = pd.merge(df_combined, temp[['user_id','ratio_item_int_union_user_before_on_sale']],
                      on = 'user_id', how = 'left')

df_combined = pd.merge(df_combined, temp[['user_merchant_pair','ratio_item_int_union_user_before_seller']],
                      on = 'user_merchant_pair', how = 'left')



In [186]:
df_combined = df_combined.drop_duplicates()

df_combined

Unnamed: 0,user_id,seller_id,label,age_range,gender,userbeforesale0,userbeforesale1,userbeforesale2,userbeforesale3,useronsale0,useronsale1,useronsale2,useronsale3,user_ratio_0_to_2_before_sale,user_ratio_3_to_2_before_sale,user_ratio_0_to_2_on_sale,user_ratio_3_to_2_on_sale,active_days,purchase_days,useractivity5,useractivity6,useractivity7,useractivity8,useractivity9,useractivity10,useractivity11,userpurchase5,userpurchase6,userpurchase7,userpurchase8,userpurchase9,userpurchase10,userpurchase11,useravgactivity5,useravgactivity6,useravgactivity7,useravgactivity8,useravgactivity9,useravgactivity10,useravgactivity11,useravgpurchase5,useravgpurchase6,useravgpurchase7,useravgpurchase8,useravgpurchase9,useravgpurchase10,useravgpurchase11,user_unique_brands_before_sale,user_unique_categories_before_sale,user_unique_items_before_sale,user_unique_sellers_before_sale,user_unique_brands_purchased_before_sale,user_unique_categories_purchased_before_sale,user_unique_items_purchased_before_sale,user_unique_sellers_purchased_before_sale,user_unique_brands_on_sale,user_unique_categories_on_sale,user_unique_items_on_sale,user_unique_sellers_on_sale,user_unique_brands_purchased_on_sale,user_unique_categories_purchased_on_sale,user_unique_items_purchased_on_sale,user_unique_sellers_purchased_on_sale,user_all_month_purchase,user_purchase_only_11,sellerbeforesale0,sellerbeforesale1,sellerbeforesale2,sellerbeforesale3,selleronsale0,selleronsale1,selleronsale2,selleronsale3,userseller0,userseller1,userseller2,userseller3,age_avg_activity,age_avg_purchase,unique_users_age_norm,gender_avg_activity,gender_avg_purchase,unique_users_gender_norm,age_gender,age_gender_avg_activity,age_gender_avg_purchase,unique_users_age_gender_norm,age_gender_active_days_norm,age_gender_purchase_days_norm,seller_ratio_0_to_2_before_sale,seller_ratio_3_to_2_before_sale,seller_ratio_0_to_2_on_sale,seller_ratio_3_to_2_on_sale,selleractivity5,selleractivity6,selleractivity7,selleractivity8,selleractivity9,selleractivity10,selleractivity11,sellerpurchase5,sellerpurchase6,sellerpurchase7,sellerpurchase8,sellerpurchase9,sellerpurchase10,sellerpurchase11,selleravgactivity5,selleravgactivity6,selleravgactivity7,selleravgactivity8,selleravgactivity9,selleravgactivity10,selleravgactivity11,selleravgpurchase5,selleravgpurchase6,selleravgpurchase7,selleravgpurchase8,selleravgpurchase9,selleravgpurchase10,selleravgpurchase11,seller_unique_brands_before_sale,seller_unique_categories_before_sale,seller_unique_items_before_sale,seller_unique_users_before_sale,seller_unique_brands_purchased_before_sale,seller_unique_categories_purchased_before_sale,seller_unique_items_purchased_before_sale,seller_unique_users_purchased_before_sale,seller_unique_brands_on_sale,seller_unique_categories_on_sale,seller_unique_items_on_sale,seller_unique_users_on_sale,seller_unique_brands_purchased_on_sale,seller_unique_categories_purchased_on_sale,seller_unique_items_purchased_on_sale,seller_unique_users_purchased_on_sale,user_merchant_pair,usersellerbeforesale0,usersellerbeforesale1,usersellerbeforesale3,userselleronsale0,userselleronsale1,userselleronsale2,userselleronsale3,um_unique_brands_on_sale,um_unique_categories_on_sale,um_unique_items_on_sale,um_unique_brands_purchased_on_sale,um_unique_categories_purchased_on_sale,um_unique_items_purchased_on_sale,um_activity_before_sale,activity_diff_from_1111,activity_diff_from_second_last,purchase_diff_from_1111,purchase_diff_from_second_last,seller_count_of_label-1,brand_intersection_user_before_on_sale,brand_intersection_user_before_and_seller,ratio_brand_int_union_user_before_on_sale,ratio_brand_int_union_user_before_seller,cat_intersection_user_before_on_sale,cat_intersection_user_before_and_seller,ratio_cat_int_union_user_before_on_sale,ratio_cat_int_union_user_before_seller,ratio_cat_int_union_user_seller,item_intersection_user_before_on_sale,item_intersection_user_before_and_seller,ratio_item_int_union_user_before_on_sale,ratio_item_int_union_user_before_seller
0,34176,3906,0,6.0,0.0,386.0,0.0,23.0,6.0,24.0,0.0,11.0,1.0,16.782609,0.260870,2.181818,0.090909,46,16,0.10643,0.090909,0.068736,0.097561,0.042129,0.323725,0.270510,0.147059,0.117647,0.029412,0.029412,0.058824,0.176471,0.441176,12.0,6.833333,10.333333,8.800000,3.800000,8.111111,20.333333,1.666667,1.333333,1.0,1.0,1.0,1.2,7.5,106,44,251,107,17.0,16.0,19.0,17.0,8,10,11,8,6,6,7,6,1.0,0.0,13203.0,26.0,233.0,937.0,1667.0,2.0,177.0,24.0,36.0,0.0,1.0,2.0,152.653846,8.705109,0.317624,141.133806,8.156852,1.000000,6.0_0.0,170.208267,9.331121,0.337225,0.396320,0.386876,56.665236,4.021459,9.418079,0.135593,0.082918,0.163563,0.086545,0.097117,0.157908,0.153544,0.258406,0.121951,0.151220,0.034146,0.039024,0.148780,0.070732,0.434146,64.238095,88.700000,45.419355,50.967742,85.633333,80.580645,382.181818,3.571429,2.695652,1.555556,2.285714,3.588235,1.705882,89.000000,2,20,307,5330,1.0,14.0,68.0,167.0,2,17,189,672,1,13,54,145,34176_3906,33.0,0.0,2.0,3.0,0.0,1.0,0.0,1,1,1,1,1,1,35.0,0.945652,0.945355,0.945652,0.934426,167.0,1.0,0.0,0.095238,0.000000,1.0,1.0,0.100000,0.133333,0.187500,0.0,0.0,0.0,0.0
4,34176,121,0,6.0,0.0,386.0,0.0,23.0,6.0,24.0,0.0,11.0,1.0,16.782609,0.260870,2.181818,0.090909,46,16,0.10643,0.090909,0.068736,0.097561,0.042129,0.323725,0.270510,0.147059,0.117647,0.029412,0.029412,0.058824,0.176471,0.441176,12.0,6.833333,10.333333,8.800000,3.800000,8.111111,20.333333,1.666667,1.333333,1.0,1.0,1.0,1.2,7.5,106,44,251,107,17.0,16.0,19.0,17.0,8,10,11,8,6,6,7,6,1.0,0.0,48956.0,90.0,1565.0,2441.0,23309.0,31.0,3215.0,258.0,13.0,0.0,1.0,0.0,152.653846,8.705109,0.317624,141.133806,8.156852,1.000000,6.0_0.0,170.208267,9.331121,0.337225,0.396320,0.386876,31.281789,1.559744,7.250078,0.080249,0.055569,0.036349,0.065110,0.056057,0.130558,0.082564,0.573793,0.047071,0.016109,0.062552,0.034310,0.093933,0.055858,0.690167,211.333333,96.766667,167.741935,144.419355,347.566667,212.709677,4166.000000,10.714286,4.052632,12.458333,6.074074,14.966667,15.705882,412.375000,2,26,1170,8655,1.0,19.0,378.0,958.0,2,18,369,3942,1,16,276,1884,34176_121,8.0,0.0,0.0,5.0,0.0,1.0,0.0,1,1,1,1,1,1,8.0,0.945652,0.945355,0.945652,0.934426,958.0,1.0,0.0,0.095238,0.000000,1.0,1.0,0.100000,0.055556,0.076923,0.0,0.0,0.0,0.0
8,34176,4356,1,6.0,0.0,386.0,0.0,23.0,6.0,24.0,0.0,11.0,1.0,16.782609,0.260870,2.181818,0.090909,46,16,0.10643,0.090909,0.068736,0.097561,0.042129,0.323725,0.270510,0.147059,0.117647,0.029412,0.029412,0.058824,0.176471,0.441176,12.0,6.833333,10.333333,8.800000,3.800000,8.111111,20.333333,1.666667,1.333333,1.0,1.0,1.0,1.2,7.5,106,44,251,107,17.0,16.0,19.0,17.0,8,10,11,8,6,6,7,6,1.0,0.0,5724.0,16.0,858.0,188.0,370.0,0.0,105.0,8.0,12.0,0.0,6.0,0.0,152.653846,8.705109,0.317624,141.133806,8.156852,1.000000,6.0_0.0,170.208267,9.331121,0.337225,0.396320,0.386876,6.671329,0.219114,3.523810,0.076190,0.000550,0.020911,0.394690,0.180355,0.126840,0.111570,0.165085,0.000000,0.026999,0.406023,0.154725,0.127726,0.111111,0.173416,1.333333,8.000000,95.633333,42.290323,30.733333,26.161290,109.090909,0.000000,2.888889,21.722222,8.277778,5.590909,4.863636,15.181818,2,14,62,2137,1.0,8.0,26.0,611.0,1,13,42,185,1,7,21,64,34176_4356,6.0,0.0,0.0,6.0,0.0,6.0,0.0,1,1,2,1,1,2,6.0,0.945652,0.945355,0.945652,0.934426,611.0,1.0,1.0,0.095238,0.058824,1.0,1.0,0.100000,0.136364,0.160000,0.0,0.0,0.0,0.0
12,34176,2217,0,6.0,0.0,386.0,0.0,23.0,6.0,24.0,0.0,11.0,1.0,16.782609,0.260870,2.181818,0.090909,46,16,0.10643,0.090909,0.068736,0.097561,0.042129,0.323725,0.270510,0.147059,0.117647,0.029412,0.029412,0.058824,0.176471,0.441176,12.0,6.833333,10.333333,8.800000,3.800000,8.111111,20.333333,1.666667,1.333333,1.0,1.0,1.0,1.2,7.5,106,44,251,107,17.0,16.0,19.0,17.0,8,10,11,8,6,6,7,6,1.0,0.0,39620.0,67.0,1018.0,3787.0,12610.0,34.0,2703.0,363.0,1.0,0.0,1.0,0.0,152.653846,8.705109,0.317624,141.133806,8.156852,1.000000,6.0_0.0,170.208267,9.331121,0.337225,0.396320,0.386876,38.919450,3.720039,4.665187,0.134295,0.033570,0.085130,0.072672,0.053869,0.111059,0.145842,0.497857,0.026874,0.041924,0.034668,0.021768,0.045418,0.099704,0.729643,96.238095,170.833333,141.129032,104.612903,222.866667,283.225806,2724.727273,4.761905,5.379310,4.777778,4.764706,7.041667,14.269231,452.500000,2,5,377,14199,1.0,5.0,158.0,913.0,2,5,174,4319,1,4,114,2428,34176_2217,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1,1,1,1,1,1,0.0,0.945652,0.945355,0.945652,0.934426,913.0,1.0,0.0,0.095238,0.000000,1.0,1.0,0.100000,0.105263,0.136364,0.0,0.0,0.0,0.0
16,230784,4818,0,0.0,0.0,46.0,0.0,6.0,0.0,1.0,0.0,1.0,0.0,7.666667,0.000000,1.000000,0.000000,15,5,0.00000,0.518519,0.092593,0.000000,0.074074,0.037037,0.277778,0.000000,0.571429,0.142857,0.000000,0.000000,0.142857,0.142857,0.0,4.000000,5.000000,0.000000,1.333333,2.000000,3.750000,0.000000,1.333333,1.0,0.0,0.0,1.0,1.0,19,17,31,20,5.0,6.0,6.0,5.0,1,1,1,1,1,1,1,1,0.0,0.0,27211.0,103.0,583.0,1770.0,16057.0,26.0,2150.0,189.0,7.0,0.0,1.0,0.0,105.742450,6.229536,0.852016,141.133806,8.156852,1.000000,0.0_0.0,117.407391,6.637107,0.895509,0.725957,0.730747,46.674099,3.036021,7.468372,0.087907,0.027574,0.060430,0.040987,0.040321,0.048972,0.114143,0.667575,0.035492,0.047567,0.032565,0.024881,0.031467,0.037322,0.790706,63.142857,96.866667,63.580645,62.548387,78.500000,177.064516,2918.454545,4.619048,5.000000,3.708333,2.833333,3.185185,3.923077,360.166667,2,27,461,5569,1.0,17.0,126.0,332.0,2,25,302,3052,1,23,199,1285,230784_4818,6.0,0.0,0.0,1.0,0.0,1.0,0.0,1,1,1,1,1,1,6.0,0.885870,0.885246,0.885870,0.743169,332.0,0.0,0.0,0.000000,0.000000,0.0,1.0,0.000000,0.153846,0.192308,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
387647,359807,4325,0,4.0,1.0,58.0,0.0,4.0,1.0,49.0,0.0,5.0,0.0,14.500000,0.250000,9.800000,0.000000,11,4,0.00000,0.000000,0.000000,0.000000,0.085470,0.196581,0.717949,0.000000,0.000000,0.000000,0.000000,0.111111,0.222222,0.666667,0.0,0.000000,0.000000,0.000000,3.333333,4.600000,21.000000,0.000000,0.000000,0.0,0.0,1.0,1.0,3.0,22,16,31,21,4.0,4.0,4.0,4.0,11,11,19,13,2,3,3,2,0.0,0.0,11616.0,29.0,1124.0,848.0,5095.0,18.0,996.0,63.0,18.0,0.0,2.0,0.0,147.542249,9.620345,0.716419,99.741350,6.668382,0.425959,4.0_1.0,124.098490,8.449806,0.317763,0.272280,0.330118,10.334520,0.754448,5.115462,0.063253,0.023599,0.054828,0.075092,0.078175,0.124817,0.191369,0.452120,0.030189,0.047642,0.079717,0.079245,0.108019,0.157075,0.498113,22.238095,36.166667,47.935484,49.903226,82.333333,122.161290,813.363636,3.047619,3.740741,5.451613,5.600000,7.633333,10.741935,96.000000,2,18,75,4412,1.0,15.0,41.0,849.0,2,14,38,1421,1,11,29,779,359807_4325,0.0,0.0,0.0,18.0,0.0,2.0,0.0,1,2,6,1,1,1,0.0,0.282609,0.273224,0.239130,0.202186,849.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.095238,0.0,0.0,0.0,0.0
387648,294527,3971,0,0.0,1.0,121.0,0.0,1.0,24.0,41.0,0.0,4.0,7.0,121.000000,24.000000,10.250000,1.750000,5,1,0.00000,0.000000,0.000000,0.000000,0.101010,0.000000,0.898990,0.000000,0.000000,0.000000,0.000000,0.200000,0.000000,0.800000,0.0,0.000000,0.000000,0.000000,20.000000,0.000000,35.600000,0.000000,0.000000,0.0,0.0,1.0,0.0,4.0,22,16,73,23,1.0,1.0,1.0,1.0,20,10,22,20,4,4,4,4,0.0,0.0,17573.0,75.0,871.0,1414.0,7029.0,19.0,1737.0,174.0,13.0,0.0,1.0,3.0,105.742450,6.229536,0.852016,99.741350,6.668382,0.425959,0.0_1.0,77.416906,5.293887,0.303650,0.162313,0.197636,20.175660,1.623421,4.046632,0.100173,0.017825,0.073204,0.072200,0.066524,0.123944,0.133636,0.512668,0.020706,0.053298,0.049463,0.046396,0.084739,0.067868,0.677531,24.523810,70.500000,67.290323,62.000000,119.366667,124.548387,1346.545455,3.375000,5.560000,4.448276,4.033333,7.620690,5.709677,176.700000,2,7,271,5720,1.0,6.0,98.0,577.0,2,6,129,2669,1,6,79,1432,294527_3971,8.0,0.0,3.0,5.0,0.0,1.0,0.0,1,1,2,1,1,1,11.0,0.255435,0.251366,0.255435,0.000000,577.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.100000,0.0,0.0,0.0,0.0
387651,294527,152,0,0.0,1.0,121.0,0.0,1.0,24.0,41.0,0.0,4.0,7.0,121.000000,24.000000,10.250000,1.750000,5,1,0.00000,0.000000,0.000000,0.000000,0.101010,0.000000,0.898990,0.000000,0.000000,0.000000,0.000000,0.200000,0.000000,0.800000,0.0,0.000000,0.000000,0.000000,20.000000,0.000000,35.600000,0.000000,0.000000,0.0,0.0,1.0,0.0,4.0,22,16,73,23,1.0,1.0,1.0,1.0,20,10,22,20,4,4,4,4,0.0,0.0,4303.0,8.0,219.0,458.0,2526.0,4.0,544.0,45.0,7.0,0.0,1.0,1.0,105.742450,6.229536,0.852016,99.741350,6.668382,0.425959,0.0_1.0,77.416906,5.293887,0.303650,0.162313,0.197636,19.648402,2.091324,4.643382,0.082721,0.002344,0.004811,0.026027,0.125941,0.096336,0.068583,0.675959,0.001311,0.002621,0.000000,0.171691,0.095675,0.011796,0.716907,1.900000,2.294118,11.722222,46.409091,26.931034,17.935484,498.181818,1.000000,1.000000,0.000000,26.200000,14.600000,2.250000,136.750000,2,13,162,1698,1.0,5.0,16.0,176.0,2,9,99,978,1,5,40,474,294527_152,0.0,0.0,0.0,7.0,0.0,1.0,1.0,1,1,1,1,1,1,0.0,0.255435,0.251366,0.255435,0.000000,176.0,0.0,0.0,0.000000,0.000000,0.0,1.0,0.000000,0.200000,0.428571,0.0,0.0,0.0,0.0
387654,294527,2537,0,0.0,1.0,121.0,0.0,1.0,24.0,41.0,0.0,4.0,7.0,121.000000,24.000000,10.250000,1.750000,5,1,0.00000,0.000000,0.000000,0.000000,0.101010,0.000000,0.898990,0.000000,0.000000,0.000000,0.000000,0.200000,0.000000,0.800000,0.0,0.000000,0.000000,0.000000,20.000000,0.000000,35.600000,0.000000,0.000000,0.0,0.0,1.0,0.0,4.0,22,16,73,23,1.0,1.0,1.0,1.0,20,10,22,20,4,4,4,4,0.0,0.0,26137.0,85.0,1829.0,2038.0,13456.0,38.0,4231.0,280.0,0.0,0.0,1.0,0.0,105.742450,6.229536,0.852016,99.741350,6.668382,0.425959,0.0_1.0,77.416906,5.293887,0.303650,0.162313,0.197636,14.290323,1.114270,3.180336,0.066178,0.012496,0.089450,0.030461,0.072358,0.111594,0.105793,0.577848,0.015017,0.045710,0.018977,0.048515,0.093069,0.057426,0.721287,28.619048,143.400000,47.258065,112.258065,178.900000,164.129032,2526.454545,4.789474,10.653846,3.965517,9.483871,18.800000,11.225806,485.666667,2,20,221,8269,1.0,15.0,136.0,1414.0,2,18,144,3980,1,15,127,2935,294527_2537,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1,1,1,1,1,1,0.0,0.255435,0.251366,0.255435,0.000000,1414.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.050000,0.0,0.0,0.0,0.0


# TREND

In [188]:
df_combined

Unnamed: 0,user_id,seller_id,label,age_range,gender,userbeforesale0,userbeforesale1,userbeforesale2,userbeforesale3,useronsale0,useronsale1,useronsale2,useronsale3,user_ratio_0_to_2_before_sale,user_ratio_3_to_2_before_sale,user_ratio_0_to_2_on_sale,user_ratio_3_to_2_on_sale,active_days,purchase_days,useractivity5,useractivity6,useractivity7,useractivity8,useractivity9,useractivity10,useractivity11,userpurchase5,userpurchase6,userpurchase7,userpurchase8,userpurchase9,userpurchase10,userpurchase11,useravgactivity5,useravgactivity6,useravgactivity7,useravgactivity8,useravgactivity9,useravgactivity10,useravgactivity11,useravgpurchase5,useravgpurchase6,useravgpurchase7,useravgpurchase8,useravgpurchase9,useravgpurchase10,useravgpurchase11,user_unique_brands_before_sale,user_unique_categories_before_sale,user_unique_items_before_sale,user_unique_sellers_before_sale,user_unique_brands_purchased_before_sale,user_unique_categories_purchased_before_sale,user_unique_items_purchased_before_sale,user_unique_sellers_purchased_before_sale,user_unique_brands_on_sale,user_unique_categories_on_sale,user_unique_items_on_sale,user_unique_sellers_on_sale,user_unique_brands_purchased_on_sale,user_unique_categories_purchased_on_sale,user_unique_items_purchased_on_sale,user_unique_sellers_purchased_on_sale,user_all_month_purchase,user_purchase_only_11,sellerbeforesale0,sellerbeforesale1,sellerbeforesale2,sellerbeforesale3,selleronsale0,selleronsale1,selleronsale2,selleronsale3,userseller0,userseller1,userseller2,userseller3,age_avg_activity,age_avg_purchase,unique_users_age_norm,gender_avg_activity,gender_avg_purchase,unique_users_gender_norm,age_gender,age_gender_avg_activity,age_gender_avg_purchase,unique_users_age_gender_norm,age_gender_active_days_norm,age_gender_purchase_days_norm,seller_ratio_0_to_2_before_sale,seller_ratio_3_to_2_before_sale,seller_ratio_0_to_2_on_sale,seller_ratio_3_to_2_on_sale,selleractivity5,selleractivity6,selleractivity7,selleractivity8,selleractivity9,selleractivity10,selleractivity11,sellerpurchase5,sellerpurchase6,sellerpurchase7,sellerpurchase8,sellerpurchase9,sellerpurchase10,sellerpurchase11,selleravgactivity5,selleravgactivity6,selleravgactivity7,selleravgactivity8,selleravgactivity9,selleravgactivity10,selleravgactivity11,selleravgpurchase5,selleravgpurchase6,selleravgpurchase7,selleravgpurchase8,selleravgpurchase9,selleravgpurchase10,selleravgpurchase11,seller_unique_brands_before_sale,seller_unique_categories_before_sale,seller_unique_items_before_sale,seller_unique_users_before_sale,seller_unique_brands_purchased_before_sale,seller_unique_categories_purchased_before_sale,seller_unique_items_purchased_before_sale,seller_unique_users_purchased_before_sale,seller_unique_brands_on_sale,seller_unique_categories_on_sale,seller_unique_items_on_sale,seller_unique_users_on_sale,seller_unique_brands_purchased_on_sale,seller_unique_categories_purchased_on_sale,seller_unique_items_purchased_on_sale,seller_unique_users_purchased_on_sale,user_merchant_pair,usersellerbeforesale0,usersellerbeforesale1,usersellerbeforesale3,userselleronsale0,userselleronsale1,userselleronsale2,userselleronsale3,um_unique_brands_on_sale,um_unique_categories_on_sale,um_unique_items_on_sale,um_unique_brands_purchased_on_sale,um_unique_categories_purchased_on_sale,um_unique_items_purchased_on_sale,um_activity_before_sale,activity_diff_from_1111,activity_diff_from_second_last,purchase_diff_from_1111,purchase_diff_from_second_last,seller_count_of_label-1,brand_intersection_user_before_on_sale,brand_intersection_user_before_and_seller,ratio_brand_int_union_user_before_on_sale,ratio_brand_int_union_user_before_seller,cat_intersection_user_before_on_sale,cat_intersection_user_before_and_seller,ratio_cat_int_union_user_before_on_sale,ratio_cat_int_union_user_before_seller,ratio_cat_int_union_user_seller,item_intersection_user_before_on_sale,item_intersection_user_before_and_seller,ratio_item_int_union_user_before_on_sale,ratio_item_int_union_user_before_seller
0,34176,3906,0,6.0,0.0,386.0,0.0,23.0,6.0,24.0,0.0,11.0,1.0,16.782609,0.260870,2.181818,0.090909,46,16,0.10643,0.090909,0.068736,0.097561,0.042129,0.323725,0.270510,0.147059,0.117647,0.029412,0.029412,0.058824,0.176471,0.441176,12.0,6.833333,10.333333,8.800000,3.800000,8.111111,20.333333,1.666667,1.333333,1.0,1.0,1.0,1.2,7.5,106,44,251,107,17.0,16.0,19.0,17.0,8,10,11,8,6,6,7,6,1.0,0.0,13203.0,26.0,233.0,937.0,1667.0,2.0,177.0,24.0,36.0,0.0,1.0,2.0,152.653846,8.705109,0.317624,141.133806,8.156852,1.000000,6.0_0.0,170.208267,9.331121,0.337225,0.396320,0.386876,56.665236,4.021459,9.418079,0.135593,0.082918,0.163563,0.086545,0.097117,0.157908,0.153544,0.258406,0.121951,0.151220,0.034146,0.039024,0.148780,0.070732,0.434146,64.238095,88.700000,45.419355,50.967742,85.633333,80.580645,382.181818,3.571429,2.695652,1.555556,2.285714,3.588235,1.705882,89.000000,2,20,307,5330,1.0,14.0,68.0,167.0,2,17,189,672,1,13,54,145,34176_3906,33.0,0.0,2.0,3.0,0.0,1.0,0.0,1,1,1,1,1,1,35.0,0.945652,0.945355,0.945652,0.934426,167.0,1.0,0.0,0.095238,0.000000,1.0,1.0,0.100000,0.133333,0.187500,0.0,0.0,0.0,0.0
4,34176,121,0,6.0,0.0,386.0,0.0,23.0,6.0,24.0,0.0,11.0,1.0,16.782609,0.260870,2.181818,0.090909,46,16,0.10643,0.090909,0.068736,0.097561,0.042129,0.323725,0.270510,0.147059,0.117647,0.029412,0.029412,0.058824,0.176471,0.441176,12.0,6.833333,10.333333,8.800000,3.800000,8.111111,20.333333,1.666667,1.333333,1.0,1.0,1.0,1.2,7.5,106,44,251,107,17.0,16.0,19.0,17.0,8,10,11,8,6,6,7,6,1.0,0.0,48956.0,90.0,1565.0,2441.0,23309.0,31.0,3215.0,258.0,13.0,0.0,1.0,0.0,152.653846,8.705109,0.317624,141.133806,8.156852,1.000000,6.0_0.0,170.208267,9.331121,0.337225,0.396320,0.386876,31.281789,1.559744,7.250078,0.080249,0.055569,0.036349,0.065110,0.056057,0.130558,0.082564,0.573793,0.047071,0.016109,0.062552,0.034310,0.093933,0.055858,0.690167,211.333333,96.766667,167.741935,144.419355,347.566667,212.709677,4166.000000,10.714286,4.052632,12.458333,6.074074,14.966667,15.705882,412.375000,2,26,1170,8655,1.0,19.0,378.0,958.0,2,18,369,3942,1,16,276,1884,34176_121,8.0,0.0,0.0,5.0,0.0,1.0,0.0,1,1,1,1,1,1,8.0,0.945652,0.945355,0.945652,0.934426,958.0,1.0,0.0,0.095238,0.000000,1.0,1.0,0.100000,0.055556,0.076923,0.0,0.0,0.0,0.0
8,34176,4356,1,6.0,0.0,386.0,0.0,23.0,6.0,24.0,0.0,11.0,1.0,16.782609,0.260870,2.181818,0.090909,46,16,0.10643,0.090909,0.068736,0.097561,0.042129,0.323725,0.270510,0.147059,0.117647,0.029412,0.029412,0.058824,0.176471,0.441176,12.0,6.833333,10.333333,8.800000,3.800000,8.111111,20.333333,1.666667,1.333333,1.0,1.0,1.0,1.2,7.5,106,44,251,107,17.0,16.0,19.0,17.0,8,10,11,8,6,6,7,6,1.0,0.0,5724.0,16.0,858.0,188.0,370.0,0.0,105.0,8.0,12.0,0.0,6.0,0.0,152.653846,8.705109,0.317624,141.133806,8.156852,1.000000,6.0_0.0,170.208267,9.331121,0.337225,0.396320,0.386876,6.671329,0.219114,3.523810,0.076190,0.000550,0.020911,0.394690,0.180355,0.126840,0.111570,0.165085,0.000000,0.026999,0.406023,0.154725,0.127726,0.111111,0.173416,1.333333,8.000000,95.633333,42.290323,30.733333,26.161290,109.090909,0.000000,2.888889,21.722222,8.277778,5.590909,4.863636,15.181818,2,14,62,2137,1.0,8.0,26.0,611.0,1,13,42,185,1,7,21,64,34176_4356,6.0,0.0,0.0,6.0,0.0,6.0,0.0,1,1,2,1,1,2,6.0,0.945652,0.945355,0.945652,0.934426,611.0,1.0,1.0,0.095238,0.058824,1.0,1.0,0.100000,0.136364,0.160000,0.0,0.0,0.0,0.0
12,34176,2217,0,6.0,0.0,386.0,0.0,23.0,6.0,24.0,0.0,11.0,1.0,16.782609,0.260870,2.181818,0.090909,46,16,0.10643,0.090909,0.068736,0.097561,0.042129,0.323725,0.270510,0.147059,0.117647,0.029412,0.029412,0.058824,0.176471,0.441176,12.0,6.833333,10.333333,8.800000,3.800000,8.111111,20.333333,1.666667,1.333333,1.0,1.0,1.0,1.2,7.5,106,44,251,107,17.0,16.0,19.0,17.0,8,10,11,8,6,6,7,6,1.0,0.0,39620.0,67.0,1018.0,3787.0,12610.0,34.0,2703.0,363.0,1.0,0.0,1.0,0.0,152.653846,8.705109,0.317624,141.133806,8.156852,1.000000,6.0_0.0,170.208267,9.331121,0.337225,0.396320,0.386876,38.919450,3.720039,4.665187,0.134295,0.033570,0.085130,0.072672,0.053869,0.111059,0.145842,0.497857,0.026874,0.041924,0.034668,0.021768,0.045418,0.099704,0.729643,96.238095,170.833333,141.129032,104.612903,222.866667,283.225806,2724.727273,4.761905,5.379310,4.777778,4.764706,7.041667,14.269231,452.500000,2,5,377,14199,1.0,5.0,158.0,913.0,2,5,174,4319,1,4,114,2428,34176_2217,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1,1,1,1,1,1,0.0,0.945652,0.945355,0.945652,0.934426,913.0,1.0,0.0,0.095238,0.000000,1.0,1.0,0.100000,0.105263,0.136364,0.0,0.0,0.0,0.0
16,230784,4818,0,0.0,0.0,46.0,0.0,6.0,0.0,1.0,0.0,1.0,0.0,7.666667,0.000000,1.000000,0.000000,15,5,0.00000,0.518519,0.092593,0.000000,0.074074,0.037037,0.277778,0.000000,0.571429,0.142857,0.000000,0.000000,0.142857,0.142857,0.0,4.000000,5.000000,0.000000,1.333333,2.000000,3.750000,0.000000,1.333333,1.0,0.0,0.0,1.0,1.0,19,17,31,20,5.0,6.0,6.0,5.0,1,1,1,1,1,1,1,1,0.0,0.0,27211.0,103.0,583.0,1770.0,16057.0,26.0,2150.0,189.0,7.0,0.0,1.0,0.0,105.742450,6.229536,0.852016,141.133806,8.156852,1.000000,0.0_0.0,117.407391,6.637107,0.895509,0.725957,0.730747,46.674099,3.036021,7.468372,0.087907,0.027574,0.060430,0.040987,0.040321,0.048972,0.114143,0.667575,0.035492,0.047567,0.032565,0.024881,0.031467,0.037322,0.790706,63.142857,96.866667,63.580645,62.548387,78.500000,177.064516,2918.454545,4.619048,5.000000,3.708333,2.833333,3.185185,3.923077,360.166667,2,27,461,5569,1.0,17.0,126.0,332.0,2,25,302,3052,1,23,199,1285,230784_4818,6.0,0.0,0.0,1.0,0.0,1.0,0.0,1,1,1,1,1,1,6.0,0.885870,0.885246,0.885870,0.743169,332.0,0.0,0.0,0.000000,0.000000,0.0,1.0,0.000000,0.153846,0.192308,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
387647,359807,4325,0,4.0,1.0,58.0,0.0,4.0,1.0,49.0,0.0,5.0,0.0,14.500000,0.250000,9.800000,0.000000,11,4,0.00000,0.000000,0.000000,0.000000,0.085470,0.196581,0.717949,0.000000,0.000000,0.000000,0.000000,0.111111,0.222222,0.666667,0.0,0.000000,0.000000,0.000000,3.333333,4.600000,21.000000,0.000000,0.000000,0.0,0.0,1.0,1.0,3.0,22,16,31,21,4.0,4.0,4.0,4.0,11,11,19,13,2,3,3,2,0.0,0.0,11616.0,29.0,1124.0,848.0,5095.0,18.0,996.0,63.0,18.0,0.0,2.0,0.0,147.542249,9.620345,0.716419,99.741350,6.668382,0.425959,4.0_1.0,124.098490,8.449806,0.317763,0.272280,0.330118,10.334520,0.754448,5.115462,0.063253,0.023599,0.054828,0.075092,0.078175,0.124817,0.191369,0.452120,0.030189,0.047642,0.079717,0.079245,0.108019,0.157075,0.498113,22.238095,36.166667,47.935484,49.903226,82.333333,122.161290,813.363636,3.047619,3.740741,5.451613,5.600000,7.633333,10.741935,96.000000,2,18,75,4412,1.0,15.0,41.0,849.0,2,14,38,1421,1,11,29,779,359807_4325,0.0,0.0,0.0,18.0,0.0,2.0,0.0,1,2,6,1,1,1,0.0,0.282609,0.273224,0.239130,0.202186,849.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.095238,0.0,0.0,0.0,0.0
387648,294527,3971,0,0.0,1.0,121.0,0.0,1.0,24.0,41.0,0.0,4.0,7.0,121.000000,24.000000,10.250000,1.750000,5,1,0.00000,0.000000,0.000000,0.000000,0.101010,0.000000,0.898990,0.000000,0.000000,0.000000,0.000000,0.200000,0.000000,0.800000,0.0,0.000000,0.000000,0.000000,20.000000,0.000000,35.600000,0.000000,0.000000,0.0,0.0,1.0,0.0,4.0,22,16,73,23,1.0,1.0,1.0,1.0,20,10,22,20,4,4,4,4,0.0,0.0,17573.0,75.0,871.0,1414.0,7029.0,19.0,1737.0,174.0,13.0,0.0,1.0,3.0,105.742450,6.229536,0.852016,99.741350,6.668382,0.425959,0.0_1.0,77.416906,5.293887,0.303650,0.162313,0.197636,20.175660,1.623421,4.046632,0.100173,0.017825,0.073204,0.072200,0.066524,0.123944,0.133636,0.512668,0.020706,0.053298,0.049463,0.046396,0.084739,0.067868,0.677531,24.523810,70.500000,67.290323,62.000000,119.366667,124.548387,1346.545455,3.375000,5.560000,4.448276,4.033333,7.620690,5.709677,176.700000,2,7,271,5720,1.0,6.0,98.0,577.0,2,6,129,2669,1,6,79,1432,294527_3971,8.0,0.0,3.0,5.0,0.0,1.0,0.0,1,1,2,1,1,1,11.0,0.255435,0.251366,0.255435,0.000000,577.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.100000,0.0,0.0,0.0,0.0
387651,294527,152,0,0.0,1.0,121.0,0.0,1.0,24.0,41.0,0.0,4.0,7.0,121.000000,24.000000,10.250000,1.750000,5,1,0.00000,0.000000,0.000000,0.000000,0.101010,0.000000,0.898990,0.000000,0.000000,0.000000,0.000000,0.200000,0.000000,0.800000,0.0,0.000000,0.000000,0.000000,20.000000,0.000000,35.600000,0.000000,0.000000,0.0,0.0,1.0,0.0,4.0,22,16,73,23,1.0,1.0,1.0,1.0,20,10,22,20,4,4,4,4,0.0,0.0,4303.0,8.0,219.0,458.0,2526.0,4.0,544.0,45.0,7.0,0.0,1.0,1.0,105.742450,6.229536,0.852016,99.741350,6.668382,0.425959,0.0_1.0,77.416906,5.293887,0.303650,0.162313,0.197636,19.648402,2.091324,4.643382,0.082721,0.002344,0.004811,0.026027,0.125941,0.096336,0.068583,0.675959,0.001311,0.002621,0.000000,0.171691,0.095675,0.011796,0.716907,1.900000,2.294118,11.722222,46.409091,26.931034,17.935484,498.181818,1.000000,1.000000,0.000000,26.200000,14.600000,2.250000,136.750000,2,13,162,1698,1.0,5.0,16.0,176.0,2,9,99,978,1,5,40,474,294527_152,0.0,0.0,0.0,7.0,0.0,1.0,1.0,1,1,1,1,1,1,0.0,0.255435,0.251366,0.255435,0.000000,176.0,0.0,0.0,0.000000,0.000000,0.0,1.0,0.000000,0.200000,0.428571,0.0,0.0,0.0,0.0
387654,294527,2537,0,0.0,1.0,121.0,0.0,1.0,24.0,41.0,0.0,4.0,7.0,121.000000,24.000000,10.250000,1.750000,5,1,0.00000,0.000000,0.000000,0.000000,0.101010,0.000000,0.898990,0.000000,0.000000,0.000000,0.000000,0.200000,0.000000,0.800000,0.0,0.000000,0.000000,0.000000,20.000000,0.000000,35.600000,0.000000,0.000000,0.0,0.0,1.0,0.0,4.0,22,16,73,23,1.0,1.0,1.0,1.0,20,10,22,20,4,4,4,4,0.0,0.0,26137.0,85.0,1829.0,2038.0,13456.0,38.0,4231.0,280.0,0.0,0.0,1.0,0.0,105.742450,6.229536,0.852016,99.741350,6.668382,0.425959,0.0_1.0,77.416906,5.293887,0.303650,0.162313,0.197636,14.290323,1.114270,3.180336,0.066178,0.012496,0.089450,0.030461,0.072358,0.111594,0.105793,0.577848,0.015017,0.045710,0.018977,0.048515,0.093069,0.057426,0.721287,28.619048,143.400000,47.258065,112.258065,178.900000,164.129032,2526.454545,4.789474,10.653846,3.965517,9.483871,18.800000,11.225806,485.666667,2,20,221,8269,1.0,15.0,136.0,1414.0,2,18,144,3980,1,15,127,2935,294527_2537,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1,1,1,1,1,1,0.0,0.255435,0.251366,0.255435,0.000000,1414.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.050000,0.0,0.0,0.0,0.0


In [None]:
# Consecutive months ratio in certain ranges

# User-activity

In [205]:
#Monthwise activity count
temp = df[['user_id','time_stamp']].groupby(['user_id',df['time_stamp'].dt.month]).count()\
           .rename(columns = {'time_stamp':'count'}) 
    
temp = pd.pivot_table(temp, index = 'user_id', columns = 'time_stamp', values = 'count').reset_index().fillna(0)
    
temp['total'] = temp[5]+temp[6]+temp[7]+temp[8]+temp[9]+temp[10]+temp[11]

In [206]:
temp['user_monthwise_mean'] = temp['total'] /7

In [207]:
temp

time_stamp,user_id,5,6,7,8,9,10,11,total,user_monthwise_mean
0,1,0.0,0.0,0.0,0.0,0.0,16.0,17.0,33.0,4.714286
1,2,2.0,26.0,1.0,20.0,0.0,5.0,9.0,63.0,9.000000
2,3,2.0,3.0,1.0,3.0,22.0,5.0,32.0,68.0,9.714286
3,4,12.0,0.0,12.0,0.0,7.0,3.0,16.0,50.0,7.142857
4,5,16.0,21.0,3.0,37.0,12.0,52.0,32.0,173.0,24.714286
...,...,...,...,...,...,...,...,...,...,...
424165,424166,7.0,0.0,0.0,6.0,29.0,4.0,44.0,90.0,12.857143
424166,424167,7.0,0.0,0.0,0.0,2.0,9.0,17.0,35.0,5.000000
424167,424168,9.0,5.0,36.0,37.0,46.0,40.0,50.0,223.0,31.857143
424168,424169,15.0,17.0,28.0,22.0,2.0,103.0,110.0,297.0,42.428571


In [208]:
temp['useractivity6/5'] = temp[6]/temp[5]
temp['useractivity7/6'] = temp[7]/temp[6]
temp['useractivity8/7'] = temp[8]/temp[7]
temp['useractivity9/8'] = temp[9]/temp[8]
temp['useractivity10/9'] = temp[10]/temp[9]
temp['useractivity11/10'] = temp[11]/temp[10]


In [209]:
temp = temp.fillna(0)

In [210]:
temp

time_stamp,user_id,5,6,7,8,9,10,11,total,user_monthwise_mean,useractivity6/5,useractivity7/6,useractivity8/7,useractivity9/8,useractivity10/9,useractivity11/10
0,1,0.0,0.0,0.0,0.0,0.0,16.0,17.0,33.0,4.714286,0.000000,0.000000,0.000000,0.000000,inf,1.062500
1,2,2.0,26.0,1.0,20.0,0.0,5.0,9.0,63.0,9.000000,13.000000,0.038462,20.000000,0.000000,inf,1.800000
2,3,2.0,3.0,1.0,3.0,22.0,5.0,32.0,68.0,9.714286,1.500000,0.333333,3.000000,7.333333,0.227273,6.400000
3,4,12.0,0.0,12.0,0.0,7.0,3.0,16.0,50.0,7.142857,0.000000,inf,0.000000,inf,0.428571,5.333333
4,5,16.0,21.0,3.0,37.0,12.0,52.0,32.0,173.0,24.714286,1.312500,0.142857,12.333333,0.324324,4.333333,0.615385
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
424165,424166,7.0,0.0,0.0,6.0,29.0,4.0,44.0,90.0,12.857143,0.000000,0.000000,inf,4.833333,0.137931,11.000000
424166,424167,7.0,0.0,0.0,0.0,2.0,9.0,17.0,35.0,5.000000,0.000000,0.000000,0.000000,inf,4.500000,1.888889
424167,424168,9.0,5.0,36.0,37.0,46.0,40.0,50.0,223.0,31.857143,0.555556,7.200000,1.027778,1.243243,0.869565,1.250000
424168,424169,15.0,17.0,28.0,22.0,2.0,103.0,110.0,297.0,42.428571,1.133333,1.647059,0.785714,0.090909,51.500000,1.067961


In [211]:
for i in range(5,12):
    temp[i] = temp[i] - temp['user_monthwise_mean']

In [212]:
temp

time_stamp,user_id,5,6,7,8,9,10,11,total,user_monthwise_mean,useractivity6/5,useractivity7/6,useractivity8/7,useractivity9/8,useractivity10/9,useractivity11/10
0,1,-4.714286,-4.714286,-4.714286,-4.714286,-4.714286,11.285714,12.285714,33.0,4.714286,0.000000,0.000000,0.000000,0.000000,inf,1.062500
1,2,-7.000000,17.000000,-8.000000,11.000000,-9.000000,-4.000000,0.000000,63.0,9.000000,13.000000,0.038462,20.000000,0.000000,inf,1.800000
2,3,-7.714286,-6.714286,-8.714286,-6.714286,12.285714,-4.714286,22.285714,68.0,9.714286,1.500000,0.333333,3.000000,7.333333,0.227273,6.400000
3,4,4.857143,-7.142857,4.857143,-7.142857,-0.142857,-4.142857,8.857143,50.0,7.142857,0.000000,inf,0.000000,inf,0.428571,5.333333
4,5,-8.714286,-3.714286,-21.714286,12.285714,-12.714286,27.285714,7.285714,173.0,24.714286,1.312500,0.142857,12.333333,0.324324,4.333333,0.615385
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
424165,424166,-5.857143,-12.857143,-12.857143,-6.857143,16.142857,-8.857143,31.142857,90.0,12.857143,0.000000,0.000000,inf,4.833333,0.137931,11.000000
424166,424167,2.000000,-5.000000,-5.000000,-5.000000,-3.000000,4.000000,12.000000,35.0,5.000000,0.000000,0.000000,0.000000,inf,4.500000,1.888889
424167,424168,-22.857143,-26.857143,4.142857,5.142857,14.142857,8.142857,18.142857,223.0,31.857143,0.555556,7.200000,1.027778,1.243243,0.869565,1.250000
424168,424169,-27.428571,-25.428571,-14.428571,-20.428571,-40.428571,60.571429,67.571429,297.0,42.428571,1.133333,1.647059,0.785714,0.090909,51.500000,1.067961


In [213]:
temp = temp.rename(columns = {5:'useractivity5diffmean',6:'useractivity6diffmean',7:'useractivity7diffmean'
                                ,8:'useractivity8diffmean',9:'useractivity9diffmean',10:'useractivity10diffmean'
                                           ,11:'useractivity11diffmean'})

In [215]:
temp = temp.replace(np.inf, np.nan)
temp = temp.fillna(0)

In [217]:
temp = temp.drop('total', 1)

In [218]:
temp

time_stamp,user_id,useractivity5diffmean,useractivity6diffmean,useractivity7diffmean,useractivity8diffmean,useractivity9diffmean,useractivity10diffmean,useractivity11diffmean,user_monthwise_mean,useractivity6/5,useractivity7/6,useractivity8/7,useractivity9/8,useractivity10/9,useractivity11/10
0,1,-4.714286,-4.714286,-4.714286,-4.714286,-4.714286,11.285714,12.285714,4.714286,0.000000,0.000000,0.000000,0.000000,0.000000,1.062500
1,2,-7.000000,17.000000,-8.000000,11.000000,-9.000000,-4.000000,0.000000,9.000000,13.000000,0.038462,20.000000,0.000000,0.000000,1.800000
2,3,-7.714286,-6.714286,-8.714286,-6.714286,12.285714,-4.714286,22.285714,9.714286,1.500000,0.333333,3.000000,7.333333,0.227273,6.400000
3,4,4.857143,-7.142857,4.857143,-7.142857,-0.142857,-4.142857,8.857143,7.142857,0.000000,0.000000,0.000000,0.000000,0.428571,5.333333
4,5,-8.714286,-3.714286,-21.714286,12.285714,-12.714286,27.285714,7.285714,24.714286,1.312500,0.142857,12.333333,0.324324,4.333333,0.615385
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
424165,424166,-5.857143,-12.857143,-12.857143,-6.857143,16.142857,-8.857143,31.142857,12.857143,0.000000,0.000000,0.000000,4.833333,0.137931,11.000000
424166,424167,2.000000,-5.000000,-5.000000,-5.000000,-3.000000,4.000000,12.000000,5.000000,0.000000,0.000000,0.000000,0.000000,4.500000,1.888889
424167,424168,-22.857143,-26.857143,4.142857,5.142857,14.142857,8.142857,18.142857,31.857143,0.555556,7.200000,1.027778,1.243243,0.869565,1.250000
424168,424169,-27.428571,-25.428571,-14.428571,-20.428571,-40.428571,60.571429,67.571429,42.428571,1.133333,1.647059,0.785714,0.090909,51.500000,1.067961


In [219]:
df_combined = pd.merge(df_combined, temp, on = 'user_id', how = 'left')

# User-purchase

In [220]:
#Monthwise activity count
temp = df[df.action_type == 2][['user_id','time_stamp']].groupby(['user_id',df[df.action_type == 2]['time_stamp'].dt.month]).count()\
           .rename(columns = {'time_stamp':'count'}) 
    
temp = pd.pivot_table(temp, index = 'user_id', columns = 'time_stamp', values = 'count').reset_index().fillna(0)
    
temp['total'] = temp[5]+temp[6]+temp[7]+temp[8]+temp[9]+temp[10]+temp[11]

In [222]:
temp['user_purchase_monthwise_mean'] = temp['total'] /7

In [223]:
temp

time_stamp,user_id,5,6,7,8,9,10,11,total,user_purchase_monthwise_mean
0,1,0.0,0.0,0.0,0.0,0.0,2.0,4.0,6.0,0.857143
1,2,0.0,3.0,1.0,1.0,0.0,2.0,7.0,14.0,2.000000
2,3,2.0,0.0,0.0,1.0,0.0,0.0,1.0,4.0,0.571429
3,4,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.142857
4,5,2.0,1.0,0.0,1.0,2.0,4.0,3.0,13.0,1.857143
...,...,...,...,...,...,...,...,...,...,...
424165,424166,3.0,0.0,0.0,2.0,3.0,0.0,3.0,11.0,1.571429
424166,424167,1.0,0.0,0.0,0.0,0.0,2.0,3.0,6.0,0.857143
424167,424168,0.0,0.0,0.0,3.0,1.0,0.0,2.0,6.0,0.857143
424168,424169,3.0,0.0,0.0,2.0,0.0,4.0,8.0,17.0,2.428571


In [224]:
temp['userpurchase6/5'] = temp[6]/temp[5]
temp['userpurchase7/6'] = temp[7]/temp[6]
temp['userpurchase8/7'] = temp[8]/temp[7]
temp['userpurchase9/8'] = temp[9]/temp[8]
temp['userpurchase10/9'] = temp[10]/temp[9]
temp['userpurchase11/10'] = temp[11]/temp[10]


In [225]:
temp = temp.fillna(0)

In [226]:
temp

time_stamp,user_id,5,6,7,8,9,10,11,total,user_purchase_monthwise_mean,userpurchase6/5,userpurchase7/6,userpurchase8/7,userpurchase9/8,userpurchase10/9,userpurchase11/10
0,1,0.0,0.0,0.0,0.0,0.0,2.0,4.0,6.0,0.857143,0.0,0.000000,0.0,0.000000,inf,2.00
1,2,0.0,3.0,1.0,1.0,0.0,2.0,7.0,14.0,2.000000,inf,0.333333,1.0,0.000000,inf,3.50
2,3,2.0,0.0,0.0,1.0,0.0,0.0,1.0,4.0,0.571429,0.0,0.000000,inf,0.000000,0.0,inf
3,4,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.142857,0.0,0.000000,0.0,0.000000,0.0,inf
4,5,2.0,1.0,0.0,1.0,2.0,4.0,3.0,13.0,1.857143,0.5,0.000000,inf,2.000000,2.0,0.75
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
424165,424166,3.0,0.0,0.0,2.0,3.0,0.0,3.0,11.0,1.571429,0.0,0.000000,inf,1.500000,0.0,inf
424166,424167,1.0,0.0,0.0,0.0,0.0,2.0,3.0,6.0,0.857143,0.0,0.000000,0.0,0.000000,inf,1.50
424167,424168,0.0,0.0,0.0,3.0,1.0,0.0,2.0,6.0,0.857143,0.0,0.000000,inf,0.333333,0.0,inf
424168,424169,3.0,0.0,0.0,2.0,0.0,4.0,8.0,17.0,2.428571,0.0,0.000000,inf,0.000000,inf,2.00


In [227]:
for i in range(5,12):
    temp[i] = temp[i] - temp['user_purchase_monthwise_mean']

In [228]:
temp

time_stamp,user_id,5,6,7,8,9,10,11,total,user_purchase_monthwise_mean,userpurchase6/5,userpurchase7/6,userpurchase8/7,userpurchase9/8,userpurchase10/9,userpurchase11/10
0,1,-0.857143,-0.857143,-0.857143,-0.857143,-0.857143,1.142857,3.142857,6.0,0.857143,0.0,0.000000,0.0,0.000000,inf,2.00
1,2,-2.000000,1.000000,-1.000000,-1.000000,-2.000000,0.000000,5.000000,14.0,2.000000,inf,0.333333,1.0,0.000000,inf,3.50
2,3,1.428571,-0.571429,-0.571429,0.428571,-0.571429,-0.571429,0.428571,4.0,0.571429,0.0,0.000000,inf,0.000000,0.0,inf
3,4,-0.142857,-0.142857,-0.142857,-0.142857,-0.142857,-0.142857,0.857143,1.0,0.142857,0.0,0.000000,0.0,0.000000,0.0,inf
4,5,0.142857,-0.857143,-1.857143,-0.857143,0.142857,2.142857,1.142857,13.0,1.857143,0.5,0.000000,inf,2.000000,2.0,0.75
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
424165,424166,1.428571,-1.571429,-1.571429,0.428571,1.428571,-1.571429,1.428571,11.0,1.571429,0.0,0.000000,inf,1.500000,0.0,inf
424166,424167,0.142857,-0.857143,-0.857143,-0.857143,-0.857143,1.142857,2.142857,6.0,0.857143,0.0,0.000000,0.0,0.000000,inf,1.50
424167,424168,-0.857143,-0.857143,-0.857143,2.142857,0.142857,-0.857143,1.142857,6.0,0.857143,0.0,0.000000,inf,0.333333,0.0,inf
424168,424169,0.571429,-2.428571,-2.428571,-0.428571,-2.428571,1.571429,5.571429,17.0,2.428571,0.0,0.000000,inf,0.000000,inf,2.00


In [232]:
temp = temp.rename(columns = {5:'userpurchase5diffmean',6:'userpurchase6diffmean',7:'userpurchase7diffmean'
                                ,8:'userpurchase8diffmean',9:'userpurchase9diffmean',10:'userpurchase10diffmean'
                                           ,11:'userpurchase11diffmean'})

In [229]:
temp = temp.replace(np.inf, np.nan)
temp = temp.fillna(0)

In [230]:
temp = temp.drop('total', 1)

In [233]:
temp

time_stamp,user_id,userpurchase5diffmean,userpurchase6diffmean,userpurchase7diffmean,userpurchase8diffmean,userpurchase9diffmean,userpurchase10diffmean,userpurchase11diffmean,user_purchase_monthwise_mean,userpurchase6/5,userpurchase7/6,userpurchase8/7,userpurchase9/8,userpurchase10/9,userpurchase11/10
0,1,-0.857143,-0.857143,-0.857143,-0.857143,-0.857143,1.142857,3.142857,0.857143,0.0,0.000000,0.0,0.000000,0.0,2.00
1,2,-2.000000,1.000000,-1.000000,-1.000000,-2.000000,0.000000,5.000000,2.000000,0.0,0.333333,1.0,0.000000,0.0,3.50
2,3,1.428571,-0.571429,-0.571429,0.428571,-0.571429,-0.571429,0.428571,0.571429,0.0,0.000000,0.0,0.000000,0.0,0.00
3,4,-0.142857,-0.142857,-0.142857,-0.142857,-0.142857,-0.142857,0.857143,0.142857,0.0,0.000000,0.0,0.000000,0.0,0.00
4,5,0.142857,-0.857143,-1.857143,-0.857143,0.142857,2.142857,1.142857,1.857143,0.5,0.000000,0.0,2.000000,2.0,0.75
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
424165,424166,1.428571,-1.571429,-1.571429,0.428571,1.428571,-1.571429,1.428571,1.571429,0.0,0.000000,0.0,1.500000,0.0,0.00
424166,424167,0.142857,-0.857143,-0.857143,-0.857143,-0.857143,1.142857,2.142857,0.857143,0.0,0.000000,0.0,0.000000,0.0,1.50
424167,424168,-0.857143,-0.857143,-0.857143,2.142857,0.142857,-0.857143,1.142857,0.857143,0.0,0.000000,0.0,0.333333,0.0,0.00
424168,424169,0.571429,-2.428571,-2.428571,-0.428571,-2.428571,1.571429,5.571429,2.428571,0.0,0.000000,0.0,0.000000,0.0,2.00


In [234]:
df_combined = pd.merge(df_combined, temp, on = 'user_id', how = 'left')

# Seller-activity

In [235]:
#Monthwise activity count
temp = df[['seller_id','time_stamp']].groupby(['seller_id',df['time_stamp'].dt.month]).count()\
           .rename(columns = {'time_stamp':'count'}) 
    
temp = pd.pivot_table(temp, index = 'seller_id', columns = 'time_stamp', values = 'count').reset_index().fillna(0)
    
temp['total'] = temp[5]+temp[6]+temp[7]+temp[8]+temp[9]+temp[10]+temp[11]

In [236]:
temp

time_stamp,seller_id,5,6,7,8,9,10,11,total
0,1,28140.0,27652.0,27596.0,27675.0,39192.0,41806.0,147079.0,339140.0
1,2,99.0,166.0,102.0,191.0,363.0,353.0,1097.0,2371.0
2,3,83.0,213.0,111.0,93.0,212.0,803.0,1130.0,2645.0
3,4,95.0,281.0,598.0,522.0,544.0,713.0,353.0,3106.0
4,5,534.0,928.0,768.0,702.0,795.0,1423.0,3042.0,8192.0
...,...,...,...,...,...,...,...,...,...
4990,4991,5.0,7.0,4.0,8.0,20.0,162.0,460.0,666.0
4991,4992,313.0,924.0,429.0,353.0,647.0,1878.0,9332.0,13876.0
4992,4993,487.0,1431.0,2161.0,3081.0,1932.0,1670.0,3505.0,14267.0
4993,4994,165.0,658.0,186.0,221.0,770.0,1251.0,2908.0,6159.0


In [237]:
temp['seller_activity_monthwise_mean'] = temp['total'] /7

In [238]:
temp

time_stamp,seller_id,5,6,7,8,9,10,11,total,seller_activity_monthwise_mean
0,1,28140.0,27652.0,27596.0,27675.0,39192.0,41806.0,147079.0,339140.0,48448.571429
1,2,99.0,166.0,102.0,191.0,363.0,353.0,1097.0,2371.0,338.714286
2,3,83.0,213.0,111.0,93.0,212.0,803.0,1130.0,2645.0,377.857143
3,4,95.0,281.0,598.0,522.0,544.0,713.0,353.0,3106.0,443.714286
4,5,534.0,928.0,768.0,702.0,795.0,1423.0,3042.0,8192.0,1170.285714
...,...,...,...,...,...,...,...,...,...,...
4990,4991,5.0,7.0,4.0,8.0,20.0,162.0,460.0,666.0,95.142857
4991,4992,313.0,924.0,429.0,353.0,647.0,1878.0,9332.0,13876.0,1982.285714
4992,4993,487.0,1431.0,2161.0,3081.0,1932.0,1670.0,3505.0,14267.0,2038.142857
4993,4994,165.0,658.0,186.0,221.0,770.0,1251.0,2908.0,6159.0,879.857143


In [239]:
temp['selleractivity6/5'] = temp[6]/temp[5]
temp['selleractivity7/6'] = temp[7]/temp[6]
temp['selleractivity8/7'] = temp[8]/temp[7]
temp['selleractivity9/8'] = temp[9]/temp[8]
temp['selleractivity10/9'] = temp[10]/temp[9]
temp['selleractivity11/10'] = temp[11]/temp[10]


In [240]:
temp = temp.fillna(0)

In [241]:
temp

time_stamp,seller_id,5,6,7,8,9,10,11,total,seller_activity_monthwise_mean,selleractivity6/5,selleractivity7/6,selleractivity8/7,selleractivity9/8,selleractivity10/9,selleractivity11/10
0,1,28140.0,27652.0,27596.0,27675.0,39192.0,41806.0,147079.0,339140.0,48448.571429,0.982658,0.997975,1.002863,1.416152,1.066697,3.518131
1,2,99.0,166.0,102.0,191.0,363.0,353.0,1097.0,2371.0,338.714286,1.676768,0.614458,1.872549,1.900524,0.972452,3.107649
2,3,83.0,213.0,111.0,93.0,212.0,803.0,1130.0,2645.0,377.857143,2.566265,0.521127,0.837838,2.279570,3.787736,1.407223
3,4,95.0,281.0,598.0,522.0,544.0,713.0,353.0,3106.0,443.714286,2.957895,2.128114,0.872910,1.042146,1.310662,0.495091
4,5,534.0,928.0,768.0,702.0,795.0,1423.0,3042.0,8192.0,1170.285714,1.737828,0.827586,0.914062,1.132479,1.789937,2.137737
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4990,4991,5.0,7.0,4.0,8.0,20.0,162.0,460.0,666.0,95.142857,1.400000,0.571429,2.000000,2.500000,8.100000,2.839506
4991,4992,313.0,924.0,429.0,353.0,647.0,1878.0,9332.0,13876.0,1982.285714,2.952077,0.464286,0.822844,1.832861,2.902628,4.969116
4992,4993,487.0,1431.0,2161.0,3081.0,1932.0,1670.0,3505.0,14267.0,2038.142857,2.938398,1.510133,1.425729,0.627069,0.864389,2.098802
4993,4994,165.0,658.0,186.0,221.0,770.0,1251.0,2908.0,6159.0,879.857143,3.987879,0.282675,1.188172,3.484163,1.624675,2.324540


In [242]:
for i in range(5,12):
    temp[i] = temp[i] - temp['seller_activity_monthwise_mean']

In [243]:
temp

time_stamp,seller_id,5,6,7,8,9,10,11,total,seller_activity_monthwise_mean,selleractivity6/5,selleractivity7/6,selleractivity8/7,selleractivity9/8,selleractivity10/9,selleractivity11/10
0,1,-20308.571429,-20796.571429,-20852.571429,-20773.571429,-9256.571429,-6642.571429,98630.428571,339140.0,48448.571429,0.982658,0.997975,1.002863,1.416152,1.066697,3.518131
1,2,-239.714286,-172.714286,-236.714286,-147.714286,24.285714,14.285714,758.285714,2371.0,338.714286,1.676768,0.614458,1.872549,1.900524,0.972452,3.107649
2,3,-294.857143,-164.857143,-266.857143,-284.857143,-165.857143,425.142857,752.142857,2645.0,377.857143,2.566265,0.521127,0.837838,2.279570,3.787736,1.407223
3,4,-348.714286,-162.714286,154.285714,78.285714,100.285714,269.285714,-90.714286,3106.0,443.714286,2.957895,2.128114,0.872910,1.042146,1.310662,0.495091
4,5,-636.285714,-242.285714,-402.285714,-468.285714,-375.285714,252.714286,1871.714286,8192.0,1170.285714,1.737828,0.827586,0.914062,1.132479,1.789937,2.137737
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4990,4991,-90.142857,-88.142857,-91.142857,-87.142857,-75.142857,66.857143,364.857143,666.0,95.142857,1.400000,0.571429,2.000000,2.500000,8.100000,2.839506
4991,4992,-1669.285714,-1058.285714,-1553.285714,-1629.285714,-1335.285714,-104.285714,7349.714286,13876.0,1982.285714,2.952077,0.464286,0.822844,1.832861,2.902628,4.969116
4992,4993,-1551.142857,-607.142857,122.857143,1042.857143,-106.142857,-368.142857,1466.857143,14267.0,2038.142857,2.938398,1.510133,1.425729,0.627069,0.864389,2.098802
4993,4994,-714.857143,-221.857143,-693.857143,-658.857143,-109.857143,371.142857,2028.142857,6159.0,879.857143,3.987879,0.282675,1.188172,3.484163,1.624675,2.324540


In [244]:
temp = temp.rename(columns = {5:'selleractivity5diffmean',6:'selleractivity6diffmean',7:'selleractivity7diffmean'
                                ,8:'selleractivity8diffmean',9:'selleractivity9diffmean',10:'selleractivity10diffmean'
                                ,11:'selleractivity11diffmean'})

In [245]:
temp = temp.replace(np.inf, np.nan)
temp = temp.fillna(0)

In [246]:
temp = temp.drop('total', 1)

In [247]:
temp

time_stamp,seller_id,selleractivity5diffmean,selleractivity6diffmean,selleractivity7diffmean,selleractivity8diffmean,selleractivity9diffmean,selleractivity10diffmean,selleractivity11diffmean,seller_activity_monthwise_mean,selleractivity6/5,selleractivity7/6,selleractivity8/7,selleractivity9/8,selleractivity10/9,selleractivity11/10
0,1,-20308.571429,-20796.571429,-20852.571429,-20773.571429,-9256.571429,-6642.571429,98630.428571,48448.571429,0.982658,0.997975,1.002863,1.416152,1.066697,3.518131
1,2,-239.714286,-172.714286,-236.714286,-147.714286,24.285714,14.285714,758.285714,338.714286,1.676768,0.614458,1.872549,1.900524,0.972452,3.107649
2,3,-294.857143,-164.857143,-266.857143,-284.857143,-165.857143,425.142857,752.142857,377.857143,2.566265,0.521127,0.837838,2.279570,3.787736,1.407223
3,4,-348.714286,-162.714286,154.285714,78.285714,100.285714,269.285714,-90.714286,443.714286,2.957895,2.128114,0.872910,1.042146,1.310662,0.495091
4,5,-636.285714,-242.285714,-402.285714,-468.285714,-375.285714,252.714286,1871.714286,1170.285714,1.737828,0.827586,0.914062,1.132479,1.789937,2.137737
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4990,4991,-90.142857,-88.142857,-91.142857,-87.142857,-75.142857,66.857143,364.857143,95.142857,1.400000,0.571429,2.000000,2.500000,8.100000,2.839506
4991,4992,-1669.285714,-1058.285714,-1553.285714,-1629.285714,-1335.285714,-104.285714,7349.714286,1982.285714,2.952077,0.464286,0.822844,1.832861,2.902628,4.969116
4992,4993,-1551.142857,-607.142857,122.857143,1042.857143,-106.142857,-368.142857,1466.857143,2038.142857,2.938398,1.510133,1.425729,0.627069,0.864389,2.098802
4993,4994,-714.857143,-221.857143,-693.857143,-658.857143,-109.857143,371.142857,2028.142857,879.857143,3.987879,0.282675,1.188172,3.484163,1.624675,2.324540


In [249]:
df_combined = pd.merge(df_combined, temp, on = 'seller_id', how = 'left')

# Seller-purchase

In [265]:
#Monthwise activity count
temp = df[df.action_type == 2][['seller_id','time_stamp']].groupby(['seller_id',df[df.action_type == 2]['time_stamp'].dt.month]).count()\
           .rename(columns = {'time_stamp':'count'}) 
    
temp = pd.pivot_table(temp, index = 'seller_id', columns = 'time_stamp', values = 'count').reset_index().fillna(0)
    
temp['total'] = temp[5]+temp[6]+temp[7]+temp[8]+temp[9]+temp[10]+temp[11]

In [266]:
temp

time_stamp,seller_id,5,6,7,8,9,10,11,total
0,1,2329.0,1408.0,1320.0,1353.0,2143.0,1558.0,7594.0,17705.0
1,2,9.0,6.0,4.0,5.0,22.0,13.0,130.0,189.0
2,3,1.0,3.0,1.0,4.0,3.0,20.0,35.0,67.0
3,4,10.0,17.0,79.0,42.0,49.0,72.0,25.0,294.0
4,5,23.0,12.0,11.0,7.0,14.0,12.0,65.0,144.0
...,...,...,...,...,...,...,...,...,...
4990,4991,1.0,0.0,0.0,0.0,0.0,9.0,70.0,80.0
4991,4992,32.0,91.0,39.0,26.0,52.0,239.0,1492.0,1971.0
4992,4993,35.0,56.0,107.0,152.0,70.0,45.0,304.0,769.0
4993,4994,11.0,33.0,4.0,5.0,17.0,26.0,68.0,164.0


In [267]:
temp['seller_purchase_monthwise_mean'] = temp['total'] /7

In [268]:
temp

time_stamp,seller_id,5,6,7,8,9,10,11,total,seller_purchase_monthwise_mean
0,1,2329.0,1408.0,1320.0,1353.0,2143.0,1558.0,7594.0,17705.0,2529.285714
1,2,9.0,6.0,4.0,5.0,22.0,13.0,130.0,189.0,27.000000
2,3,1.0,3.0,1.0,4.0,3.0,20.0,35.0,67.0,9.571429
3,4,10.0,17.0,79.0,42.0,49.0,72.0,25.0,294.0,42.000000
4,5,23.0,12.0,11.0,7.0,14.0,12.0,65.0,144.0,20.571429
...,...,...,...,...,...,...,...,...,...,...
4990,4991,1.0,0.0,0.0,0.0,0.0,9.0,70.0,80.0,11.428571
4991,4992,32.0,91.0,39.0,26.0,52.0,239.0,1492.0,1971.0,281.571429
4992,4993,35.0,56.0,107.0,152.0,70.0,45.0,304.0,769.0,109.857143
4993,4994,11.0,33.0,4.0,5.0,17.0,26.0,68.0,164.0,23.428571


In [269]:
temp['sellerpurchase6/5'] = temp[6]/temp[5]
temp['sellerpurchase7/6'] = temp[7]/temp[6]
temp['sellerpurchase8/7'] = temp[8]/temp[7]
temp['sellerpurchase9/8'] = temp[9]/temp[8]
temp['sellerpurchase10/9'] = temp[10]/temp[9]
temp['sellerpurchase11/10'] = temp[11]/temp[10]


In [270]:
temp = temp.fillna(0)

In [271]:
temp

time_stamp,seller_id,5,6,7,8,9,10,11,total,seller_purchase_monthwise_mean,sellerpurchase6/5,sellerpurchase7/6,sellerpurchase8/7,sellerpurchase9/8,sellerpurchase10/9,sellerpurchase11/10
0,1,2329.0,1408.0,1320.0,1353.0,2143.0,1558.0,7594.0,17705.0,2529.285714,0.604551,0.937500,1.025000,1.583888,0.727018,4.874198
1,2,9.0,6.0,4.0,5.0,22.0,13.0,130.0,189.0,27.000000,0.666667,0.666667,1.250000,4.400000,0.590909,10.000000
2,3,1.0,3.0,1.0,4.0,3.0,20.0,35.0,67.0,9.571429,3.000000,0.333333,4.000000,0.750000,6.666667,1.750000
3,4,10.0,17.0,79.0,42.0,49.0,72.0,25.0,294.0,42.000000,1.700000,4.647059,0.531646,1.166667,1.469388,0.347222
4,5,23.0,12.0,11.0,7.0,14.0,12.0,65.0,144.0,20.571429,0.521739,0.916667,0.636364,2.000000,0.857143,5.416667
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4990,4991,1.0,0.0,0.0,0.0,0.0,9.0,70.0,80.0,11.428571,0.000000,0.000000,0.000000,0.000000,inf,7.777778
4991,4992,32.0,91.0,39.0,26.0,52.0,239.0,1492.0,1971.0,281.571429,2.843750,0.428571,0.666667,2.000000,4.596154,6.242678
4992,4993,35.0,56.0,107.0,152.0,70.0,45.0,304.0,769.0,109.857143,1.600000,1.910714,1.420561,0.460526,0.642857,6.755556
4993,4994,11.0,33.0,4.0,5.0,17.0,26.0,68.0,164.0,23.428571,3.000000,0.121212,1.250000,3.400000,1.529412,2.615385


In [272]:
for i in range(5,12):
    temp[i] = temp[i] - temp['seller_purchase_monthwise_mean']

In [273]:
temp

time_stamp,seller_id,5,6,7,8,9,10,11,total,seller_purchase_monthwise_mean,sellerpurchase6/5,sellerpurchase7/6,sellerpurchase8/7,sellerpurchase9/8,sellerpurchase10/9,sellerpurchase11/10
0,1,-200.285714,-1121.285714,-1209.285714,-1176.285714,-386.285714,-971.285714,5064.714286,17705.0,2529.285714,0.604551,0.937500,1.025000,1.583888,0.727018,4.874198
1,2,-18.000000,-21.000000,-23.000000,-22.000000,-5.000000,-14.000000,103.000000,189.0,27.000000,0.666667,0.666667,1.250000,4.400000,0.590909,10.000000
2,3,-8.571429,-6.571429,-8.571429,-5.571429,-6.571429,10.428571,25.428571,67.0,9.571429,3.000000,0.333333,4.000000,0.750000,6.666667,1.750000
3,4,-32.000000,-25.000000,37.000000,0.000000,7.000000,30.000000,-17.000000,294.0,42.000000,1.700000,4.647059,0.531646,1.166667,1.469388,0.347222
4,5,2.428571,-8.571429,-9.571429,-13.571429,-6.571429,-8.571429,44.428571,144.0,20.571429,0.521739,0.916667,0.636364,2.000000,0.857143,5.416667
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4990,4991,-10.428571,-11.428571,-11.428571,-11.428571,-11.428571,-2.428571,58.571429,80.0,11.428571,0.000000,0.000000,0.000000,0.000000,inf,7.777778
4991,4992,-249.571429,-190.571429,-242.571429,-255.571429,-229.571429,-42.571429,1210.428571,1971.0,281.571429,2.843750,0.428571,0.666667,2.000000,4.596154,6.242678
4992,4993,-74.857143,-53.857143,-2.857143,42.142857,-39.857143,-64.857143,194.142857,769.0,109.857143,1.600000,1.910714,1.420561,0.460526,0.642857,6.755556
4993,4994,-12.428571,9.571429,-19.428571,-18.428571,-6.428571,2.571429,44.571429,164.0,23.428571,3.000000,0.121212,1.250000,3.400000,1.529412,2.615385


In [274]:
temp = temp.rename(columns = {5:'sellerpurchase5diffmean',6:'sellerpurchase6diffmean',7:'sellerpurchase7diffmean'
                                ,8:'sellerpurchase8diffmean',9:'sellerpurchase9diffmean',10:'sellerpurchase10diffmean'
                                ,11:'sellerpurchase11diffmean'})

In [275]:
temp = temp.replace(np.inf, np.nan)
temp = temp.fillna(0)

In [276]:
temp = temp.drop('total', 1)

In [277]:
temp

time_stamp,seller_id,sellerpurchase5diffmean,sellerpurchase6diffmean,sellerpurchase7diffmean,sellerpurchase8diffmean,sellerpurchase9diffmean,sellerpurchase10diffmean,sellerpurchase11diffmean,seller_purchase_monthwise_mean,sellerpurchase6/5,sellerpurchase7/6,sellerpurchase8/7,sellerpurchase9/8,sellerpurchase10/9,sellerpurchase11/10
0,1,-200.285714,-1121.285714,-1209.285714,-1176.285714,-386.285714,-971.285714,5064.714286,2529.285714,0.604551,0.937500,1.025000,1.583888,0.727018,4.874198
1,2,-18.000000,-21.000000,-23.000000,-22.000000,-5.000000,-14.000000,103.000000,27.000000,0.666667,0.666667,1.250000,4.400000,0.590909,10.000000
2,3,-8.571429,-6.571429,-8.571429,-5.571429,-6.571429,10.428571,25.428571,9.571429,3.000000,0.333333,4.000000,0.750000,6.666667,1.750000
3,4,-32.000000,-25.000000,37.000000,0.000000,7.000000,30.000000,-17.000000,42.000000,1.700000,4.647059,0.531646,1.166667,1.469388,0.347222
4,5,2.428571,-8.571429,-9.571429,-13.571429,-6.571429,-8.571429,44.428571,20.571429,0.521739,0.916667,0.636364,2.000000,0.857143,5.416667
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4990,4991,-10.428571,-11.428571,-11.428571,-11.428571,-11.428571,-2.428571,58.571429,11.428571,0.000000,0.000000,0.000000,0.000000,0.000000,7.777778
4991,4992,-249.571429,-190.571429,-242.571429,-255.571429,-229.571429,-42.571429,1210.428571,281.571429,2.843750,0.428571,0.666667,2.000000,4.596154,6.242678
4992,4993,-74.857143,-53.857143,-2.857143,42.142857,-39.857143,-64.857143,194.142857,109.857143,1.600000,1.910714,1.420561,0.460526,0.642857,6.755556
4993,4994,-12.428571,9.571429,-19.428571,-18.428571,-6.428571,2.571429,44.571429,23.428571,3.000000,0.121212,1.250000,3.400000,1.529412,2.615385


In [278]:
df_combined = pd.merge(df_combined, temp, on = 'seller_id', how = 'left')

In [None]:
# Cumulative increase

# /###################################################################/


# SAVE FEATURES DATAFRAME

In [279]:
df_combined

Unnamed: 0,user_id,seller_id,label,age_range,gender,userbeforesale0,userbeforesale1,userbeforesale2,userbeforesale3,useronsale0,useronsale1,useronsale2,useronsale3,user_ratio_0_to_2_before_sale,user_ratio_3_to_2_before_sale,user_ratio_0_to_2_on_sale,user_ratio_3_to_2_on_sale,active_days,purchase_days,useractivity5,useractivity6,useractivity7,useractivity8,useractivity9,useractivity10,useractivity11,userpurchase5,userpurchase6,userpurchase7,userpurchase8,userpurchase9,userpurchase10,userpurchase11,useravgactivity5,useravgactivity6,useravgactivity7,useravgactivity8,useravgactivity9,useravgactivity10,useravgactivity11,useravgpurchase5,useravgpurchase6,useravgpurchase7,useravgpurchase8,useravgpurchase9,useravgpurchase10,useravgpurchase11,user_unique_brands_before_sale,user_unique_categories_before_sale,user_unique_items_before_sale,user_unique_sellers_before_sale,user_unique_brands_purchased_before_sale,user_unique_categories_purchased_before_sale,user_unique_items_purchased_before_sale,user_unique_sellers_purchased_before_sale,user_unique_brands_on_sale,user_unique_categories_on_sale,user_unique_items_on_sale,user_unique_sellers_on_sale,user_unique_brands_purchased_on_sale,user_unique_categories_purchased_on_sale,user_unique_items_purchased_on_sale,user_unique_sellers_purchased_on_sale,user_all_month_purchase,user_purchase_only_11,sellerbeforesale0,sellerbeforesale1,sellerbeforesale2,sellerbeforesale3,selleronsale0,selleronsale1,selleronsale2,selleronsale3,userseller0,userseller1,userseller2,userseller3,age_avg_activity,age_avg_purchase,unique_users_age_norm,gender_avg_activity,gender_avg_purchase,unique_users_gender_norm,age_gender,age_gender_avg_activity,age_gender_avg_purchase,unique_users_age_gender_norm,age_gender_active_days_norm,age_gender_purchase_days_norm,seller_ratio_0_to_2_before_sale,seller_ratio_3_to_2_before_sale,seller_ratio_0_to_2_on_sale,seller_ratio_3_to_2_on_sale,selleractivity5,selleractivity6,selleractivity7,selleractivity8,selleractivity9,selleractivity10,selleractivity11,sellerpurchase5,sellerpurchase6,sellerpurchase7,sellerpurchase8,sellerpurchase9,sellerpurchase10,sellerpurchase11,selleravgactivity5,selleravgactivity6,selleravgactivity7,selleravgactivity8,selleravgactivity9,selleravgactivity10,selleravgactivity11,selleravgpurchase5,selleravgpurchase6,selleravgpurchase7,selleravgpurchase8,selleravgpurchase9,selleravgpurchase10,selleravgpurchase11,seller_unique_brands_before_sale,seller_unique_categories_before_sale,seller_unique_items_before_sale,seller_unique_users_before_sale,seller_unique_brands_purchased_before_sale,seller_unique_categories_purchased_before_sale,seller_unique_items_purchased_before_sale,seller_unique_users_purchased_before_sale,seller_unique_brands_on_sale,seller_unique_categories_on_sale,seller_unique_items_on_sale,seller_unique_users_on_sale,seller_unique_brands_purchased_on_sale,seller_unique_categories_purchased_on_sale,seller_unique_items_purchased_on_sale,seller_unique_users_purchased_on_sale,user_merchant_pair,usersellerbeforesale0,usersellerbeforesale1,usersellerbeforesale3,userselleronsale0,userselleronsale1,userselleronsale2,userselleronsale3,um_unique_brands_on_sale,um_unique_categories_on_sale,um_unique_items_on_sale,um_unique_brands_purchased_on_sale,um_unique_categories_purchased_on_sale,um_unique_items_purchased_on_sale,um_activity_before_sale,activity_diff_from_1111,activity_diff_from_second_last,purchase_diff_from_1111,purchase_diff_from_second_last,seller_count_of_label-1,brand_intersection_user_before_on_sale,brand_intersection_user_before_and_seller,ratio_brand_int_union_user_before_on_sale,ratio_brand_int_union_user_before_seller,cat_intersection_user_before_on_sale,cat_intersection_user_before_and_seller,ratio_cat_int_union_user_before_on_sale,ratio_cat_int_union_user_before_seller,ratio_cat_int_union_user_seller,item_intersection_user_before_on_sale,item_intersection_user_before_and_seller,ratio_item_int_union_user_before_on_sale,ratio_item_int_union_user_before_seller,useractivity5diffmean,useractivity6diffmean,useractivity7diffmean,useractivity8diffmean,useractivity9diffmean,useractivity10diffmean,useractivity11diffmean,user_monthwise_mean,useractivity6/5,useractivity7/6,useractivity8/7,useractivity9/8,useractivity10/9,useractivity11/10,userpurchase5diffmean,userpurchase6diffmean,userpurchase7diffmean,userpurchase8diffmean,userpurchase9diffmean,userpurchase10diffmean,userpurchase11diffmean,user_purchase_monthwise_mean,userpurchase6/5,userpurchase7/6,userpurchase8/7,userpurchase9/8,userpurchase10/9,userpurchase11/10,selleractivity5diffmean,selleractivity6diffmean,selleractivity7diffmean,selleractivity8diffmean,selleractivity9diffmean,selleractivity10diffmean,selleractivity11diffmean,seller_activity_monthwise_mean,selleractivity6/5,selleractivity7/6,selleractivity8/7,selleractivity9/8,selleractivity10/9,selleractivity11/10,sellerpurchase5diffmean,sellerpurchase6diffmean,sellerpurchase7diffmean,sellerpurchase8diffmean,sellerpurchase9diffmean,sellerpurchase10diffmean,sellerpurchase11diffmean,seller_purchase_monthwise_mean,sellerpurchase6/5,sellerpurchase7/6,sellerpurchase8/7,sellerpurchase9/8,sellerpurchase10/9,sellerpurchase11/10
0,34176,3906,0,6.0,0.0,386.0,0.0,23.0,6.0,24.0,0.0,11.0,1.0,16.782609,0.260870,2.181818,0.090909,46,16,0.10643,0.090909,0.068736,0.097561,0.042129,0.323725,0.270510,0.147059,0.117647,0.029412,0.029412,0.058824,0.176471,0.441176,12.0,6.833333,10.333333,8.800000,3.800000,8.111111,20.333333,1.666667,1.333333,1.0,1.0,1.0,1.2,7.5,106,44,251,107,17.0,16.0,19.0,17.0,8,10,11,8,6,6,7,6,1.0,0.0,13203.0,26.0,233.0,937.0,1667.0,2.0,177.0,24.0,36.0,0.0,1.0,2.0,152.653846,8.705109,0.317624,141.133806,8.156852,1.000000,6.0_0.0,170.208267,9.331121,0.337225,0.396320,0.386876,56.665236,4.021459,9.418079,0.135593,0.082918,0.163563,0.086545,0.097117,0.157908,0.153544,0.258406,0.121951,0.151220,0.034146,0.039024,0.148780,0.070732,0.434146,64.238095,88.700000,45.419355,50.967742,85.633333,80.580645,382.181818,3.571429,2.695652,1.555556,2.285714,3.588235,1.705882,89.000000,2,20,307,5330,1.0,14.0,68.0,167.0,2,17,189,672,1,13,54,145,34176_3906,33.0,0.0,2.0,3.0,0.0,1.0,0.0,1,1,1,1,1,1,35.0,0.945652,0.945355,0.945652,0.934426,167.0,1.0,0.0,0.095238,0.000000,1.0,1.0,0.100000,0.133333,0.187500,0.0,0.0,0.0,0.0,-16.428571,-23.428571,-33.428571,-20.428571,-45.428571,81.571429,57.571429,64.428571,0.854167,0.756098,1.419355,0.431818,7.684211,0.835616,0.142857,-0.857143,-3.857143,-3.857143,-2.857143,1.142857,10.142857,4.857143,0.8,0.25,1.0,2.0,3.0,2.5,-975.142857,336.857143,-916.142857,-744.142857,244.857143,173.857143,1879.857143,2324.142857,1.972572,0.529124,1.122159,1.625949,0.972363,1.682946,-8.571429,3.428571,-44.571429,-42.571429,2.428571,-29.571429,119.428571,58.571429,1.240000,0.225806,1.142857,3.812500,0.475410,6.137931
1,34176,121,0,6.0,0.0,386.0,0.0,23.0,6.0,24.0,0.0,11.0,1.0,16.782609,0.260870,2.181818,0.090909,46,16,0.10643,0.090909,0.068736,0.097561,0.042129,0.323725,0.270510,0.147059,0.117647,0.029412,0.029412,0.058824,0.176471,0.441176,12.0,6.833333,10.333333,8.800000,3.800000,8.111111,20.333333,1.666667,1.333333,1.0,1.0,1.0,1.2,7.5,106,44,251,107,17.0,16.0,19.0,17.0,8,10,11,8,6,6,7,6,1.0,0.0,48956.0,90.0,1565.0,2441.0,23309.0,31.0,3215.0,258.0,13.0,0.0,1.0,0.0,152.653846,8.705109,0.317624,141.133806,8.156852,1.000000,6.0_0.0,170.208267,9.331121,0.337225,0.396320,0.386876,31.281789,1.559744,7.250078,0.080249,0.055569,0.036349,0.065110,0.056057,0.130558,0.082564,0.573793,0.047071,0.016109,0.062552,0.034310,0.093933,0.055858,0.690167,211.333333,96.766667,167.741935,144.419355,347.566667,212.709677,4166.000000,10.714286,4.052632,12.458333,6.074074,14.966667,15.705882,412.375000,2,26,1170,8655,1.0,19.0,378.0,958.0,2,18,369,3942,1,16,276,1884,34176_121,8.0,0.0,0.0,5.0,0.0,1.0,0.0,1,1,1,1,1,1,8.0,0.945652,0.945355,0.945652,0.934426,958.0,1.0,0.0,0.095238,0.000000,1.0,1.0,0.100000,0.055556,0.076923,0.0,0.0,0.0,0.0,-16.428571,-23.428571,-33.428571,-20.428571,-45.428571,81.571429,57.571429,64.428571,0.854167,0.756098,1.419355,0.431818,7.684211,0.835616,0.142857,-0.857143,-3.857143,-3.857143,-2.857143,1.142857,10.142857,4.857143,0.8,0.25,1.0,2.0,3.0,2.5,-6971.285714,-8506.285714,-6209.285714,-6932.285714,-982.285714,-4815.285714,34416.714286,11409.285714,0.654123,1.791250,0.860962,2.329015,0.632397,6.949651,-457.857143,-605.857143,-383.857143,-518.857143,-233.857143,-415.857143,2616.142857,682.857143,0.342222,3.883117,0.548495,2.737805,0.594655,12.355805
2,34176,4356,1,6.0,0.0,386.0,0.0,23.0,6.0,24.0,0.0,11.0,1.0,16.782609,0.260870,2.181818,0.090909,46,16,0.10643,0.090909,0.068736,0.097561,0.042129,0.323725,0.270510,0.147059,0.117647,0.029412,0.029412,0.058824,0.176471,0.441176,12.0,6.833333,10.333333,8.800000,3.800000,8.111111,20.333333,1.666667,1.333333,1.0,1.0,1.0,1.2,7.5,106,44,251,107,17.0,16.0,19.0,17.0,8,10,11,8,6,6,7,6,1.0,0.0,5724.0,16.0,858.0,188.0,370.0,0.0,105.0,8.0,12.0,0.0,6.0,0.0,152.653846,8.705109,0.317624,141.133806,8.156852,1.000000,6.0_0.0,170.208267,9.331121,0.337225,0.396320,0.386876,6.671329,0.219114,3.523810,0.076190,0.000550,0.020911,0.394690,0.180355,0.126840,0.111570,0.165085,0.000000,0.026999,0.406023,0.154725,0.127726,0.111111,0.173416,1.333333,8.000000,95.633333,42.290323,30.733333,26.161290,109.090909,0.000000,2.888889,21.722222,8.277778,5.590909,4.863636,15.181818,2,14,62,2137,1.0,8.0,26.0,611.0,1,13,42,185,1,7,21,64,34176_4356,6.0,0.0,0.0,6.0,0.0,6.0,0.0,1,1,2,1,1,2,6.0,0.945652,0.945355,0.945652,0.934426,611.0,1.0,1.0,0.095238,0.058824,1.0,1.0,0.100000,0.136364,0.160000,0.0,0.0,0.0,0.0,-16.428571,-23.428571,-33.428571,-20.428571,-45.428571,81.571429,57.571429,64.428571,0.854167,0.756098,1.419355,0.431818,7.684211,0.835616,0.142857,-0.857143,-3.857143,-3.857143,-2.857143,1.142857,10.142857,4.857143,0.8,0.25,1.0,2.0,3.0,2.5,-1034.428571,-886.428571,1830.571429,272.571429,-116.428571,-227.428571,161.571429,1038.428571,38.000000,18.875000,0.456954,0.703280,0.879610,1.479655,-137.571429,-111.571429,253.428571,11.428571,-14.571429,-30.571429,29.428571,137.571429,0.000000,15.038462,0.381074,0.825503,0.869919,1.560748
3,34176,2217,0,6.0,0.0,386.0,0.0,23.0,6.0,24.0,0.0,11.0,1.0,16.782609,0.260870,2.181818,0.090909,46,16,0.10643,0.090909,0.068736,0.097561,0.042129,0.323725,0.270510,0.147059,0.117647,0.029412,0.029412,0.058824,0.176471,0.441176,12.0,6.833333,10.333333,8.800000,3.800000,8.111111,20.333333,1.666667,1.333333,1.0,1.0,1.0,1.2,7.5,106,44,251,107,17.0,16.0,19.0,17.0,8,10,11,8,6,6,7,6,1.0,0.0,39620.0,67.0,1018.0,3787.0,12610.0,34.0,2703.0,363.0,1.0,0.0,1.0,0.0,152.653846,8.705109,0.317624,141.133806,8.156852,1.000000,6.0_0.0,170.208267,9.331121,0.337225,0.396320,0.386876,38.919450,3.720039,4.665187,0.134295,0.033570,0.085130,0.072672,0.053869,0.111059,0.145842,0.497857,0.026874,0.041924,0.034668,0.021768,0.045418,0.099704,0.729643,96.238095,170.833333,141.129032,104.612903,222.866667,283.225806,2724.727273,4.761905,5.379310,4.777778,4.764706,7.041667,14.269231,452.500000,2,5,377,14199,1.0,5.0,158.0,913.0,2,5,174,4319,1,4,114,2428,34176_2217,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1,1,1,1,1,1,0.0,0.945652,0.945355,0.945652,0.934426,913.0,1.0,0.0,0.095238,0.000000,1.0,1.0,0.100000,0.105263,0.136364,0.0,0.0,0.0,0.0,-16.428571,-23.428571,-33.428571,-20.428571,-45.428571,81.571429,57.571429,64.428571,0.854167,0.756098,1.419355,0.431818,7.684211,0.835616,0.142857,-0.857143,-3.857143,-3.857143,-2.857143,1.142857,10.142857,4.857143,0.8,0.25,1.0,2.0,3.0,2.5,-6579.285714,-3475.285714,-4225.285714,-5357.285714,-1914.285714,179.714286,21371.714286,8600.285714,2.535873,0.853659,0.741257,2.061671,1.313192,3.413667,-431.571429,-375.571429,-402.571429,-450.571429,-362.571429,-160.571429,2183.428571,531.571429,1.560000,0.826923,0.627907,2.086420,2.195266,7.318059
4,230784,4818,0,0.0,0.0,46.0,0.0,6.0,0.0,1.0,0.0,1.0,0.0,7.666667,0.000000,1.000000,0.000000,15,5,0.00000,0.518519,0.092593,0.000000,0.074074,0.037037,0.277778,0.000000,0.571429,0.142857,0.000000,0.000000,0.142857,0.142857,0.0,4.000000,5.000000,0.000000,1.333333,2.000000,3.750000,0.000000,1.333333,1.0,0.0,0.0,1.0,1.0,19,17,31,20,5.0,6.0,6.0,5.0,1,1,1,1,1,1,1,1,0.0,0.0,27211.0,103.0,583.0,1770.0,16057.0,26.0,2150.0,189.0,7.0,0.0,1.0,0.0,105.742450,6.229536,0.852016,141.133806,8.156852,1.000000,0.0_0.0,117.407391,6.637107,0.895509,0.725957,0.730747,46.674099,3.036021,7.468372,0.087907,0.027574,0.060430,0.040987,0.040321,0.048972,0.114143,0.667575,0.035492,0.047567,0.032565,0.024881,0.031467,0.037322,0.790706,63.142857,96.866667,63.580645,62.548387,78.500000,177.064516,2918.454545,4.619048,5.000000,3.708333,2.833333,3.185185,3.923077,360.166667,2,27,461,5569,1.0,17.0,126.0,332.0,2,25,302,3052,1,23,199,1285,230784_4818,6.0,0.0,0.0,1.0,0.0,1.0,0.0,1,1,1,1,1,1,6.0,0.885870,0.885246,0.885870,0.743169,332.0,0.0,0.0,0.000000,0.000000,0.0,1.0,0.000000,0.153846,0.192308,0.0,0.0,0.0,0.0,-7.714286,20.285714,-2.714286,-7.714286,-3.714286,-5.714286,7.285714,7.714286,0.000000,0.178571,0.000000,0.000000,0.500000,7.500000,-1.000000,3.000000,0.000000,-1.000000,-1.000000,0.000000,0.000000,1.000000,0.0,0.25,0.0,0.0,0.0,1.0,-5543.857143,-3963.857143,-4898.857143,-4930.857143,-4514.857143,-1380.857143,25233.142857,6869.857143,2.191554,0.678252,0.983765,1.214544,2.330786,5.848606,-293.428571,-260.428571,-301.428571,-322.428571,-304.428571,-288.428571,1770.571429,390.428571,1.340206,0.684615,0.764045,1.264706,1.186047,21.186275
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
260859,359807,4325,0,4.0,1.0,58.0,0.0,4.0,1.0,49.0,0.0,5.0,0.0,14.500000,0.250000,9.800000,0.000000,11,4,0.00000,0.000000,0.000000,0.000000,0.085470,0.196581,0.717949,0.000000,0.000000,0.000000,0.000000,0.111111,0.222222,0.666667,0.0,0.000000,0.000000,0.000000,3.333333,4.600000,21.000000,0.000000,0.000000,0.0,0.0,1.0,1.0,3.0,22,16,31,21,4.0,4.0,4.0,4.0,11,11,19,13,2,3,3,2,0.0,0.0,11616.0,29.0,1124.0,848.0,5095.0,18.0,996.0,63.0,18.0,0.0,2.0,0.0,147.542249,9.620345,0.716419,99.741350,6.668382,0.425959,4.0_1.0,124.098490,8.449806,0.317763,0.272280,0.330118,10.334520,0.754448,5.115462,0.063253,0.023599,0.054828,0.075092,0.078175,0.124817,0.191369,0.452120,0.030189,0.047642,0.079717,0.079245,0.108019,0.157075,0.498113,22.238095,36.166667,47.935484,49.903226,82.333333,122.161290,813.363636,3.047619,3.740741,5.451613,5.600000,7.633333,10.741935,96.000000,2,18,75,4412,1.0,15.0,41.0,849.0,2,14,38,1421,1,11,29,779,359807_4325,0.0,0.0,0.0,18.0,0.0,2.0,0.0,1,2,6,1,1,1,0.0,0.282609,0.273224,0.239130,0.202186,849.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.095238,0.0,0.0,0.0,0.0,-16.714286,-16.714286,-16.714286,-16.714286,-6.714286,6.285714,67.285714,16.714286,0.000000,0.000000,0.000000,0.000000,2.300000,3.652174,-1.285714,-1.285714,-1.285714,-1.285714,-0.285714,0.714286,4.714286,1.285714,0.0,0.00,0.0,0.0,2.0,3.0,-2360.000000,-1742.000000,-1341.000000,-1280.000000,-357.000000,960.000000,6120.000000,2827.000000,2.323340,1.369585,1.041050,1.596639,1.533198,2.362556,-238.857143,-201.857143,-133.857143,-134.857143,-73.857143,30.142857,753.142857,302.857143,1.578125,1.673267,0.994083,1.363095,1.454148,3.171171
260860,294527,3971,0,0.0,1.0,121.0,0.0,1.0,24.0,41.0,0.0,4.0,7.0,121.000000,24.000000,10.250000,1.750000,5,1,0.00000,0.000000,0.000000,0.000000,0.101010,0.000000,0.898990,0.000000,0.000000,0.000000,0.000000,0.200000,0.000000,0.800000,0.0,0.000000,0.000000,0.000000,20.000000,0.000000,35.600000,0.000000,0.000000,0.0,0.0,1.0,0.0,4.0,22,16,73,23,1.0,1.0,1.0,1.0,20,10,22,20,4,4,4,4,0.0,0.0,17573.0,75.0,871.0,1414.0,7029.0,19.0,1737.0,174.0,13.0,0.0,1.0,3.0,105.742450,6.229536,0.852016,99.741350,6.668382,0.425959,0.0_1.0,77.416906,5.293887,0.303650,0.162313,0.197636,20.175660,1.623421,4.046632,0.100173,0.017825,0.073204,0.072200,0.066524,0.123944,0.133636,0.512668,0.020706,0.053298,0.049463,0.046396,0.084739,0.067868,0.677531,24.523810,70.500000,67.290323,62.000000,119.366667,124.548387,1346.545455,3.375000,5.560000,4.448276,4.033333,7.620690,5.709677,176.700000,2,7,271,5720,1.0,6.0,98.0,577.0,2,6,129,2669,1,6,79,1432,294527_3971,8.0,0.0,3.0,5.0,0.0,1.0,0.0,1,1,2,1,1,1,11.0,0.255435,0.251366,0.255435,0.000000,577.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.100000,0.0,0.0,0.0,0.0,-28.285714,-28.285714,-28.285714,-28.285714,-8.285714,-28.285714,149.714286,28.285714,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,-0.714286,-0.714286,-0.714286,-0.714286,0.285714,-0.714286,3.285714,0.714286,0.0,0.00,0.0,0.0,0.0,0.0,-3612.428571,-2012.428571,-2041.428571,-2205.428571,-546.428571,-266.428571,10684.571429,4127.428571,4.106796,0.986288,0.921381,1.863163,1.078190,3.836312,-318.571429,-233.571429,-243.571429,-251.571429,-151.571429,-195.571429,1394.428571,372.571429,2.574074,0.928058,0.937984,1.826446,0.800905,9.983051
260861,294527,152,0,0.0,1.0,121.0,0.0,1.0,24.0,41.0,0.0,4.0,7.0,121.000000,24.000000,10.250000,1.750000,5,1,0.00000,0.000000,0.000000,0.000000,0.101010,0.000000,0.898990,0.000000,0.000000,0.000000,0.000000,0.200000,0.000000,0.800000,0.0,0.000000,0.000000,0.000000,20.000000,0.000000,35.600000,0.000000,0.000000,0.0,0.0,1.0,0.0,4.0,22,16,73,23,1.0,1.0,1.0,1.0,20,10,22,20,4,4,4,4,0.0,0.0,4303.0,8.0,219.0,458.0,2526.0,4.0,544.0,45.0,7.0,0.0,1.0,1.0,105.742450,6.229536,0.852016,99.741350,6.668382,0.425959,0.0_1.0,77.416906,5.293887,0.303650,0.162313,0.197636,19.648402,2.091324,4.643382,0.082721,0.002344,0.004811,0.026027,0.125941,0.096336,0.068583,0.675959,0.001311,0.002621,0.000000,0.171691,0.095675,0.011796,0.716907,1.900000,2.294118,11.722222,46.409091,26.931034,17.935484,498.181818,1.000000,1.000000,0.000000,26.200000,14.600000,2.250000,136.750000,2,13,162,1698,1.0,5.0,16.0,176.0,2,9,99,978,1,5,40,474,294527_152,0.0,0.0,0.0,7.0,0.0,1.0,1.0,1,1,1,1,1,1,0.0,0.255435,0.251366,0.255435,0.000000,176.0,0.0,0.0,0.000000,0.000000,0.0,1.0,0.000000,0.200000,0.428571,0.0,0.0,0.0,0.0,-28.285714,-28.285714,-28.285714,-28.285714,-8.285714,-28.285714,149.714286,28.285714,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,-0.714286,-0.714286,-0.714286,-0.714286,0.285714,-0.714286,3.285714,0.714286,0.0,0.00,0.0,0.0,0.0,0.0,-1139.142857,-1119.142857,-947.142857,-137.142857,-377.142857,-602.142857,4321.857143,1158.142857,2.052632,5.410256,4.838863,0.764936,0.711908,9.856115,-108.000000,-107.000000,-109.000000,22.000000,-36.000000,-100.000000,438.000000,109.000000,2.000000,0.000000,0.000000,0.557252,0.123288,60.777778
260862,294527,2537,0,0.0,1.0,121.0,0.0,1.0,24.0,41.0,0.0,4.0,7.0,121.000000,24.000000,10.250000,1.750000,5,1,0.00000,0.000000,0.000000,0.000000,0.101010,0.000000,0.898990,0.000000,0.000000,0.000000,0.000000,0.200000,0.000000,0.800000,0.0,0.000000,0.000000,0.000000,20.000000,0.000000,35.600000,0.000000,0.000000,0.0,0.0,1.0,0.0,4.0,22,16,73,23,1.0,1.0,1.0,1.0,20,10,22,20,4,4,4,4,0.0,0.0,26137.0,85.0,1829.0,2038.0,13456.0,38.0,4231.0,280.0,0.0,0.0,1.0,0.0,105.742450,6.229536,0.852016,99.741350,6.668382,0.425959,0.0_1.0,77.416906,5.293887,0.303650,0.162313,0.197636,14.290323,1.114270,3.180336,0.066178,0.012496,0.089450,0.030461,0.072358,0.111594,0.105793,0.577848,0.015017,0.045710,0.018977,0.048515,0.093069,0.057426,0.721287,28.619048,143.400000,47.258065,112.258065,178.900000,164.129032,2526.454545,4.789474,10.653846,3.965517,9.483871,18.800000,11.225806,485.666667,2,20,221,8269,1.0,15.0,136.0,1414.0,2,18,144,3980,1,15,127,2935,294527_2537,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1,1,1,1,1,1,0.0,0.255435,0.251366,0.255435,0.000000,1414.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.050000,0.0,0.0,0.0,0.0,-28.285714,-28.285714,-28.285714,-28.285714,-8.285714,-28.285714,149.714286,28.285714,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,-0.714286,-0.714286,-0.714286,-0.714286,0.285714,-0.714286,3.285714,0.714286,0.0,0.00,0.0,0.0,0.0,0.0,-6269.571429,-2568.571429,-5405.571429,-3390.571429,-1503.571429,-1782.571429,20920.428571,6870.571429,7.158070,0.340539,2.375427,1.542241,0.948016,5.462068,-774.714286,-588.714286,-750.714286,-571.714286,-301.714286,-517.714286,3505.285714,865.714286,3.043956,0.415162,2.556522,1.918367,0.617021,12.560345


In [280]:
df_combined.to_csv("df_combined.csv",index=False)