In [4]:
import numpy as np 
import pandas as pd 
import warnings
import time
import sys
import datetime
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
warnings.simplefilter(action='ignore')
import gc
import dateutil.relativedelta

# Lift pandas' display truncation so frames are shown in full.
# Note: with max_rows=None, displaying a large frame renders every row.
pd.set_option('display.width',None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
# Allow DataFrame.info() to print per-column details for up to 200 columns.
pd.set_option('display.max_info_columns', 200)

In [None]:
# def reduce_mem_usage(df, verbose=True):
#     numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
#     start_mem = df.memory_usage().sum() / 1024**2    
#     for col in df.columns:
#         col_type = df[col].dtypes
#         if col_type in numerics:
#             c_min = df[col].min()
#             c_max = df[col].max()
#             if str(col_type)[:3] == 'int':
#                 if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
#                     df[col] = df[col].astype(np.int8)
#                 elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
#                     df[col] = df[col].astype(np.int16)
#                 elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
#                     df[col] = df[col].astype(np.int32)
#                 elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max:
#                     df[col] = df[col].astype(np.int64)  
#             else:
#                 if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max:
#                     df[col] = df[col].astype(np.float16)
#                 elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
#                     df[col] = df[col].astype(np.float32)
#                 else:
#                     df[col] = df[col].astype(np.float64)    
#     end_mem = df.memory_usage().sum() / 1024**2
#     if verbose: print('Mem. usage decreased to {:5.2f} Mb ({:.1f}% reduction)'.format(end_mem, 100 * (start_mem - end_mem) / start_mem))
#     return df

In [None]:
# Load both transaction tables; `purchase_date` is parsed to datetime at read time.
# NOTE(review): these are read from ../input/ while train/test are read from ./data/
# further down — confirm that both locations are intended.
historical_transactions = pd.read_csv('../input/historical_transactions.csv', parse_dates=['purchase_date'])
new_transactions = pd.read_csv('../input/new_merchant_transactions.csv', parse_dates=['purchase_date'])

In [None]:
np.percentile(historical_transactions['purchase_amount'], [1, 5, 50, 95, 99])

In [None]:
np.percentile(new_transactions['purchase_amount'], [1, 5, 50, 95, 99])

In [None]:
# historical_transactions['purchase_amount'] = historical_transactions['purchase_amount'].apply(lambda x: min(x, 1.5))

In [None]:
# new_transactions['purchase_amount'] = new_transactions['purchase_amount'].apply(lambda x: min(x, 1.5))

In [None]:
# Apply the same missing-value handling to both transaction tables (in place).
# Not re-runnable: a second pass would map the already-encoded category_3 codes
# to NaN and then to -1.
for txn in (historical_transactions, new_transactions):
    # category_2: missing -> -1 sentinel.
    txn['category_2'] = txn['category_2'].fillna(-1)
    # category_3: letters -> 0/1/2; anything unmapped or missing -> -1.
    txn['category_3'] = txn['category_3'].map({'A':0, 'B':1, 'C':2}).fillna(-1)
    # merchant_id: missing -> the string '-1'.
    txn['merchant_id'] = txn['merchant_id'].fillna('-1')

In [None]:
historical_transactions['category_3'].unique()

In [None]:
historical_transactions['purchase_date'].max()

In [None]:
historical_transactions['purchase_date'].min()

In [None]:
new_transactions['purchase_date'].max()

In [None]:
new_transactions['purchase_date'].min()

In [None]:
%%time
def read_data(input_file):
    """Read a CSV and convert its `first_active_month` column to datetimes.

    Parameters
    ----------
    input_file : path or file-like object accepted by `pd.read_csv`.

    Returns
    -------
    DataFrame with the same columns as the file, `first_active_month`
    converted with `pd.to_datetime`.
    """
    raw = pd.read_csv(input_file)
    return raw.assign(first_active_month=pd.to_datetime(raw['first_active_month']))

# Load the train/test card tables with `first_active_month` parsed by read_data.
# NOTE(review): read from ./data/ whereas the transaction CSVs come from ../input/ — confirm.
train = read_data('./data/train.csv')
test = read_data('./data/test.csv')

# Separate handle on the target column of the training set.
target = train['target']

gc.collect()

In [None]:
test[test['card_id']=='C_ID_c27b4f80f7']

In [None]:
# tmp = historical_transactions[historical_transactions['card_id']=='C_ID_c27b4f80f7']['purchase_date'].min() 
# Set first_active_month to 2017-03-01 for card C_ID_c27b4f80f7 in the test set
# (presumably the value is missing in the raw file — see the display cell above).
# A single .loc assignment is used because the chained form
# `test[col][mask] = value` may write to a temporary copy: it triggers
# SettingWithCopyWarning (hidden here by the global warnings filter) and does
# nothing under pandas Copy-on-Write.
test.loc[test['card_id']=='C_ID_c27b4f80f7', 'first_active_month'] = pd.to_datetime(datetime.date(2017, 3, 1))

In [None]:
# historical_transactions = historical_transactions.head(200)
# new_transactions = new_transactions.head(200)
# train = train.head(200)
# test = test.head(200)

In [None]:
def sub_month_lag(x):
    """Shift a row's first purchase date back by its first month lag.

    `x` is a row (or mapping) exposing `purchase_date_first` and
    `month_lag_first`. Returns `purchase_date_first` minus
    `month_lag_first` calendar months.
    """
    lag = dateutil.relativedelta.relativedelta(months=x['month_lag_first'])
    return x['purchase_date_first'] - lag

def a2p(a, p):
    """Calendar-day difference `date(a) - date(p)`, ignoring time of day.

    Parameters
    ----------
    a, p : datetime64 Series, subtracted element-wise (aligned on index).

    Returns
    -------
    Series of whole-day counts, positive where `a` falls on a later date
    than `p`. Rows where either side is NaT yield NaN.
    """
    # Truncate both sides to midnight and subtract as datetime64 values.
    # The earlier `.dt.date` subtraction produced an object-dtype result, and
    # `.dt.days` only works on it if pandas re-infers timedelta64; newer
    # versions raise "Can only use .dt accessor with datetimelike values".
    return (a.dt.normalize() - p.dt.normalize()).dt.days

# def a2r(a, r):
#     return (a.dt.year-r.dt.year)*12 + (a.dt.month - r.dt.month)

In [None]:
def binarize(df):
    """Encode the 'Y'/'N' flag columns of a transaction frame as 1/0, in place.

    Parameters
    ----------
    df : DataFrame with `authorized_flag` and `category_1` columns.

    Returns
    -------
    The same DataFrame object. Values other than 'Y'/'N' become NaN.
    Columns that are already numeric are left untouched, so calling this
    twice (e.g. re-running the cell) is safe.
    """
    for col in ['authorized_flag', 'category_1']:
        # A numeric column has already been encoded; mapping it again would
        # turn every value into NaN.
        if pd.api.types.is_numeric_dtype(df[col]):
            continue
        df[col] = df[col].map({'Y':1, 'N':0})
    return df

# Encode authorized_flag / category_1 as 1/0 in both transaction tables.
# binarize mutates its argument and returns it, so the rebinding is a no-op alias.
historical_transactions = binarize(historical_transactions)
new_transactions = binarize(new_transactions)

In [None]:
# Per-card share of authorized historical transactions, attached to train/test
# as `authorized_flag_mean`.
# Not re-runnable: merging a second time would create _x/_y suffixed duplicates.
agg_fun = {'authorized_flag': ['mean']}
auth_mean = historical_transactions.groupby(['card_id']).agg(agg_fun)
# Flatten the (column, statistic) MultiIndex into "column_statistic" names.
auth_mean.columns = ['_'.join(parts).strip() for parts in auth_mean.columns.values]
auth_mean = auth_mean.reset_index()

train = train.merge(auth_mean, on='card_id', how='left')
test = test.merge(auth_mean, on='card_id', how='left')

In [None]:
# historical_transactions.count()

In [None]:
# historical_transactions = historical_transactions[historical_transactions['authorized_flag'] == 1]

In [None]:
# historical_transactions.count()

In [None]:
# Card id -> first_active_month pairs from each split.
a_train = train[['card_id', 'first_active_month']]
a_test = test[['card_id', 'first_active_month']]

In [None]:
# Combined train+test lookup of first_active_month per card, exact duplicate rows removed.
a_tmp = pd.concat([a_train,a_test]).drop_duplicates().reset_index(drop=True)

In [None]:
# Attach each card's first_active_month to every transaction row.
# Left join: transactions whose card_id is absent from the lookup get NaT.
historical_transactions = pd.merge(historical_transactions, a_tmp, on='card_id', how='left')
new_transactions = pd.merge(new_transactions, a_tmp, on='card_id', how='left')

In [None]:
historical_transactions.head()

In [None]:
# Make sure purchase_date is datetime.
# NOTE(review): the CSVs are already read with parse_dates=['purchase_date'],
# so this is presumably a no-op safeguard — confirm before removing.
historical_transactions['purchase_date'] = pd.to_datetime(historical_transactions['purchase_date'])
new_transactions['purchase_date'] = pd.to_datetime(new_transactions['purchase_date'])

In [None]:
def get_r(history):
    """Derive per-card `reference_date` and `now_date` from transaction history.

    For every card, the first `purchase_date` and first `month_lag` in group
    order are taken; the purchase date is shifted back by that lag
    (`sub_month_lag`) and moved to the last day of the resulting month to
    give `reference_date`. `now_date` is the last day of the month four
    months after `reference_date`. The time of day of the original purchase
    is carried through both values.

    Parameters
    ----------
    history : DataFrame with `card_id`, `month_lag` and `purchase_date`.

    Returns
    -------
    DataFrame with columns `card_id`, `reference_date`, `now_date`.
    """
    # day=1, months=+k, days=-1 lands on the last day of the month k-1 months ahead.
    to_month_end = dateutil.relativedelta.relativedelta(day=1, months=+1, days=-1)
    to_now = dateutil.relativedelta.relativedelta(day=1, months=+5, days=-1)

    firsts = history.groupby(['card_id']).agg({
        'month_lag': ['first'],
        'purchase_date': ['first'],
    })
    firsts.columns = ['_'.join(parts).strip() for parts in firsts.columns.values]
    firsts = firsts.reset_index()

    shifted = firsts.apply(sub_month_lag, axis=1)
    firsts['reference_date'] = shifted.apply(lambda ts: ts + to_month_end)
    firsts['now_date'] = firsts['reference_date'].apply(lambda ts: ts + to_now)

    return firsts.drop(columns=['month_lag_first', 'purchase_date_first'])

In [None]:
hist_r = get_r(historical_transactions)

In [None]:
hist_r.head()

In [None]:
# Attach reference_date / now_date (both derived from the historical table) to
# every row of both transaction tables. Left join: cards in new_transactions
# that have no historical rows get NaT for both dates.
historical_transactions = pd.merge(historical_transactions, hist_r, on='card_id', how='left')
new_transactions = pd.merge(new_transactions, hist_r, on='card_id', how='left')

In [None]:
historical_transactions.head()

In [None]:
# Row-level features added in place to both transaction tables.
for df in [historical_transactions, new_transactions]:
    # Calendar-day gaps: date(first_active_month) - date(purchase_date), and
    # date(purchase_date) minus the card's reference / now dates.
    df['a2p'] = a2p(df['first_active_month'], df['purchase_date'])
    
    df['p2r'] = a2p(df['purchase_date'], df['reference_date'])
    df['p2now'] = a2p(df['purchase_date'], df['now_date'])
    
#     df['p_and_m'] = df['purchase_amount'] * (df['month_lag'].abs()+1)
    # Purchase amount scaled down by distance (in months) from the reference month.
    df['p_vs_m'] = df['purchase_amount'] / (df['month_lag'].abs()+1)
    
    # Treat the -1 and 999 installment codes as missing. The result is assigned
    # back explicitly: `df[col].replace(..., inplace=True)` operates on the
    # Series returned by `df[col]`, which is not guaranteed to write through
    # to `df` (it does not under pandas Copy-on-Write). `np.nan` is used
    # because the `np.NaN` alias was removed in NumPy 2.0.
    df['installments'] = df['installments'].replace([-1, 999], np.nan)
    
#     df['p_and_i'] = df['purchase_amount'] * (df['installments'].abs()+1)
    df['p_vs_i'] = df['purchase_amount'] / (df['installments'].abs()+1)

#     df['year'] = df['purchase_date'].dt.year
    df['quarter'] = df['purchase_date'].dt.quarter

    df['month'] = df['purchase_date'].dt.month
    
    # NOTE(review): `.dt.weekofyear` was deprecated in pandas 1.1 and removed
    # in 2.0; switch to `.dt.isocalendar().week` when upgrading pandas.
    df['weekofyear'] = df['purchase_date'].dt.weekofyear
    df['dayofweek'] = df['purchase_date'].dt.dayofweek
    df['day'] = df['purchase_date'].dt.day
    df['hour'] = df['purchase_date'].dt.hour

#     df['is_quarter_start'] = (df.purchase_date.dt.is_quarter_start).astype(int)
#     df['is_quarter_end'] = (df.purchase_date.dt.is_quarter_end).astype(int)  
    df['is_month_start'] = (df.purchase_date.dt.is_month_start).astype(int)
#     df['is_month_end'] = (df.purchase_date.dt.is_month_end).astype(int)  

    # Saturday (5) or Sunday (6).
    df['weekend'] = (df.purchase_date.dt.weekday>=5).astype(int)  

In [None]:
historical_transactions['a2p'].plot.line()

In [None]:
new_transactions['a2p'].plot.line()

In [None]:
historical_transactions.head()

In [None]:
def aggregate_transactions_hist(history_):
    """Aggregate the historical transaction table to one feature row per card.

    Side effect: for each encoded column a `<col>_p_mean` column (mean
    `purchase_amount` over all rows sharing that column value) is added to
    `history_` itself — the caller's frame is modified.

    Parameters
    ----------
    history_ : transaction-level DataFrame containing `card_id`, the
        row-level feature columns aggregated below, and the per-card
        `first_active_month`, `reference_date` and `now_date` columns.

    Returns
    -------
    DataFrame with `card_id`, `transactions_count`, the flattened
    `<column>_<statistic>` aggregates, year/quarter/month parts of the three
    per-card dates, and the day gaps `a2r`, `r2now`, `a2now` between them.
    """
    five_stats = ['mean', 'median', 'max', 'min', 'std']
    six_stats = ['sum'] + five_stats
    # Columns that get both a distinct-count and a purchase-amount mean encoding.
    encoded_cols = [
        'category_2', 'category_3',
        'state_id', 'city_id', 'subsector_id', 'merchant_category_id', 'merchant_id',
        'quarter', 'month', 'weekofyear', 'dayofweek', 'day', 'hour',
    ]

    # Insertion order of this dict fixes the output column order.
    spec = {}
    for name in ('is_month_start', 'weekend', 'category_1'):
        spec[name] = ['mean']
    for name in encoded_cols:
        spec[name] = ['nunique']
    for name in ('a2p', 'p2r', 'p2now', 'month_lag'):
        spec[name] = list(five_stats)
    for name in ('purchase_amount', 'installments'):
        spec[name] = list(six_stats)
    for name in ('p_vs_m', 'p_vs_i'):
        spec[name] = list(five_stats)
    spec['purchase_date'] = ['max', 'min']
    for name in ('first_active_month', 'reference_date', 'now_date'):
        spec[name] = ['first']

    for name in encoded_cols:
        encoded = name + '_p_mean'
        # Author's note: this mean encoding improved the score from 0.697 to 0.694.
        history_[encoded] = history_.groupby([name])['purchase_amount'].transform('mean')
        spec[encoded] = list(five_stats)

    per_card = history_.groupby(['card_id']).agg(spec)
    # Flatten the (column, statistic) MultiIndex into "column_statistic" names.
    per_card.columns = ['_'.join(parts).strip() for parts in per_card.columns.values]
    per_card = per_card.reset_index()

    # Calendar parts of the three per-card dates.
    date_sources = (
        ('first', 'first_active_month_first'),
        ('re', 'reference_date_first'),
        ('now', 'now_date_first'),
    )
    for prefix, source in date_sources:
        stamps = per_card[source].dt
        per_card[prefix + '_year'] = stamps.year
        per_card[prefix + '_quarter'] = stamps.quarter
        per_card[prefix + '_month'] = stamps.month

    # Day gaps between activation, reference and now dates.
    per_card['a2r'] = a2p(per_card['first_active_month_first'], per_card['reference_date_first'])
    per_card['r2now'] = a2p(per_card['reference_date_first'], per_card['now_date_first'])
    per_card['a2now'] = a2p(per_card['first_active_month_first'], per_card['now_date_first'])

    per_card = per_card.drop(
        columns=['first_active_month_first', 'reference_date_first', 'now_date_first'])

    # Row count per card goes first, followed by the aggregates.
    counts = history_.groupby('card_id').size().reset_index(name='transactions_count')
    return pd.merge(counts, per_card, on='card_id', how='left')

gc.collect()

In [None]:
%%time
# Card-level aggregates of the historical table, prefixed with 'hist_'.
history = aggregate_transactions_hist(historical_transactions)
    
history.columns = ['hist_' + c if c != 'card_id' else c for c in history.columns]

# hist_p2p: whole days between a card's first and last historical purchase.
history['hist_p2p'] = (history['hist_purchase_date_max'] - history['hist_purchase_date_min']).dt.days
# hist_sleep: that span minus hist_day_nunique. Note that hist_day_nunique counts
# distinct day-of-month values (`dt.day`), not distinct calendar dates.
history['hist_sleep'] = history['hist_p2p'] - history['hist_day_nunique']

### Per-transaction ratios (divide by the number of transactions).
history['hist_p2p_vs_count'] = history['hist_p2p']/history['hist_transactions_count']
history['hist_sleep_vs_count'] = history['hist_sleep']/history['hist_transactions_count']
### Per-day ratios; |p2p| + 1 keeps the denominator non-zero.
history['hist_count_vs_p2p'] = history['hist_transactions_count']/(history['hist_p2p'].abs()+1)
history['hist_sleep_vs_p2p'] = history['hist_sleep']/(history['hist_p2p'].abs()+1)
history['hist_p_vs_p2p'] = history['hist_purchase_amount_sum']/(history['hist_p2p'].abs()+1)

history['hist_i_vs_p2p'] = history['hist_installments_sum']/(history['hist_p2p'].abs()+1)

# history[:5]
gc.collect()

In [None]:
history.head()

In [None]:
# Attach the hist_* features to train/test (left join keeps every card), then
# free the intermediate frame.
train = pd.merge(train, history, on='card_id', how='left')
test = pd.merge(test, history, on='card_id', how='left')
del history; gc.collect()

In [None]:
def aggregate_transactions_new(history_):
    """Aggregate the new-merchant transaction table to one feature row per card.

    Side effect: for each encoded column a `<col>_p_mean` column (mean
    `purchase_amount` over all rows sharing that column value) is added to
    `history_` itself — the caller's frame is modified.

    Parameters
    ----------
    history_ : transaction-level DataFrame containing `card_id` and the
        row-level feature columns aggregated below.

    Returns
    -------
    DataFrame with `card_id`, `transactions_count` and the flattened
    `<column>_<statistic>` aggregates, one row per card.
    """
    five_stats = ['mean', 'median', 'max', 'min', 'std']
    six_stats = ['sum'] + five_stats
    # Columns that get both a distinct-count and a purchase-amount mean encoding.
    encoded_cols = [
        'category_2', 'category_3',
        'state_id', 'city_id', 'subsector_id', 'merchant_category_id', 'merchant_id',
        'quarter', 'month', 'weekofyear', 'dayofweek', 'day', 'hour',
    ]

    # Insertion order of this dict fixes the output column order.
    spec = {}
    for name in ('is_month_start', 'weekend', 'category_1'):
        spec[name] = ['mean']
    for name in encoded_cols:
        spec[name] = ['nunique']
    for name in ('a2p', 'p2r', 'p2now', 'month_lag'):
        spec[name] = list(five_stats)
    for name in ('purchase_amount', 'installments'):
        spec[name] = list(six_stats)
    for name in ('p_vs_m', 'p_vs_i'):
        spec[name] = list(five_stats)
    spec['purchase_date'] = ['max', 'min']

    for name in encoded_cols:
        encoded = name + '_p_mean'
        # Author's note: this mean encoding improved the score from 0.697 to 0.694.
        history_[encoded] = history_.groupby([name])['purchase_amount'].transform('mean')
        spec[encoded] = list(five_stats)

    per_card = history_.groupby(['card_id']).agg(spec)
    # Flatten the (column, statistic) MultiIndex into "column_statistic" names.
    per_card.columns = ['_'.join(parts).strip() for parts in per_card.columns.values]
    per_card = per_card.reset_index()

    # Row count per card goes first, followed by the aggregates.
    counts = history_.groupby('card_id').size().reset_index(name='transactions_count')
    return pd.merge(counts, per_card, on='card_id', how='left')

gc.collect()

In [None]:
%%time
# Card-level aggregates of the new-merchant table, prefixed with 'new_'.
new = aggregate_transactions_new(new_transactions)
    
new.columns = ['new_' + c if c != 'card_id' else c for c in new.columns]
    
# new_p2p: whole days between a card's first and last new-merchant purchase.
new['new_p2p'] = (new['new_purchase_date_max'] - new['new_purchase_date_min']).dt.days
# new_sleep: that span minus new_day_nunique. Note that new_day_nunique counts
# distinct day-of-month values (`dt.day`), not distinct calendar dates.
new['new_sleep'] = new['new_p2p'] - new['new_day_nunique']
### Per-transaction ratios (divide by the number of transactions).
new['new_p2p_vs_count'] = new['new_p2p']/new['new_transactions_count']
new['new_sleep_vs_count'] = new['new_sleep']/new['new_transactions_count']
### Per-day ratios; |p2p| + 1 keeps the denominator non-zero.
new['new_count_vs_p2p'] = new['new_transactions_count']/(new['new_p2p'].abs()+1)
new['new_sleep_vs_p2p'] = new['new_sleep']/(new['new_p2p'].abs()+1)
new['new_p_vs_p2p'] = new['new_purchase_amount_sum']/(new['new_p2p'].abs()+1)

new['new_i_vs_p2p'] = new['new_installments_sum']/(new['new_p2p'].abs()+1)

# new[:5]
gc.collect()

In [None]:
new.head()

In [None]:
# Attach the new_* features to train/test. Left join: cards without any
# new-merchant transactions get NaN/NaT in every new_* column.
train = pd.merge(train, new, on='card_id', how='left')
test = pd.merge(test, new, on='card_id', how='left')
del new; gc.collect()

In [None]:
# Flag training rows whose target is below -30 as outliers (1, else 0) and
# display the resulting class counts.
train['outliers'] = (train['target'] < -30).astype('int64')
train['outliers'].value_counts()

In [None]:
# Historical-vs-new comparison features, added in place to train and test.
# NOTE(review): the relative-difference columns divide by hist_* values that
# can be zero, so they may contain +/-inf.
for df in [train, test]:
    ### Differences of the per-transaction ratios and means (hist - new).
    df['c_p2p_diff'] = df['hist_p2p_vs_count'] - df['new_p2p_vs_count']
    df['c_sleep_diff'] = df['hist_sleep_vs_count'] - df['new_sleep_vs_count']
    df['c_p_diff'] = df['hist_purchase_amount_mean'] - df['new_purchase_amount_mean']
    df['c_i_diff'] = df['hist_installments_mean'] - df['new_installments_mean']
    ### Differences of the per-day ratios (hist - new).
    df['p2p_count_diff'] = df['hist_count_vs_p2p'] - df['new_count_vs_p2p']
    df['p2p_sleep_diff'] = df['hist_sleep_vs_p2p'] - df['new_sleep_vs_p2p']
    df['p2p_p_diff'] = df['hist_p_vs_p2p'] - df['new_p_vs_p2p']
    df['p2p_i_diff'] = df['hist_i_vs_p2p'] - df['new_i_vs_p2p']
    ### The same differences relative to the historical value.
    df['c_p2p_diff_vs'] = df['c_p2p_diff'] / df['hist_p2p_vs_count']
    df['c_sleep_diff_vs'] = df['c_sleep_diff'] / df['hist_sleep_vs_count']
    df['c_p_diff_vs']  = df['c_p_diff'] / df['hist_purchase_amount_mean']
    df['c_i_diff_vs'] = df['c_i_diff'] / df['hist_installments_mean']
    ###
    df['p2p_count_diff_vs'] = df['p2p_count_diff'] / df['hist_count_vs_p2p']
    df['p2p_sleep_diff_vs'] = df['p2p_sleep_diff'] / df['hist_sleep_vs_p2p']
    df['p2p_p_diff_vs']  = df['p2p_p_diff'] / df['hist_p_vs_p2p']
    df['p2p_i_diff_vs'] = df['p2p_i_diff'] / df['hist_i_vs_p2p']
    ###
#     df['count_sum'] = df['hist_transactions_count'] + df['new_transactions_count']
#     df['p_sum'] = df['hist_purchase_amount_sum'] + df['new_purchase_amount_sum']
#     df['i_sum'] = df['hist_installments_sum'] + df['new_installments_sum']
#     df['p_sum_vs_count'] = df['p_sum'] / df['count_sum'] # mean
#     df['i_sum_vs_count'] = df['i_sum'] / df['count_sum'] # mean
    ###
#     df['gap'] = a2p(df['hist_purchase_date_max'], df['new_purchase_date_min'])
    ###
#     df['time'] = a2p(df['hist_purchase_date_min'], df['new_purchase_date_max'])
#     df['sleep_sum'] = df['hist_sleep'] + df['new_sleep']

#     df['time_vs_count'] = df['time'] / df['count_sum']
#     df['sleep_sum_vs_count'] = df['sleep_sum'] / df['count_sum']
    
#     df['count_sum_vs_time'] = df['count_sum'] / (df['time'].abs() + 1)
#     df['sleep_sum_vs_time'] = df['sleep_sum'] / (df['time'].abs() + 1)
#     df['p_sum_vs_time'] = df['p_sum'] / (df['time'].abs() + 1)
#     df['i_sum_vs_time'] = df['i_sum'] / (df['time'].abs() + 1)
    ###
    
    ### Convert the purchase-date extremes to seconds since the epoch.
    for f in ['hist_purchase_date_max','hist_purchase_date_min', 
              'new_purchase_date_max', 'new_purchase_date_min']:
        # Convert only while the column is still datetime, so re-running the
        # cell does not rescale values that were already converted.
        if pd.api.types.is_datetime64_any_dtype(df[f]):
            # NaT (e.g. a card with no new-merchant transactions) would be cast
            # to the int64 NaT sentinel and show up as about -9.2e9 "seconds";
            # mask those rows so they stay NaN.
            df[f] = df[f].astype(np.int64).where(df[f].notna()) * 1e-9

In [None]:
train['hist_p2p'].plot.hist(bins=50)

In [None]:
train['new_p2p'].plot.hist(bins=50)

In [None]:
train['hist_sleep'].plot.hist(bins=50)

In [None]:
train['new_sleep'].plot.hist(bins=50)

In [None]:
train['c_p2p_diff'].plot.hist(bins=50)

In [None]:
# train['c_sleep_diff'].plot.hist(bins=50)

In [None]:
train['c_p_diff'].apply(lambda x: min(x, 10)).plot.hist(bins=50)

In [None]:
train['c_i_diff'].plot.hist(bins=50)

In [None]:
# train['p2p_sleep_diff'].plot.hist(bins=50)

In [None]:
# train['p2p_count_diff'].plot.hist(bins=50)

In [None]:
train['p2p_p_diff'].apply(lambda x: min(x, 10)).plot.hist(bins=50)

In [None]:
train['p2p_i_diff'].plot.hist(bins=50)

In [None]:
train.head()

In [None]:
test.head()

In [None]:
train['authorized_flag_mean'].mean()

In [None]:
test['authorized_flag_mean'].mean()

In [None]:
print("Train Shape:", train.shape)
print("Test Shape:", test.shape)
gc.collect()

In [None]:
def summary(df):
    """Build a one-row-per-column overview of a DataFrame.

    Returns a DataFrame with columns:
      feature - column name
      unique  - number of distinct non-null values
      missing - percentage of null entries
      mode    - percentage of rows taken by the most frequent value (NaN counted)
      type    - column dtype
    """
    def describe(col):
        series = df[col]
        top_share = series.value_counts(normalize=True, dropna=False).values[0]
        return (col,
                series.nunique(),
                series.isnull().sum() * 100 / df.shape[0],
                top_share * 100,
                series.dtype)

    return pd.DataFrame([describe(col) for col in df.columns],
                        columns=['feature', 'unique', 'missing', 'mode', 'type'])

In [None]:
# Per-column overview (distinct count, % missing, % most frequent value, dtype) of each split.
train_summary = summary(train)
test_summary = summary(test)

In [None]:
# Order columns by how dominant their most frequent value is (near-constant columns first).
tmp_1 = train_summary.sort_values('mode', ascending=False)
tmp_2 = test_summary.sort_values('mode', ascending=False)

In [None]:
tmp_1

In [None]:
tmp_2

In [None]:
# Persist the engineered training features.
# NOTE(review): the row index is written as an unnamed first column (to_csv
# default); confirm downstream readers expect it (e.g. read with index_col=0).
train.to_csv("./data/pre_train.csv")

In [None]:
# Persist the engineered test features.
# NOTE(review): the row index is written as an unnamed first column (to_csv
# default); confirm downstream readers expect it (e.g. read with index_col=0).
test.to_csv("./data/pre_test.csv")

In [None]:
tmp = train['target']

In [None]:
tmp.plot.hist(bins=50)

In [None]:
(tmp*np.log10(2)).plot.hist(bins=50)

In [None]:
np.log2((np.exp2(tmp) - 0.0000000001) + 1).plot.hist(bins=50)