In [149]:
# Admin things
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=UserWarning)
from time import time
import pickle
import copy

# Techy things
from sqlalchemy import create_engine
import boto3
import os
import json

# Number things
import pandas as pd
import numpy as np
import math
from scipy import interp

# Picture things
import matplotlib.pyplot as plt
import seaborn as sns
from jupyterthemes import jtplot
from IPython.display import display

# Machine learning things
from sklearn import preprocessing
from sklearn import model_selection
from sklearn import linear_model
import shap

# My things
from src import utils
from src import fi


# Directories holding the pickled preprocessing artefacts and the output logs
pickle_dir = 'pickles/iplayer'
log_dir = 'logs/iplayer'

# Plot distributions of variables? Slow on large datasets
plot_distributions = False
save_shap_sample = True

# Fetch credentials from AWS
# NOTE(review): the recorded output of this cell shows AWS keys and a session
# token in plain text -- presumably printed by one of these helpers; confirm,
# and clear notebook outputs before sharing or committing.
aws_creds = utils.aws_fetch_creds()
secret_dict = utils.aws_fetch_secret('users/alex_philpotts/live/credentials')

# URL-encode the credentials before building the connection string: characters
# such as '@', ':', '/' or '%' in a password would otherwise be read as URL
# delimiters when SQLAlchemy parses the string.
from urllib.parse import quote_plus

engine_str = 'postgresql://%s:%s@localhost:5439/redshiftdb' % (
    quote_plus(secret_dict['redshift_username']),
    quote_plus(secret_dict['redshift_password']))

engine = create_engine(engine_str)

# Show every column when displaying dataframes
pd.options.display.max_columns = None

# Scoring sample
query = """select * from central_insights_sandbox.ap_churn_iplayer_score_sample"""
df = pd.read_sql_query(query, engine)

# Profile table, merged onto the sample by bbc_hid3 further down
query = """select * from central_insights_sandbox.ap_churn_iplayer_score_sample_profiler"""
profiles = pd.read_sql_query(query, engine)

print("Data loaded\n")

# Filter to 'active last week', or relevant filter criteria for the model
df = df[df.active_last_week == 1]

## =======================
## TREATMENT
## =======================

# IMPUTING MISSING COLUMNS ==============

# Load the fitted missing value imputer and run it over the fresh data
mvi = utils.unpickle(pickle_dir + '/prep/missing_value_imputer')
df_impute = mvi.score(df)

# Keep the columns the imputer has no strategy for, then attach the imputer's
# output in place of the columns it does handle
imputed_names = mvi.impute_strategies.colname.values
non_imputed = [c for c in df.columns if c not in imputed_names]
df = pd.concat([df[non_imputed], df_impute], axis=1)

print("Missing value imputation completed\n")



# TRANSFORMATIONS ==============
#   --- keep an untreated copy of the dataframe so unencoded features can be averaged later
df_repr = df.copy()

# Square-root the variables with heavily lopsided distributions; the new
# columns are named 'sqrt_<original name>'
sqrt_candidates = utils.unpickle(pickle_dir + '/prep/sqrt_candidates')
df_sqrt = df[sqrt_candidates].apply(np.sqrt).add_prefix('sqrt_')

print("Sqrt transformations completed\n")

# One-hot encoding with the previously fitted encoder
oh_encoder = utils.unpickle(pickle_dir + '/prep/oh_encoder')
df_OH = oh_encoder.score(df)

print("One-hot encoding completed\n")


# CONTROL ================

# Column lists saved alongside the other preprocessing artefacts
id_vars = utils.unpickle(pickle_dir + '/prep/id_vars')
eligibles_untreated = utils.unpickle(pickle_dir + '/prep/eligibles_untreated')
eligibles = utils.unpickle(pickle_dir + '/prep/eligibles')

# Drop deprecated / useless variables: keep only the id columns, the untreated
# eligible columns, and the sqrt / one-hot encoded columns built above
kept_untreated = df[id_vars + eligibles_untreated]
df = pd.concat([kept_untreated, df_sqrt, df_OH], axis=1)

target = 'target_churn_next_week'

# Feature matrix indexed by (bbc_hid3, target_week_start_date); set_index
# carries the column names over as the index level names
X = df[['bbc_hid3', 'target_week_start_date'] + eligibles].set_index(
    ['bbc_hid3', 'target_week_start_date'])

# REPRESENTATIVE USERS ==============

# How we're building average users:
#  - Median of untreated / sqrted vars
#  - Mode of OH encoded vars (before OH encoding, then apply OH encoding and re-attach).
#        Tried just taking medians, but post-OH encoding this returns 0 for basically all categoricals,
#        so having to do it the long-winded way.

# Average users
median_cols = eligibles_untreated + ['sqrt_' + c for c in sqrt_candidates]
median_all = df[median_cols].median(axis=0)

# Mode on untreated data, then applying OH encoding
mode_all = df_repr[oh_encoder.candidates].mode(axis=0)
mode_oh_all = oh_encoder.score(mode_all)

# Feature names before one-hot encoding (numeric columns + raw categoricals)
unencoded_eligibles = median_cols + oh_encoder.candidates

# 16-34 user: same statistics, restricted to rows whose profile has age_1634 == 1
median_1634 = (
    df[['bbc_hid3'] + median_cols]
    .merge(profiles, how='left', on='bbc_hid3')
    .query('age_1634 == 1')
    .filter(items=median_cols)
    .median(axis=0)
)

mode_1634 = (
    df_repr[['bbc_hid3'] + oh_encoder.candidates]
    .merge(profiles, how='left', on='bbc_hid3')
    .query('age_1634 == 1')
    .filter(items=oh_encoder.candidates)
    .mode(axis=0)
)
mode_oh_1634 = oh_encoder.score(mode_1634)

# One column per representative user, one row per model feature.
# Series.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat
# produces the same stacked Series on old and new versions alike.
# mode() may return several rows when values tie, so only row 0 is used.
average_users = pd.DataFrame({
    'base': pd.concat([median_all, mode_oh_all.iloc[0]]),
    'u35': pd.concat([median_1634, mode_oh_1634.iloc[0]]),
}, index=eligibles)

# Transposed view: one row per user group, one column per feature
average_df = average_users.T
average_df.index.names = ['group']

average_users.to_csv(log_dir+'/average_users.csv')

AWS access key ID:  <REDACTED>
AWS secret access key:  <REDACTED>
AWS default region:  eu-west-1
AWS session token:  <REDACTED>


ClientError: An error occurred (ExpiredTokenException) when calling the GetSecretValue operation: The security token included in the request is expired

In [264]:
# BUILD MODIFIED USERS ==============

# Read modifications from csv; the csv index holds the name of the feature
# each row modifies
modifiers = pd.read_csv('data/modifiers_average_user.csv', index_col=0)

# Long-format base user: one row per unencoded feature with its baseline value
# (medians for numeric features, row 0 of the modes for categoricals).
# Series.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat
# produces the same stacked Series on old and new versions alike.
base_user = pd.DataFrame({
    'user_id': 'base',
    'feature': unencoded_eligibles,
    'baseval': pd.concat([median_all, mode_all.iloc[0]]),
},
index=unencoded_eligibles
)
base_user.to_csv(log_dir+'/base_test.csv')

# Keep the modified feature's name as a regular column so it survives the merge
modifiers['mod_column'] = modifiers.index

# Cross join the base onto all modifiers (we want one modification per row)
base_user['mergekey'] = 0
modifiers['mergekey'] = 0
modified_all = modifiers.merge(base_user, on='mergekey', how='inner')
base_user.drop('mergekey', axis=1, inplace=True)
modifiers.drop('mergekey', axis=1, inplace=True)


# Nudge or replace values depending on method column.
# Every feature starts at its baseline; only the row whose feature matches the
# modifier's own column is changed below.
modified_all['newval'] = modified_all['baseval']

modified_all.set_index(['user_id'], inplace=True)

# 'delta': add the modifier to the baseline value
delta_rows = (modified_all['method'] == 'delta') & (modified_all['feature'] == modified_all['mod_column'])
modified_all.loc[delta_rows, 'newval'] = modified_all.loc[delta_rows, 'baseval'] + \
    modified_all.loc[delta_rows, 'modifier'].astype(float)

# 'replace': overwrite the baseline value with the modifier
replace_rows = (modified_all['method'] == 'replace') & (modified_all['feature'] == modified_all['mod_column'])
modified_all.loc[replace_rows, 'newval'] = modified_all.loc[replace_rows, 'modifier']
modified_all.drop(['mergekey','method','modifier','baseval'], axis=1, inplace=True)

# Pivot to wide format: one row per (user_id, mod_column), one column per feature
modified_all = modified_all.set_index(['mod_column', 'feature'], append=True).unstack('feature')

# Drop the outer column level left behind by unstack so columns are feature names
modified_all.columns = modified_all.columns.droplevel()

# Converting base user into same format so we can stack it on
base_user['mod_column'] = 'none'
base_user = base_user.reset_index().set_index(['user_id', 'mod_column', 'index']).drop('feature', axis=1).unstack()
base_user.columns = base_user.columns.droplevel()

# modified_all = pd.concat([base_user, modified_all], axis=0)
def tidy_encode(df, num_vars, encoder):
    """Combine float-cast numeric columns with the encoder's output.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding both the columns listed in ``num_vars`` and the
        columns listed in ``encoder.candidates``.
    num_vars : list
        Column labels to select from ``df`` and cast to float.
    encoder : object
        Must expose a ``candidates`` attribute (column labels) and a
        ``score(frame)`` method returning a frame-like object.
        NOTE(review): presumably a fitted one-hot encoder -- confirm
        against the encoder class.

    Returns
    -------
    pandas.DataFrame
        The float-cast numeric columns followed by whatever
        ``encoder.score`` returned, joined column-wise.
    """
    # Numeric side: plain cast to float
    numeric_part = df[num_vars].astype(float)

    # Categorical side: delegate to the encoder on its own candidate columns
    encoded_part = encoder.score(df[encoder.candidates])

    # Side-by-side join, numeric columns first
    return pd.concat([numeric_part, encoded_part], axis=1)

# Run the base user and the modified users through the same treatment:
# numeric columns cast to float, categorical columns passed to oh_encoder.
base_user, modified_users = (
    tidy_encode(untreated, eligibles_untreated, oh_encoder)
    for untreated in (base_user, modified_all)
)

Unnamed: 0_level_0,Unnamed: 1_level_0,profile_enablepersonalisation,profile_mailverified,profile_acc_age_days,profile_age_1634_enriched,streaming_time_13w,stw_2,stw_3,stw_4,stw_5,stw_6,stw_7,stw_8,stw_9,stw_10,stw_11,stw_12,stw_13,stw_14,ew_2,ew_3,ew_4,ew_5,ew_6,ew_7,ew_8,ew_9,ew_10,ew_11,ew_12,ew_13,ew_14,iplayer_lin_reg_coeff,iplayer_13w_yintercept,iplayer_13w_xintercept,iplayer_lin_reg_churn_flag,sounds_user,sounds_active_last_week,sounds_lin_reg_coeff,sounds_13w_yintercept,sounds_13w_xintercept,sounds_lin_reg_churn_flag,genre_share_comedy,genre_share_drama,genre_share_ents,genre_share_childrens,genre_share_factual,genre_share_learning,genre_share_music,genre_share_news,genre_share_religion,genre_share_sport,genre_share_weather,genre_distinct_count,releases_comedies,releases_dramas,releases_ents,releases_childrens,releases_factual,releases_learning,releases_music,releases_news,releases_religion,releases_sport,scaled_releases_comedies,scaled_releases_dramas,scaled_releases_ents,scaled_releases_childrens,scaled_releases_factual,scaled_releases_learning,scaled_releases_music,scaled_releases_news,scaled_releases_religion,scaled_releases_sport,scaled_releases_weather,sched_match_index_comedy,sched_match_index_drama,sched_match_index_ents,sched_match_index_childrens,sched_match_index_factual,sched_match_index_learning,sched_match_index_music,sched_match_index_news,sched_match_index_religion,sched_match_index_sport,sched_match_index_weather,sched_match_index,lw_distinct_series,lw_distinct_episodes,lw_series_premieres,lw_series_finales,lw_watched_finale_flag,lw_avg_episode_repeats,sounds_device_count,iplayer_device_count,device_iplayer_st_desktop_web_perc,device_iplayer_st_mobile_web_perc,device_iplayer_st_app_perc,device_iplayer_st_smart_tv_perc,device_iplayer_st_other_web_perc,device_iplayer_ev_desktop_web_perc,device_iplayer_ev_mobile_web_perc,device_iplayer_ev_app_perc,device_iplayer_ev_smart_tv_perc,device_iplayer_ev_other_web_perc,iplayer_activ_f0,iplayer_activ_f1,
iplayer_activ_f2,iplayer_activ_f3,iplayer_activ_f4,iplayer_activ_f5,iplayer_activ_f6,iplayer_activ_f7,iplayer_activ_f8,iplayer_activ_f9,iplayer_weeks_since_activation,iplayer_fav_f0,iplayer_fav_f1,iplayer_fav_f2,iplayer_fav_f3,iplayer_fav_f4,iplayer_fav_f5,iplayer_fav_f6,iplayer_fav_f7,iplayer_fav_f8,iplayer_fav_f9,mkt_opted_in,mkt_days_opted_in,mkt_days_opted_out,mkt_email_opens_lw,mkt_email_opens_13w,mkt_email_clicks_lw,mkt_email_clicks_13,iplayer_programme_follows_13w,iplayer_programme_follows_lastweek,profile_age,profile_nation_England,profile_nation_Northern Ireland,profile_nation_Scotland,profile_nation_Wales,profile_barb_region_East of England,profile_barb_region_London,profile_barb_region_Midlands East,profile_barb_region_Midlands West,profile_barb_region_North East and Cumbria,profile_barb_region_North West,profile_barb_region_Scotland,profile_barb_region_South,profile_barb_region_South East,profile_barb_region_South West,profile_barb_region_Ulster,profile_barb_region_Wales,profile_barb_region_West,profile_barb_region_Yorkshire and Lincolnshire,profile_acorn_type_description_Active Communal Population,profile_acorn_type_description_Affluent professionals,profile_acorn_type_description_Asset rich families,profile_acorn_type_description_Better-off villagers,profile_acorn_type_description_Business areas without resident population,profile_acorn_type_description_Career driven young families,profile_acorn_type_description_Comfortably-off families in modern housing,profile_acorn_type_description_Deprived and ethnically diverse in flats,profile_acorn_type_description_Deprived areas and high-rise flats,profile_acorn_type_description_Educated families in terraces young children,profile_acorn_type_description_Educated young people in flats and tenements,profile_acorn_type_description_Elderly people in social rented flats,profile_acorn_type_description_Elderly singles in purpose-built accommodation,profile_acorn_type_description_Established suburbs older 
families,profile_acorn_type_description_Exclusive enclaves,profile_acorn_type_description_Fading owner occupied terraces,profile_acorn_type_description_Families in right-to-buy estates,profile_acorn_type_description_Farms and cottages,profile_acorn_type_description_Financially comfortable families,profile_acorn_type_description_First time buyers in small modern homes,profile_acorn_type_description_High occupancy terraces many Asian families,profile_acorn_type_description_Inactive Communal Population,profile_acorn_type_description_Labouring semi-rural estates,profile_acorn_type_description_Large house luxury,profile_acorn_type_description_Larger families in rural areas,profile_acorn_type_description_Larger family homes multi-ethnic areas,profile_acorn_type_description_Low cost flats in suburban areas,profile_acorn_type_description_Low income large families in social rented semis,profile_acorn_type_description_Low income older people in smaller semis,profile_acorn_type_description_Low income terraces,profile_acorn_type_description_Metropolitan money,profile_acorn_type_description_Metropolitan professionals,profile_acorn_type_description_Mixed metropolitan areas,profile_acorn_type_description_Multi-ethnic purpose-built estates,profile_acorn_type_description_Older people neat and tidy neighbourhoods,profile_acorn_type_description_Owner occupied terraces average income,profile_acorn_type_description_Owner occupiers in small towns and villages,profile_acorn_type_description_Pensioners and singles in social rented flats,profile_acorn_type_description_Pensioners in social housing semis and terraces,profile_acorn_type_description_Poorer families many children terraced housing,profile_acorn_type_description_Post-war estates limited means,profile_acorn_type_description_Prosperous suburban families,profile_acorn_type_description_Retired and empty nesters,profile_acorn_type_description_Semi-professional families owner occupied 
neighbourhoods,profile_acorn_type_description_Semi-skilled workers in traditional neighbourhoods,profile_acorn_type_description_Settled suburbia older people,profile_acorn_type_description_Singles and young families some receiving benefits,profile_acorn_type_description_Smaller houses and starter homes,profile_acorn_type_description_Social rented flats families and single parents,profile_acorn_type_description_Socialising young renters,profile_acorn_type_description_Struggling young families in post-war terraces,profile_acorn_type_description_Struggling younger people in mixed tenure,profile_acorn_type_description_Student flats and halls of residence,profile_acorn_type_description_Suburban semis conventional attitudes,profile_acorn_type_description_Term-time terraces,profile_acorn_type_description_Townhouse cosmopolitans,profile_acorn_type_description_Upmarket downsizers,profile_acorn_type_description_Wealthy countryside commuters,profile_acorn_type_description_Well-off edge of towners,profile_acorn_type_description_Young families in low cost private flats,profile_acorn_type_description_Young people in small low cost terraces,profile_acorn_type_description_Younger professionals in smaller flats,profile_acorn_group_description_Career Climbers,profile_acorn_group_description_City Sophisticates,profile_acorn_group_description_Comfortable Seniors,profile_acorn_group_description_Countryside Communities,profile_acorn_group_description_Difficult Circumstances,profile_acorn_group_description_Executive Wealth,profile_acorn_group_description_Lavish Lifestyles,profile_acorn_group_description_Mature Money,profile_acorn_group_description_Modest Means,profile_acorn_group_description_Not Private Households,profile_acorn_group_description_Poorer Pensioners,profile_acorn_group_description_Starting Out,profile_acorn_group_description_Steady Neighbourhoods,profile_acorn_group_description_Striving Families,profile_acorn_group_description_Struggling 
Estates,profile_acorn_group_description_Student Life,profile_acorn_group_description_Successful Suburbs,profile_acorn_group_description_Young Hardship,profile_acorn_category_description_Affluent Achievers,profile_acorn_category_description_Comfortable Communities,profile_acorn_category_description_Financially Stretched,profile_acorn_category_description_Non Private Households,profile_acorn_category_description_Rising Prosperity,profile_acorn_category_description_Urban Adversity,profile_gender_enriched_female,profile_gender_enriched_male,profile_gender_enriched_other,profile_gender_enriched_prefer not to say,iplayer_activating_genre_Children's,iplayer_activating_genre_Comedy,iplayer_activating_genre_Drama,iplayer_activating_genre_Entertainment,iplayer_activating_genre_Factual,iplayer_activating_genre_Learning,iplayer_activating_genre_Music,iplayer_activating_genre_News,iplayer_activating_genre_Religion & Ethics,iplayer_activating_genre_Sport,iplayer_activating_genre_Weather,iplayer_activating_masterbrand_BBC Four,iplayer_activating_masterbrand_BBC One,iplayer_activating_masterbrand_BBC Radio 1,iplayer_activating_masterbrand_BBC Radio 1Xtra,iplayer_activating_masterbrand_BBC Radio 2,iplayer_activating_masterbrand_BBC Radio 3,iplayer_activating_masterbrand_BBC Radio 5 live,iplayer_activating_masterbrand_BBC Three,iplayer_activating_masterbrand_BBC Two,iplayer_activating_masterbrand_CBBC,iplayer_activating_masterbrand_CBeebies,iplayer_activating_masterbrand_Other,iplayer_activating_masterbrand_Other Radio,sounds_fav_content_genre_Children's,sounds_fav_content_genre_Comedy,sounds_fav_content_genre_Drama,sounds_fav_content_genre_Entertainment,sounds_fav_content_genre_Factual,sounds_fav_content_genre_Learning,sounds_fav_content_genre_Music,sounds_fav_content_genre_News,sounds_fav_content_genre_Religion & Ethics,sounds_fav_content_genre_Sport,sounds_fav_content_genre_Weather,sounds_fav_content_masterbrand_BBC Radio 1,sounds_fav_content_masterbrand_BBC Radio 
1Xtra,sounds_fav_content_masterbrand_BBC Radio 2,sounds_fav_content_masterbrand_BBC Radio 3,sounds_fav_content_masterbrand_BBC Radio 4,sounds_fav_content_masterbrand_BBC Radio 5 live,sounds_fav_content_masterbrand_BBC Radio 5 live sports extra,sounds_fav_content_masterbrand_BBC Radio 6 Music,sounds_fav_content_masterbrand_BBC Two,sounds_fav_content_masterbrand_BBC World Service,sounds_fav_content_masterbrand_Other,sounds_fav_content_masterbrand_Other Radio,iplayer_fav_content_genre_Children's,iplayer_fav_content_genre_Comedy,iplayer_fav_content_genre_Drama,iplayer_fav_content_genre_Entertainment,iplayer_fav_content_genre_Factual,iplayer_fav_content_genre_Learning,iplayer_fav_content_genre_Music,iplayer_fav_content_genre_News,iplayer_fav_content_genre_Religion & Ethics,iplayer_fav_content_genre_Sport,iplayer_fav_content_genre_Weather,iplayer_fav_content_masterbrand_BBC Four,iplayer_fav_content_masterbrand_BBC One,iplayer_fav_content_masterbrand_BBC Radio 1,iplayer_fav_content_masterbrand_BBC Radio 2,iplayer_fav_content_masterbrand_BBC Radio 3,iplayer_fav_content_masterbrand_BBC Three,iplayer_fav_content_masterbrand_BBC Two,iplayer_fav_content_masterbrand_CBBC,iplayer_fav_content_masterbrand_CBeebies,iplayer_fav_content_masterbrand_Other,iplayer_fav_content_masterbrand_Other Radio,freq_seg_latest_iplayer_A. daily (5+ days per week),freq_seg_latest_iplayer_B. 2-5 days per week,freq_seg_latest_iplayer_C. 1-2 days per week,freq_seg_latest_iplayer_D. fortnightly,freq_seg_latest_iplayer_E. monthly,freq_seg_latest_iplayer_F. less than monthly,freq_seg_latest_iplayer_G. last seen 13-26 weeks ago,freq_seg_latest_iplayer_H. last seen 26-52 weeks ago,freq_seg_latest_iplayer_I. dormant,freq_seg_latest_iplayer_X. Inactive,freq_seg_latest_sounds_A. daily (5+ days per week),freq_seg_latest_sounds_B. 2-5 days per week,freq_seg_latest_sounds_C. 1-2 days per week,freq_seg_latest_sounds_D. fortnightly,freq_seg_latest_sounds_E. monthly,freq_seg_latest_sounds_F. 
less than monthly,freq_seg_latest_sounds_G. last seen 13-26 weeks ago,freq_seg_latest_sounds_H. last seen 26-52 weeks ago,freq_seg_latest_sounds_I. dormant,freq_seg_latest_sounds_X. Inactive,freq_seg_latest_news_A. daily (5+ days per week),freq_seg_latest_news_B. 2-5 days per week,freq_seg_latest_news_C. 1-2 days per week,freq_seg_latest_news_D. fortnightly,freq_seg_latest_news_E. monthly,freq_seg_latest_news_F. less than monthly,freq_seg_latest_news_G. last seen 13-26 weeks ago,freq_seg_latest_news_H. last seen 26-52 weeks ago,freq_seg_latest_news_I. dormant,freq_seg_latest_news_X. Inactive,freq_seg_latest_sport_A. daily (5+ days per week),freq_seg_latest_sport_B. 2-5 days per week,freq_seg_latest_sport_C. 1-2 days per week,freq_seg_latest_sport_D. fortnightly,freq_seg_latest_sport_E. monthly,freq_seg_latest_sport_F. less than monthly,freq_seg_latest_sport_G. last seen 13-26 weeks ago,freq_seg_latest_sport_H. last seen 26-52 weeks ago,freq_seg_latest_sport_I. dormant,freq_seg_latest_sport_X. Inactive,freq_seg_latest_cbbc_A. daily (5+ days per week),freq_seg_latest_cbbc_B. 2-5 days per week,freq_seg_latest_cbbc_C. 1-2 days per week,freq_seg_latest_cbbc_D. fortnightly,freq_seg_latest_cbbc_E. monthly,freq_seg_latest_cbbc_F. less than monthly,freq_seg_latest_cbbc_G. last seen 13-26 weeks ago,freq_seg_latest_cbbc_H. last seen 26-52 weeks ago,freq_seg_latest_cbbc_I. dormant,freq_seg_latest_cbbc_X. Inactive,freq_seg_latest_cbeebies_A. daily (5+ days per week),freq_seg_latest_cbeebies_B. 2-5 days per week,freq_seg_latest_cbeebies_C. 1-2 days per week,freq_seg_latest_cbeebies_D. fortnightly,freq_seg_latest_cbeebies_E. monthly,freq_seg_latest_cbeebies_F. less than monthly,freq_seg_latest_cbeebies_G. last seen 13-26 weeks ago,freq_seg_latest_cbeebies_H. last seen 26-52 weeks ago,freq_seg_latest_cbeebies_I. dormant,freq_seg_latest_cbeebies_X. Inactive,freq_seg_latest_weather_A. daily (5+ days per week),freq_seg_latest_weather_B. 2-5 days per week,freq_seg_latest_weather_C. 
1-2 days per week,freq_seg_latest_weather_D. fortnightly,freq_seg_latest_weather_E. monthly,freq_seg_latest_weather_F. less than monthly,freq_seg_latest_weather_G. last seen 13-26 weeks ago,freq_seg_latest_weather_H. last seen 26-52 weeks ago,freq_seg_latest_weather_I. dormant,freq_seg_latest_weather_X. Inactive,freq_seg_latest_panbbc_A. daily (5+ days per week),freq_seg_latest_panbbc_B. 2-5 days per week,freq_seg_latest_panbbc_C. 1-2 days per week,freq_seg_latest_panbbc_D. fortnightly,freq_seg_latest_panbbc_E. monthly,freq_seg_latest_panbbc_F. less than monthly,freq_seg_latest_panbbc_G. last seen 13-26 weeks ago,freq_seg_latest_panbbc_H. last seen 26-52 weeks ago,freq_seg_latest_panbbc_I. dormant,freq_seg_latest_panbbc_X. Inactive,device_sounds_st_preferred_desktop-web,device_sounds_st_preferred_mobile-app,device_sounds_st_preferred_mobile-web,device_sounds_st_preferred_other-web,device_sounds_st_preferred_smart-tv,device_iplayer_st_preferred_desktop-web,device_iplayer_st_preferred_mobile-app,device_iplayer_st_preferred_mobile-web,device_iplayer_st_preferred_other-web,device_iplayer_st_preferred_smart-tv,device_sounds_ev_preferred_desktop-web,device_sounds_ev_preferred_mobile-app,device_sounds_ev_preferred_mobile-web,device_sounds_ev_preferred_other-web,device_sounds_ev_preferred_smart-tv,device_iplayer_ev_preferred_desktop-web,device_iplayer_ev_preferred_mobile-app,device_iplayer_ev_preferred_mobile-web,device_iplayer_ev_preferred_other-web,device_iplayer_ev_preferred_smart-tv
user_id,mod_column,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 
100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1,Unnamed: 149_level_1,Unnamed: 150_level_1,Unnamed: 151_level_1,Unnamed: 152_level_1,Unnamed: 153_level_1,Unnamed: 154_level_1,Unnamed: 155_level_1,Unnamed: 156_level_1,Unnamed: 157_level_1,Unnamed: 158_level_1,Unnamed: 159_level_1,Unnamed: 160_level_1,Unnamed: 161_level_1,Unnamed: 162_level_1,Unnamed: 163_level_1,Unnamed: 164_level_1,Unnamed: 165_level_1,Unnamed: 166_level_1,Unnamed: 167_level_1,Unnamed: 168_level_1,Unnamed: 169_level_1,Unnamed: 170_level_1,Unnamed: 171_level_1,Unnamed: 172_level_1,Unnamed: 173_level_1,Unnamed: 174_level_1,Unnamed: 175_level_1,Unnamed: 176_level_1,Unnamed: 177_level_1,Unnamed: 178_level_1,Unnamed: 179_level_1,Unnamed: 180_level_1,Unnamed: 181_level_1,Unnamed: 182_level_1,Unnamed: 183_level_1,Unnamed: 184_level_1,Unnamed: 185_level_1,Unnamed: 186_level_1,Unnamed: 187_level_1,Unnamed: 188_level_1,Unnamed: 189_level_1,Unnamed: 190_level_1,Unnamed: 191_level_1,Unnamed: 192_level_1,Unnamed: 193_level_1,Unnamed: 194_level_1,Unnamed: 
195_level_1,Unnamed: 196_level_1,Unnamed: 197_level_1,Unnamed: 198_level_1,Unnamed: 199_level_1,Unnamed: 200_level_1,Unnamed: 201_level_1,Unnamed: 202_level_1,Unnamed: 203_level_1,Unnamed: 204_level_1,Unnamed: 205_level_1,Unnamed: 206_level_1,Unnamed: 207_level_1,Unnamed: 208_level_1,Unnamed: 209_level_1,Unnamed: 210_level_1,Unnamed: 211_level_1,Unnamed: 212_level_1,Unnamed: 213_level_1,Unnamed: 214_level_1,Unnamed: 215_level_1,Unnamed: 216_level_1,Unnamed: 217_level_1,Unnamed: 218_level_1,Unnamed: 219_level_1,Unnamed: 220_level_1,Unnamed: 221_level_1,Unnamed: 222_level_1,Unnamed: 223_level_1,Unnamed: 224_level_1,Unnamed: 225_level_1,Unnamed: 226_level_1,Unnamed: 227_level_1,Unnamed: 228_level_1,Unnamed: 229_level_1,Unnamed: 230_level_1,Unnamed: 231_level_1,Unnamed: 232_level_1,Unnamed: 233_level_1,Unnamed: 234_level_1,Unnamed: 235_level_1,Unnamed: 236_level_1,Unnamed: 237_level_1,Unnamed: 238_level_1,Unnamed: 239_level_1,Unnamed: 240_level_1,Unnamed: 241_level_1,Unnamed: 242_level_1,Unnamed: 243_level_1,Unnamed: 244_level_1,Unnamed: 245_level_1,Unnamed: 246_level_1,Unnamed: 247_level_1,Unnamed: 248_level_1,Unnamed: 249_level_1,Unnamed: 250_level_1,Unnamed: 251_level_1,Unnamed: 252_level_1,Unnamed: 253_level_1,Unnamed: 254_level_1,Unnamed: 255_level_1,Unnamed: 256_level_1,Unnamed: 257_level_1,Unnamed: 258_level_1,Unnamed: 259_level_1,Unnamed: 260_level_1,Unnamed: 261_level_1,Unnamed: 262_level_1,Unnamed: 263_level_1,Unnamed: 264_level_1,Unnamed: 265_level_1,Unnamed: 266_level_1,Unnamed: 267_level_1,Unnamed: 268_level_1,Unnamed: 269_level_1,Unnamed: 270_level_1,Unnamed: 271_level_1,Unnamed: 272_level_1,Unnamed: 273_level_1,Unnamed: 274_level_1,Unnamed: 275_level_1,Unnamed: 276_level_1,Unnamed: 277_level_1,Unnamed: 278_level_1,Unnamed: 279_level_1,Unnamed: 280_level_1,Unnamed: 281_level_1,Unnamed: 282_level_1,Unnamed: 283_level_1,Unnamed: 284_level_1,Unnamed: 285_level_1,Unnamed: 286_level_1,Unnamed: 287_level_1,Unnamed: 288_level_1,Unnamed: 289_level_1,Unnamed: 
290_level_1,Unnamed: 291_level_1,Unnamed: 292_level_1,Unnamed: 293_level_1,Unnamed: 294_level_1,Unnamed: 295_level_1,Unnamed: 296_level_1,Unnamed: 297_level_1,Unnamed: 298_level_1,Unnamed: 299_level_1,Unnamed: 300_level_1,Unnamed: 301_level_1,Unnamed: 302_level_1,Unnamed: 303_level_1,Unnamed: 304_level_1,Unnamed: 305_level_1,Unnamed: 306_level_1,Unnamed: 307_level_1,Unnamed: 308_level_1,Unnamed: 309_level_1,Unnamed: 310_level_1,Unnamed: 311_level_1,Unnamed: 312_level_1,Unnamed: 313_level_1,Unnamed: 314_level_1,Unnamed: 315_level_1,Unnamed: 316_level_1,Unnamed: 317_level_1,Unnamed: 318_level_1,Unnamed: 319_level_1,Unnamed: 320_level_1,Unnamed: 321_level_1,Unnamed: 322_level_1,Unnamed: 323_level_1,Unnamed: 324_level_1,Unnamed: 325_level_1,Unnamed: 326_level_1,Unnamed: 327_level_1,Unnamed: 328_level_1,Unnamed: 329_level_1,Unnamed: 330_level_1,Unnamed: 331_level_1,Unnamed: 332_level_1,Unnamed: 333_level_1,Unnamed: 334_level_1,Unnamed: 335_level_1,Unnamed: 336_level_1,Unnamed: 337_level_1,Unnamed: 338_level_1,Unnamed: 339_level_1,Unnamed: 340_level_1,Unnamed: 341_level_1,Unnamed: 342_level_1,Unnamed: 343_level_1,Unnamed: 344_level_1,Unnamed: 345_level_1,Unnamed: 346_level_1,Unnamed: 347_level_1,Unnamed: 348_level_1,Unnamed: 349_level_1,Unnamed: 350_level_1,Unnamed: 351_level_1,Unnamed: 352_level_1,Unnamed: 353_level_1,Unnamed: 354_level_1,Unnamed: 355_level_1,Unnamed: 356_level_1,Unnamed: 357_level_1,Unnamed: 358_level_1,Unnamed: 359_level_1,Unnamed: 360_level_1,Unnamed: 361_level_1,Unnamed: 362_level_1,Unnamed: 363_level_1,Unnamed: 364_level_1,Unnamed: 365_level_1,Unnamed: 366_level_1,Unnamed: 367_level_1,Unnamed: 368_level_1,Unnamed: 369_level_1,Unnamed: 370_level_1,Unnamed: 371_level_1,Unnamed: 372_level_1,Unnamed: 373_level_1,Unnamed: 374_level_1,Unnamed: 375_level_1,Unnamed: 376_level_1,Unnamed: 377_level_1,Unnamed: 378_level_1,Unnamed: 379_level_1,Unnamed: 380_level_1,Unnamed: 381_level_1,Unnamed: 382_level_1,Unnamed: 383_level_1,Unnamed: 384_level_1,Unnamed: 
385_level_1,Unnamed: 386_level_1,Unnamed: 387_level_1,Unnamed: 388_level_1,Unnamed: 389_level_1,Unnamed: 390_level_1,Unnamed: 391_level_1,Unnamed: 392_level_1,Unnamed: 393_level_1,Unnamed: 394_level_1,Unnamed: 395_level_1,Unnamed: 396_level_1,Unnamed: 397_level_1,Unnamed: 398_level_1,Unnamed: 399_level_1,Unnamed: 400_level_1,Unnamed: 401_level_1,Unnamed: 402_level_1,Unnamed: 403_level_1,Unnamed: 404_level_1,Unnamed: 405_level_1,Unnamed: 406_level_1,Unnamed: 407_level_1,Unnamed: 408_level_1,Unnamed: 409_level_1,Unnamed: 410_level_1,Unnamed: 411_level_1,Unnamed: 412_level_1,Unnamed: 413_level_1
base,freq_seg_latest_news,1.0,0.0,876.0,0.0,63068.5,5255.5,3438.0,2999.0,2849.0,2288.0,2520.0,2722.5,3453.0,2834.0,1618.0,1872.0,1761.0,1757.5,21.0,15.0,14.0,13.0,11.0,12.0,13.0,14.0,12.0,10.0,11.0,10.0,10.0,-0.346154,18.884615,17.101266,1.0,0.0,0.0,0.0,0.807692,14.0,0.0,0.024075,0.350202,0.0,0.0,0.13838,0.0,0.0,0.0,0.0,0.0,0.0,5.0,8.0,16.0,14.0,55.0,190.0,2.0,36.0,89.0,2.0,45.0,0.781955,0.967442,1.144654,1.118936,1.213163,2.363636,1.271739,1.147817,0.742857,1.794479,0.928571,-0.00525,-0.011402,0.0,0.0,0.029498,0.0,0.0,0.0,0.0,0.0,0.0,0.06098,1.0,2.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.882987,0.0,0.0,0.0,0.0,0.78481,0.0,0.075883,0.418513,-0.024913,0.012957,-0.135439,-0.098421,-0.157692,-0.000536,-0.22694,0.321448,23.0,0.075883,0.386932,-0.082854,-0.024653,-0.08804,-0.123171,-0.111607,0.082167,-0.24409,0.321448,0.0,873.5,871.0,0.0,0.0,0.0,0.0,0.0,0.0,44.0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1
base,freq_seg_latest_sounds,1.0,0.0,876.0,0.0,63068.5,5255.5,3438.0,2999.0,2849.0,2288.0,2520.0,2722.5,3453.0,2834.0,1618.0,1872.0,1761.0,1757.5,21.0,15.0,14.0,13.0,11.0,12.0,13.0,14.0,12.0,10.0,11.0,10.0,10.0,-0.346154,18.884615,17.101266,1.0,0.0,0.0,0.0,0.807692,14.0,0.0,0.024075,0.350202,0.0,0.0,0.13838,0.0,0.0,0.0,0.0,0.0,0.0,5.0,8.0,16.0,14.0,55.0,190.0,2.0,36.0,89.0,2.0,45.0,0.781955,0.967442,1.144654,1.118936,1.213163,2.363636,1.271739,1.147817,0.742857,1.794479,0.928571,-0.00525,-0.011402,0.0,0.0,0.029498,0.0,0.0,0.0,0.0,0.0,0.0,0.06098,1.0,2.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.882987,0.0,0.0,0.0,0.0,0.78481,0.0,0.075883,0.418513,-0.024913,0.012957,-0.135439,-0.098421,-0.157692,-0.000536,-0.22694,0.321448,23.0,0.075883,0.386932,-0.082854,-0.024653,-0.08804,-0.123171,-0.111607,0.082167,-0.24409,0.321448,0.0,873.5,871.0,0.0,0.0,0.0,0.0,0.0,0.0,44.0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1
base,freq_seg_latest_sport,1.0,0.0,876.0,0.0,63068.5,5255.5,3438.0,2999.0,2849.0,2288.0,2520.0,2722.5,3453.0,2834.0,1618.0,1872.0,1761.0,1757.5,21.0,15.0,14.0,13.0,11.0,12.0,13.0,14.0,12.0,10.0,11.0,10.0,10.0,-0.346154,18.884615,17.101266,1.0,0.0,0.0,0.0,0.807692,14.0,0.0,0.024075,0.350202,0.0,0.0,0.13838,0.0,0.0,0.0,0.0,0.0,0.0,5.0,8.0,16.0,14.0,55.0,190.0,2.0,36.0,89.0,2.0,45.0,0.781955,0.967442,1.144654,1.118936,1.213163,2.363636,1.271739,1.147817,0.742857,1.794479,0.928571,-0.00525,-0.011402,0.0,0.0,0.029498,0.0,0.0,0.0,0.0,0.0,0.0,0.06098,1.0,2.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.882987,0.0,0.0,0.0,0.0,0.78481,0.0,0.075883,0.418513,-0.024913,0.012957,-0.135439,-0.098421,-0.157692,-0.000536,-0.22694,0.321448,23.0,0.075883,0.386932,-0.082854,-0.024653,-0.08804,-0.123171,-0.111607,0.082167,-0.24409,0.321448,0.0,873.5,871.0,0.0,0.0,0.0,0.0,0.0,0.0,44.0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1
base,genre_distinct_count,1.0,0.0,876.0,0.0,63068.5,5255.5,3438.0,2999.0,2849.0,2288.0,2520.0,2722.5,3453.0,2834.0,1618.0,1872.0,1761.0,1757.5,21.0,15.0,14.0,13.0,11.0,12.0,13.0,14.0,12.0,10.0,11.0,10.0,10.0,-0.346154,18.884615,17.101266,1.0,0.0,0.0,0.0,0.807692,14.0,0.0,0.024075,0.350202,0.0,0.0,0.13838,0.0,0.0,0.0,0.0,0.0,0.0,6.0,8.0,16.0,14.0,55.0,190.0,2.0,36.0,89.0,2.0,45.0,0.781955,0.967442,1.144654,1.118936,1.213163,2.363636,1.271739,1.147817,0.742857,1.794479,0.928571,-0.00525,-0.011402,0.0,0.0,0.029498,0.0,0.0,0.0,0.0,0.0,0.0,0.06098,1.0,2.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.882987,0.0,0.0,0.0,0.0,0.78481,0.0,0.075883,0.418513,-0.024913,0.012957,-0.135439,-0.098421,-0.157692,-0.000536,-0.22694,0.321448,23.0,0.075883,0.386932,-0.082854,-0.024653,-0.08804,-0.123171,-0.111607,0.082167,-0.24409,0.321448,0.0,873.5,871.0,0.0,0.0,0.0,0.0,0.0,0.0,44.0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1
base,genre_share_childrens,1.0,0.0,876.0,0.0,63068.5,5255.5,3438.0,2999.0,2849.0,2288.0,2520.0,2722.5,3453.0,2834.0,1618.0,1872.0,1761.0,1757.5,21.0,15.0,14.0,13.0,11.0,12.0,13.0,14.0,12.0,10.0,11.0,10.0,10.0,-0.346154,18.884615,17.101266,1.0,0.0,0.0,0.0,0.807692,14.0,0.0,0.024075,0.350202,0.0,0.3,0.13838,0.0,0.0,0.0,0.0,0.0,0.0,5.0,8.0,16.0,14.0,55.0,190.0,2.0,36.0,89.0,2.0,45.0,0.781955,0.967442,1.144654,1.118936,1.213163,2.363636,1.271739,1.147817,0.742857,1.794479,0.928571,-0.00525,-0.011402,0.0,0.0,0.029498,0.0,0.0,0.0,0.0,0.0,0.0,0.06098,1.0,2.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.882987,0.0,0.0,0.0,0.0,0.78481,0.0,0.075883,0.418513,-0.024913,0.012957,-0.135439,-0.098421,-0.157692,-0.000536,-0.22694,0.321448,23.0,0.075883,0.386932,-0.082854,-0.024653,-0.08804,-0.123171,-0.111607,0.082167,-0.24409,0.321448,0.0,873.5,871.0,0.0,0.0,0.0,0.0,0.0,0.0,44.0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1
base,genre_share_drama,1.0,0.0,876.0,0.0,63068.5,5255.5,3438.0,2999.0,2849.0,2288.0,2520.0,2722.5,3453.0,2834.0,1618.0,1872.0,1761.0,1757.5,21.0,15.0,14.0,13.0,11.0,12.0,13.0,14.0,12.0,10.0,11.0,10.0,10.0,-0.346154,18.884615,17.101266,1.0,0.0,0.0,0.0,0.807692,14.0,0.0,0.024075,0.650202,0.0,0.0,0.13838,0.0,0.0,0.0,0.0,0.0,0.0,5.0,8.0,16.0,14.0,55.0,190.0,2.0,36.0,89.0,2.0,45.0,0.781955,0.967442,1.144654,1.118936,1.213163,2.363636,1.271739,1.147817,0.742857,1.794479,0.928571,-0.00525,-0.011402,0.0,0.0,0.029498,0.0,0.0,0.0,0.0,0.0,0.0,0.06098,1.0,2.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.882987,0.0,0.0,0.0,0.0,0.78481,0.0,0.075883,0.418513,-0.024913,0.012957,-0.135439,-0.098421,-0.157692,-0.000536,-0.22694,0.321448,23.0,0.075883,0.386932,-0.082854,-0.024653,-0.08804,-0.123171,-0.111607,0.082167,-0.24409,0.321448,0.0,873.5,871.0,0.0,0.0,0.0,0.0,0.0,0.0,44.0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1
base,iplayer_fav_content_genre,1.0,0.0,876.0,0.0,63068.5,5255.5,3438.0,2999.0,2849.0,2288.0,2520.0,2722.5,3453.0,2834.0,1618.0,1872.0,1761.0,1757.5,21.0,15.0,14.0,13.0,11.0,12.0,13.0,14.0,12.0,10.0,11.0,10.0,10.0,-0.346154,18.884615,17.101266,1.0,0.0,0.0,0.0,0.807692,14.0,0.0,0.024075,0.350202,0.0,0.0,0.13838,0.0,0.0,0.0,0.0,0.0,0.0,5.0,8.0,16.0,14.0,55.0,190.0,2.0,36.0,89.0,2.0,45.0,0.781955,0.967442,1.144654,1.118936,1.213163,2.363636,1.271739,1.147817,0.742857,1.794479,0.928571,-0.00525,-0.011402,0.0,0.0,0.029498,0.0,0.0,0.0,0.0,0.0,0.0,0.06098,1.0,2.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.882987,0.0,0.0,0.0,0.0,0.78481,0.0,0.075883,0.418513,-0.024913,0.012957,-0.135439,-0.098421,-0.157692,-0.000536,-0.22694,0.321448,23.0,0.075883,0.386932,-0.082854,-0.024653,-0.08804,-0.123171,-0.111607,0.082167,-0.24409,0.321448,0.0,873.5,871.0,0.0,0.0,0.0,0.0,0.0,0.0,44.0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1
base,iplayer_fav_content_masterbrand,1.0,0.0,876.0,0.0,63068.5,5255.5,3438.0,2999.0,2849.0,2288.0,2520.0,2722.5,3453.0,2834.0,1618.0,1872.0,1761.0,1757.5,21.0,15.0,14.0,13.0,11.0,12.0,13.0,14.0,12.0,10.0,11.0,10.0,10.0,-0.346154,18.884615,17.101266,1.0,0.0,0.0,0.0,0.807692,14.0,0.0,0.024075,0.350202,0.0,0.0,0.13838,0.0,0.0,0.0,0.0,0.0,0.0,5.0,8.0,16.0,14.0,55.0,190.0,2.0,36.0,89.0,2.0,45.0,0.781955,0.967442,1.144654,1.118936,1.213163,2.363636,1.271739,1.147817,0.742857,1.794479,0.928571,-0.00525,-0.011402,0.0,0.0,0.029498,0.0,0.0,0.0,0.0,0.0,0.0,0.06098,1.0,2.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.882987,0.0,0.0,0.0,0.0,0.78481,0.0,0.075883,0.418513,-0.024913,0.012957,-0.135439,-0.098421,-0.157692,-0.000536,-0.22694,0.321448,23.0,0.075883,0.386932,-0.082854,-0.024653,-0.08804,-0.123171,-0.111607,0.082167,-0.24409,0.321448,0.0,873.5,871.0,0.0,0.0,0.0,0.0,0.0,0.0,44.0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1
base,iplayer_programme_follows_13w,1.0,0.0,876.0,0.0,63068.5,5255.5,3438.0,2999.0,2849.0,2288.0,2520.0,2722.5,3453.0,2834.0,1618.0,1872.0,1761.0,1757.5,21.0,15.0,14.0,13.0,11.0,12.0,13.0,14.0,12.0,10.0,11.0,10.0,10.0,-0.346154,18.884615,17.101266,1.0,0.0,0.0,0.0,0.807692,14.0,0.0,0.024075,0.350202,0.0,0.0,0.13838,0.0,0.0,0.0,0.0,0.0,0.0,5.0,8.0,16.0,14.0,55.0,190.0,2.0,36.0,89.0,2.0,45.0,0.781955,0.967442,1.144654,1.118936,1.213163,2.363636,1.271739,1.147817,0.742857,1.794479,0.928571,-0.00525,-0.011402,0.0,0.0,0.029498,0.0,0.0,0.0,0.0,0.0,0.0,0.06098,1.0,2.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.882987,0.0,0.0,0.0,0.0,0.78481,0.0,0.075883,0.418513,-0.024913,0.012957,-0.135439,-0.098421,-0.157692,-0.000536,-0.22694,0.321448,23.0,0.075883,0.386932,-0.082854,-0.024653,-0.08804,-0.123171,-0.111607,0.082167,-0.24409,0.321448,0.0,873.5,871.0,0.0,0.0,0.0,0.0,10.0,0.0,44.0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1
base,iplayer_programme_follows_lastweek,1.0,0.0,876.0,0.0,63068.5,5255.5,3438.0,2999.0,2849.0,2288.0,2520.0,2722.5,3453.0,2834.0,1618.0,1872.0,1761.0,1757.5,21.0,15.0,14.0,13.0,11.0,12.0,13.0,14.0,12.0,10.0,11.0,10.0,10.0,-0.346154,18.884615,17.101266,1.0,0.0,0.0,0.0,0.807692,14.0,0.0,0.024075,0.350202,0.0,0.0,0.13838,0.0,0.0,0.0,0.0,0.0,0.0,5.0,8.0,16.0,14.0,55.0,190.0,2.0,36.0,89.0,2.0,45.0,0.781955,0.967442,1.144654,1.118936,1.213163,2.363636,1.271739,1.147817,0.742857,1.794479,0.928571,-0.00525,-0.011402,0.0,0.0,0.029498,0.0,0.0,0.0,0.0,0.0,0.0,0.06098,1.0,2.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.882987,0.0,0.0,0.0,0.0,0.78481,0.0,0.075883,0.418513,-0.024913,0.012957,-0.135439,-0.098421,-0.157692,-0.000536,-0.22694,0.321448,23.0,0.075883,0.386932,-0.082854,-0.024653,-0.08804,-0.123171,-0.111607,0.082167,-0.24409,0.321448,0.0,873.5,871.0,0.0,0.0,0.0,0.0,0.0,1.0,44.0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1


In [265]:
# WHAT-IF SCORING: BASE USER vs MODIFIED USERS ==============
# Load the pickled stacked model used for scoring.
stack = utils.unpickle('{}/models/stack'.format(pickle_dir))

# Score the unmodified base user first, then every modified copy of it.
# NOTE(review): both calls ask the stack to save weak-learner predictions, so
# whatever the first call stored is presumably replaced by the second - confirm.
base_prediction = stack.predict(
    base_user,
    save_weak_learner_predictions=True,
    save_shap_sample=save_shap_sample,
)
modified_predictions = stack.predict(
    modified_users,
    save_weak_learner_predictions=True,
    save_shap_sample=save_shap_sample,
)

# Side-by-side comparison: the single base score is broadcast against the
# score of each modification. Kept as the final expression so the notebook
# renders it as the cell output.
pd.DataFrame(
    data=dict(
        base_prediction=base_prediction[0],
        modified_prediction=modified_predictions,
    ),
    index=modified_users.index,
)

# Disabled debugging helpers:
# print('\nModified:\n')
# modified_users
# modified_all[eligibles_untreated]
# oh_encoder.score(modified_all[oh_encoder.candidates])

0 MODEL:  Content : 



1 MODEL:  Behaviour : 



2 MODEL:  Marketing : 



3 MODEL:  Cross-Sell : 



Stacking.

Complete!

0 MODEL:  Content : 



1 MODEL:  Behaviour : 



2 MODEL:  Marketing : 



3 MODEL:  Cross-Sell : 



Stacking.

Complete!



Unnamed: 0_level_0,Unnamed: 1_level_0,base_prediction,modified_prediction
user_id,mod_column,Unnamed: 2_level_1,Unnamed: 3_level_1
base,freq_seg_latest_news,0.170733,0.172008
base,freq_seg_latest_sounds,0.170733,0.175178
base,freq_seg_latest_sport,0.170733,0.1722
base,genre_distinct_count,0.170733,0.145938
base,genre_share_childrens,0.170733,0.162289
base,genre_share_drama,0.170733,0.15854
base,iplayer_fav_content_genre,0.170733,0.170733
base,iplayer_fav_content_masterbrand,0.170733,0.170733
base,iplayer_programme_follows_13w,0.170733,0.170733
base,iplayer_programme_follows_lastweek,0.170733,0.170733


In [254]:
# Display the table of what-if modifications, indexed by feature name, with
# the columns `modifier`, `method` and `mod_column`.
# NOTE(review): `method` presumably selects how `modifier` is applied to the
# base value ('delta' vs 'replace') - confirm against the code that builds it.
modifiers

Unnamed: 0,modifier,method,mod_column
lw_distinct_series,1,delta,lw_distinct_series
lw_distinct_episodes,1,delta,lw_distinct_episodes
lw_series_premieres,1,delta,lw_series_premieres
lw_series_finales,1,delta,lw_series_finales
iplayer_programme_follows_13w,10,delta,iplayer_programme_follows_13w
iplayer_programme_follows_lastweek,1,delta,iplayer_programme_follows_lastweek
genre_distinct_count,1,delta,genre_distinct_count
genre_share_childrens,0.3,delta,genre_share_childrens
genre_share_drama,0.3,delta,genre_share_drama
iplayer_fav_content_masterbrand,BBC Three,replace,iplayer_fav_content_masterbrand


In [None]:
# MODEL SCORING ==============
# Scores the representative users (`average_df`) with the pickled stack,
# reshapes the weak-learner outputs to long format, uploads meta- and
# weak-learner scores to Redshift, and writes SHAP samples and feature
# importances to S3.
from datetime import datetime

stack = utils.unpickle(pickle_dir+'/models/stack')

# REPRESENTATIVE USERS
repr_predictions = stack.predict(average_df, save_weak_learner_predictions = True, save_shap_sample = save_shap_sample)

# Weak-learner outputs are read back from the stack after predict().
# NOTE(review): assumes the wide frames are indexed by 'group' (needed for
# id_vars below) with one column per weak learner - confirm.
repr_weak_learner_preds_wide = stack.weak_learner_predictions
repr_weak_learner_preds_long = pd.melt(
    repr_weak_learner_preds_wide.reset_index(), id_vars = 'group', var_name = 'weak_learner', value_name='prediction'
)
repr_weak_learner_classes_wide = stack.weak_learner_classifications
repr_weak_learner_classes_long = pd.melt(
    repr_weak_learner_classes_wide.reset_index(), id_vars = 'group', var_name = 'weak_learner', value_name='classification'
)

# Retrieving class threshold from the stack dictionary.
# The items view is materialised as a list of (name, threshold) pairs so the
# DataFrame constructor receives a plain sequence of rows.
repr_weak_learner_thresholds = pd.DataFrame(
    list(stack.weak_learner_thresholds.items()), columns = ['weak_learner', 'threshold']
)
repr_weak_learner_classes_long = repr_weak_learner_classes_long.merge(repr_weak_learner_thresholds, on='weak_learner')

print(repr_weak_learner_preds_long.head(10))
print(repr_weak_learner_classes_long.head(10))

# PUSH SCORES TO REDSHIFT ==============

# Export meta-model scores to Redshift
df_export_meta = pd.DataFrame({
    'representative_user': average_df.index,
    'learner_type': 'meta-learner',
    'learner_name': 'iplayer-meta',
    'predicted_probability': repr_predictions
})
print(df_export_meta.head())

# Weak-learner scores share the same four columns so both can be stacked.
df_export_weak = pd.DataFrame({
    'representative_user': repr_weak_learner_preds_long['group'],
    'learner_type': 'weak-learner',
    'learner_name': repr_weak_learner_preds_long['weak_learner'],
    'predicted_probability': repr_weak_learner_preds_long['prediction']
})
print(df_export_weak.head())

df_export = pd.concat([df_export_meta, df_export_weak])

# Every row of this batch gets the same (local, naive) scoring timestamp.
df_export['score_datetime'] = datetime.today().strftime('%Y-%m-%d %H:%M:%S')


utils.rs_upload(df=df_export,
                s3_obj='s3://central-insights/philpa03/loyalty-propensity-scores/model-scores-reps',
                tmp='./data/model-scores-reps.csv',
                rs_table='central_insights_sandbox.ap_churn_iplayer_representative_user_scores',
                aws_creds=aws_creds, # to access AWS
                secret_dict=secret_dict, # to access Redshift
                truncate=True,
                csv=True,
                gzip=False
                )


# Export SHAP data to s3

# print(stack.shap_sample_dict['little_lgbm'].head(10))
if save_shap_sample:
    for (model_name, shap_values) in stack.shap_sample_dict.items():

        ### Update the archive with scores for this date - WILL OVERWRITE IF THE DATA ISN'T UPDATED
        # Add whole batch to the representatives folder in s3.
        # The 'group' index level is dropped on a copy rather than in place,
        # so the frames held by `stack` are not modified and the export can
        # be run again on the same stack.
        (shap_values
            .reset_index(level=['group'], drop=True)
            .to_csv('s3://central-insights/philpa03/loyalty/iplayer/shap-values/representatives/{model}.csv'.format(model = model_name), index=True)
            )

# Export feature importances to S3
# The loop variable is deliberately not called `fi`: that name is the
# `src.fi` module imported at the top of the notebook and must not be rebound.
for (model_name, feature_importance) in stack.fi_dict.items():

    # Average the feature importance across folds and push to S3
    (pd.DataFrame(feature_importance.importances.mean(1), columns=['gain'])
        .rename_axis('feature')
        .to_csv('s3://central-insights/philpa03/loyalty/iplayer/fi/representatives/{model}.csv'.format(model = model_name), index=True)
    )