In [70]:
import pandas as pd
from math import nan

In [71]:
# Account-level attributes, read from CSV. Later cells read the columns
# "ACCOUNT_NUM.hash", "VAS_Package", "is_Subscription", "Product_Type" and
# "Sub_Type" from this frame.
user_content = pd.read_csv("data/azure/VAS_user_content.csv")

In [72]:
# Scored recommender output. Later cells rely on a "User" column plus
# "Recommended Item 1..5" and "Predicted Rating 1..5" columns being present.
scored_df = pd.read_parquet("data/azure/scored/vas_scored.parquet")

### Rules Definition

In [73]:
# Name of the user_content column holding the account's current package; it is
# merged into scored_df and passed to the sub-type filter.
package_name = "VAS_Package"

In [74]:
"""
    Package Filtration by type
"""
def filter_packages_by_name(pack_name, col):
    """Keep a recommended item only if it is compatible with the product type.

    Parameters
    ----------
    pack_name : str, float or None
        The account's product type (the notebook passes the "Product_Type"
        column).
    col : str, float or None
        The recommended item name being checked.

    Returns
    -------
    ``col`` when one of the rules below accepts it, otherwise ``None``.

    Rules, evaluated in this order (the first one that accepts wins):
      * product type contains "LTE": keep items that mention "LTE" or that do
        not mention "FTTH";
      * product type contains "Megaline": keep items that do not mention "LTE";
      * product type contains "FTTH": keep items that mention "FTTH" and do not
        mention "LTE";
      * product type contains "NO_INFO" or "NO INFO": keep every item.

    A ``None`` or float (NaN) value in either argument yields ``None``.
    """
    # Missing values arrive as float NaN (or None); nothing to filter then.
    for value in (col, pack_name):
        if value is None or isinstance(value, float):
            return None

    # Short-circuit evaluation keeps the same check order as an if-cascade.
    accepted = (
        ("LTE" in pack_name and ("LTE" in col or "FTTH" not in col))
        or ("Megaline" in pack_name and "LTE" not in col)
        or ("FTTH" in pack_name and "LTE" not in col and "FTTH" in col)
        or "NO_INFO" in pack_name
        or "NO INFO" in pack_name
    )
    return col if accepted else None

"""
    Check subscription 
"""
def filter_packages_by_subscription(is_sub, col):
    """Keep a recommended item unless the subscription flag says "YES".

    Parameters
    ----------
    is_sub : str, float or None
        Subscription flag text (the merged frame carries it in the
        "is_Subscription" column).
    col : object
        The recommended item; returned unchanged when it is kept.

    Returns
    -------
    ``col`` when ``is_sub`` does not contain "YES", or when it contains
    "NO_INFO" / "NO INFO"; otherwise ``None``. A missing flag (``None`` or a
    float such as NaN) also yields ``None``.
    """
    # Missing flag: pandas uses float NaN, and None is treated the same way so
    # the membership tests below never run against a non-string.
    if is_sub is None or isinstance(is_sub, float):
        return None

    # The "no info" markers only matter when the text also contains "YES";
    # they are kept as explicit overrides so string inputs behave as before.
    if "YES" not in is_sub or "NO_INFO" in is_sub or "NO INFO" in is_sub:
        return col
    return None

    
"""
    Filter by most subscription type
"""
def filter_packages_by_sub_type(ori_pack, col, sub_type):
    """Keep a recommended item only if the account's subscription type allows it.

    Parameters
    ----------
    ori_pack : str, float or None
        The account's current package value. It is only inspected to detect
        accounts without package information.
    col : str, float or None
        The recommended item name being checked.
    sub_type : str, float or None
        The account's subscription type text.

    Returns
    -------
    * ``col`` when at least one rule accepts it:
        - "SINGLE_PLAY": item contains neither "BB" nor "Peo"/"PEO"/"peo";
        - "DOUBLE_PLAY_PV": item does not contain "BB";
        - "DOUBLE_PLAY_BV": item does not contain "Peo"/"PEO"/"peo".
    * ``nan`` when no rule accepts it, including when ``sub_type`` is missing
      or matches none of the three types above.
    * ``None`` when ``col`` or ``ori_pack`` is missing.
    """
    def _missing(value):
        # pandas marks absent values as float NaN; None is treated alike.
        return value is None or isinstance(value, float)

    if _missing(col) or _missing(ori_pack):
        return None

    # A missing subscription type cannot satisfy any rule. Without this guard
    # the membership tests below raise TypeError on NaN/None.
    if _missing(sub_type):
        return nan

    has_bb = "BB" in col
    has_peo = any(tag in col for tag in ("Peo", "PEO", "peo"))

    # The rules are independent: passing any one of them keeps the item.
    accepted = (
        ("SINGLE_PLAY" in sub_type and not has_bb and not has_peo)
        or ("DOUBLE_PLAY_PV" in sub_type and not has_bb)
        or ("DOUBLE_PLAY_BV" in sub_type and not has_peo)
    )
    return col if accepted else nan

In [75]:
# The rule filters only need the recommended items, so the five
# "Predicted Rating N" columns are removed up front.
scored_df = scored_df.drop(
    columns=[f"Predicted Rating {rank}" for rank in range(1, 6)]
)

In [76]:
# Attach the account attributes the rules need to every scored row.
# The left join keeps scored rows whose "User" has no match in user_content;
# their merged columns are NaN.
# NOTE(review): de-duplication is keyed on (account, package), so an account
# with several distinct package values contributes several rows and its scored
# row is repeated once per package — confirm this is intended.
scored_df = (
    scored_df
    .merge(
        user_content[
            ["ACCOUNT_NUM.hash", package_name, "is_Subscription", "Product_Type", "Sub_Type"]
        ].drop_duplicates(subset=["ACCOUNT_NUM.hash", package_name]),
        how="left",
        left_on="User",
        right_on="ACCOUNT_NUM.hash",
    )
    .drop(columns="ACCOUNT_NUM.hash")
)

In [77]:
# Names of the five recommendation columns, in rank order.
recommender_cols = [f"Recommended Item {rank}" for rank in range(1, 6)]

### Applying Rules

In [78]:
# Rule pass 1 — subscription type. Each recommendation column is overwritten
# row by row with the result of filter_packages_by_sub_type: the item itself
# when it is allowed for the row's "Sub_Type", otherwise a missing value.
for col in recommender_cols:
    scored_df[col] = scored_df.apply(lambda x: filter_packages_by_sub_type(x[package_name],x[col],x["Sub_Type"]), axis=1)

In [79]:
# Rule pass 2 — product type. Each recommendation column is overwritten row by
# row with the result of filter_packages_by_name: the item itself when it is
# compatible with the row's "Product_Type", otherwise None.
# NOTE(review): filter_packages_by_subscription is defined but not applied in
# the cells shown here, and "is_Subscription" is merged and later dropped
# unused — confirm whether a third pass is missing.
for col in recommender_cols:
    scored_df[col] = scored_df.apply(lambda x: filter_packages_by_name(x["Product_Type"],x[col]), axis=1)

In [80]:
# Keep only rows where all five recommendations survived the rule passes:
# a row is dropped as soon as any one recommendation column is missing.
scored_df = scored_df.dropna(subset=recommender_cols, how="any")

In [81]:
# The merged account attributes were only needed by the rules; remove them so
# the frame is back to "User" plus the recommendation columns.
scored_df = scored_df.drop(
    columns=[package_name, "is_Subscription", "Product_Type", "Sub_Type"]
)

In [82]:
# Sanity check: (rows, columns) left after filtering. The recorded run shows
# (1480, 6).
scored_df.shape

(1480, 6)

In [83]:
# Sanity check: how often each distinct combination of the five surviving
# recommendations occurs.
scored_df[recommender_cols].value_counts() 

Recommended Item 1       Recommended Item 2           Recommended Item 3           Recommended Item 4                        Recommended Item 5                      
eChanneling Subcription  BB_ Entertainment Unlimited  Package Rental Through ADSL  Meet Max                                  AB_Additional Distance                      474
                                                                                                                             V_Detailed Bill                             461
                                                                                                                             Meet Lite                                   261
                                                                                   V_Detailed Bill                           Meet Max                                     54
                                                                                   AB_Additional Distance                    Meet Max         

In [84]:
# Persist the filtered recommendations as CSV.
# NOTE(review): index=False is not passed, so the DataFrame index is written as
# the first CSV column — confirm downstream readers expect it.
# NOTE(review): the file name is spelled "filterd"; left as-is because other
# code may read this exact path.
scored_df.to_csv("data/azure/scored/vas_scored_filterd.csv")