In [26]:
import pandas as pd
from math import nan

In [27]:
user_content = pd.read_csv("data/azure/BB_user_content.csv")

In [28]:
scored_df = pd.read_parquet("data/azure/scored/bb_scored.parquet")

### reules Definition

In [29]:
package_name = "BB_Package"
bb_order = ["STUDENT PACKAGE 1", "LITE", "ENTREE", "STARTER", "STUDENT PACKAGE 2", "JOY", "PAL", "BEAT", "FAMILY PLUS", "FLIX", "FAMILY ACTIVE", "BLAZE", "FAMILY XTRA", "TIDE", "BOOSTER", "SPIKE", "PRO","STROM", "MASTER", "GLAM", "CHAMP", "DELIGHT", "LIFE", "XTREME", "INSPIRE", "PREMIER"]

In [30]:
"""
    Package Filtration by type
"""
def filter_packages_by_name(pack_name,col):
    if not isinstance(col, float) and not isinstance (pack_name, float):
        if "LTE" in pack_name:
            if "LTE" in col or "FTTH" not in col:
                return col
        if "Megaline" in pack_name:
            if "LTE" not in col:
                return col
        if "FTTH" in pack_name:
            if "LTE" not in col and "FTTH" in col:
                return col
        if "NO_INFO" in pack_name or "NO INFO" in pack_name:
            return col

"""
    Check subscription 
"""
def filter_packages_by_subscription(is_sub,col):
    if not isinstance(is_sub, float):
        if "YES" in is_sub:
            return col
        if "NO_INFO" in is_sub or "NO INFO" in is_sub:
            return col


"""
    Order by most prefrable package
"""
def order_packages_common(col_arr):
    if not isinstance(col_arr, float) and len(col_arr)>0:
        new_arr = []
        for col in col_arr:
            if col is not None:
                if "FTTH" in col:
                    new_arr.append(col)
                    col_arr.remove(col)
        for col in col_arr:
            if col is not None:
                if "LTE" not in col:
                    new_arr.append(col)
                    col_arr.remove(col)
        new_arr = new_arr + col_arr 
        return new_arr


"""
    Filter by most prefrable BB package
"""
def filter_packages_by_list(ori_pack, col, ord_list):
    if not isinstance(col, float) and not isinstance(ori_pack, float):
        pack_pos = 0
        for item in ord_list:
            if item in ori_pack:
                pack_pos = ord_list.index(item)
        comp_list = ord_list[:pack_pos]
        for item in comp_list:
            if item in col:
                return nan
        return col 

In [31]:
scored_df.drop(["Predicted Rating 1","Predicted Rating 2","Predicted Rating 3","Predicted Rating 4","Predicted Rating 5"],axis=1, inplace=True)

In [32]:
scored_df = scored_df.merge(user_content[["ACCOUNT_NUM.hash",package_name,"is_Subscription","Product_Type"]].drop_duplicates(["ACCOUNT_NUM.hash",package_name]),left_on="User", right_on="ACCOUNT_NUM.hash", how="left").drop("ACCOUNT_NUM.hash",axis=1)

In [33]:
recommender_cols = ["Recommended Item 1","Recommended Item 2","Recommended Item 3","Recommended Item 4","Recommended Item 5"]

### Applying Rules

In [34]:
for col in recommender_cols:
    scored_df[col] = scored_df.apply(lambda x: filter_packages_by_list(x[package_name],x[col],bb_order), axis=1)

In [35]:
for col in recommender_cols:
    scored_df[col] = scored_df.apply(lambda x: filter_packages_by_subscription(x["is_Subscription"],x[col]), axis=1)

In [36]:
for col in recommender_cols:
    scored_df[col] = scored_df.apply(lambda x: filter_packages_by_name(x["Product_Type"],x[col]), axis=1)

In [37]:
### Order packages
scored_df["Recommendations_Ordered"] = scored_df.apply(lambda x: order_packages_common([x[recommender_cols[1]], x[recommender_cols[2]], x[recommender_cols[3]], x[recommender_cols[4]]]), axis=1)

In [38]:
scored_df.dropna(subset=recommender_cols,inplace=True)

In [39]:
scored_df.drop([package_name,"is_Subscription","Product_Type"],axis=1, inplace=True)

In [40]:
# test
scored_df.shape

(3872, 7)

In [41]:
# test
scored_df.head()

Unnamed: 0,User,Recommended Item 1,Recommended Item 2,Recommended Item 3,Recommended Item 4,Recommended Item 5,Recommendations_Ordered
0,000f325a72b9d24742237070939b57d1,FTTH_ANY STORM,WEB PRO,LTE_WEB BOOSTER,FTTH_WEB FAMILY ACTIVE,ANY SPIKE,"[FTTH_WEB FAMILY ACTIVE, WEB PRO, ANY SPIKE, L..."
1,0011c1b02e2403c74c75ae8b5582e018,FTTH_WEB FAMILY PLUS,FTTH_WEB BOOSTER,FTTH_WEB CHAMP,FTTH_WEB FAMILY XTRA,FTTH_WEB MASTER,"[FTTH_WEB BOOSTER, FTTH_WEB FAMILY XTRA, FTTH_..."
3,001e2b18f91f221782a539608896254d,FTTH_WEB FAMILY ACTIVE,FTTH_WEB BOOSTER,WEB PRO,FTTH_ANY SPIKE,FTTH_ANY FLIX,"[FTTH_WEB BOOSTER, FTTH_ANY SPIKE, WEB PRO, FT..."
4,00368aec50ec17b5b9f20309145c56e7,FTTH_ANY STORM,WEB PRO,FTTH_ANY TIDE,FTTH_ANY FLIX,FTTH_WEB BOOSTER,"[FTTH_ANY TIDE, FTTH_WEB BOOSTER, WEB PRO, FTT..."
5,0041ac2bc47fc302ae3bc9c8c5db5618,FTTH_WEB BOOSTER,FTTH_WEB FAMILY ACTIVE,FTTH_WEB FAMILY XTRA,FTTH_ANY STORM,WEB PRO,"[FTTH_WEB FAMILY ACTIVE, FTTH_ANY STORM, FTTH_..."
