In [7]:
import pandas as pd
from math import nan

In [8]:
# Per-account attributes (package, subscription flag, product/sub type) used by the rules below.
user_content = pd.read_csv(filepath_or_buffer="data/azure/VAS_user_content.csv")

In [9]:
# Recommender output: one row per user with recommended items and their predicted ratings.
scored_df = pd.read_parquet(path="data/azure/scored/vas_scored.parquet")

### Rules Definition

In [10]:
# Name of the user_content column holding the package; it is merged onto
# scored_df and passed to filter_packages_by_sub_type as `ori_pack`.
package_name = "VAS_Package"

In [11]:
def filter_packages_by_name(pack_name, col):
    """
        Package filtration by product type.

        Decides whether the recommended item ``col`` may be kept for a
        subscriber whose product type is ``pack_name``. Matching is by
        substring:

        * product type contains "LTE": keep items that mention "LTE" or that
          do not mention "FTTH";
        * product type contains "Megaline": keep items that do not mention "LTE";
        * product type contains "FTTH": keep items that mention "FTTH" and
          do not mention "LTE";
        * product type contains "NO_INFO" / "NO INFO": keep every item.

        The rules are tried in that order and the first one that passes wins.

        Returns ``col`` when a rule passes, otherwise ``None``. ``None`` is also
        returned when either argument is missing (``None`` or a float such as NaN).
    """
    # Missing values show up as float NaN or None; there is nothing to match on.
    for value in (col, pack_name):
        if isinstance(value, float) or value is None:
            return None

    if "LTE" in pack_name and ("LTE" in col or "FTTH" not in col):
        return col
    if "Megaline" in pack_name and "LTE" not in col:
        return col
    if "FTTH" in pack_name and "LTE" not in col and "FTTH" in col:
        return col
    if "NO_INFO" in pack_name or "NO INFO" in pack_name:
        return col
    return None

def filter_packages_by_subscription(is_sub, col):
    """
        Check subscription.

        Keeps the recommended item ``col`` depending on the subscription flag
        ``is_sub`` (matched by substring):

        * flag does not contain "YES": keep the item;
        * flag contains "NO_INFO" / "NO INFO": keep the item;
        * otherwise (flag contains "YES" and no "NO INFO" marker): drop it.

        Returns ``col`` when the item is kept, otherwise ``None``. A missing
        flag (``None`` or a float such as NaN) also yields ``None``.
    """
    # Treat None like NaN, as filter_packages_by_name does; previously a None
    # flag raised TypeError on the substring test below.
    if is_sub is None or isinstance(is_sub, float):
        return None

    # The "NO_INFO" test only matters for flags that also contain "YES";
    # it is kept so every string input yields the same result as before.
    if "YES" not in is_sub or "NO_INFO" in is_sub or "NO INFO" in is_sub:
        return col
    return None

    
def filter_packages_by_sub_type(ori_pack, col, sub_type):
    """
        Filter by most subscription type.

        Keeps the recommended item ``col`` when it is allowed for the
        subscriber's ``sub_type`` (matched by substring):

        * "SINGLE_PLAY": item mentions neither "BB" nor "Peo"/"PEO"/"peo";
        * "DOUBLE_PLAY_PV": item does not mention "BB";
        * "DOUBLE_PLAY_BV": item does not mention "Peo"/"PEO"/"peo".

        ``ori_pack`` is only used as a presence check: a float (NaN) value
        means the row has no package information.

        Returns ``col`` when at least one rule allows it, NaN when no rule
        does (including sub types not listed above), and ``None`` when
        ``col``, ``ori_pack`` or ``sub_type`` is missing.
    """
    if isinstance(col, float) or isinstance(ori_pack, float):
        return None
    # Sub_Type can be NaN/None after the left merge even when the package is
    # present; the substring tests below would raise TypeError on it.
    if sub_type is None or isinstance(sub_type, float) or col is None:
        return None

    mentions_bb = "BB" in col
    mentions_peo = any(tag in col for tag in ("Peo", "PEO", "peo"))

    allowed = (
        ("SINGLE_PLAY" in sub_type and not mentions_bb and not mentions_peo)
        or ("DOUBLE_PLAY_PV" in sub_type and not mentions_bb)
        or ("DOUBLE_PLAY_BV" in sub_type and not mentions_peo)
    )
    # NaN (not None) marks "filtered out", so dropna() removes it later.
    return col if allowed else nan

In [12]:
# Attach each user's package, subscription flag, product type and sub type to
# the scored rows. The lookup is de-duplicated on (account, package), so an
# account with several distinct packages contributes several rows here.
scored_df = (
    scored_df
    .merge(
        user_content[
            ["ACCOUNT_NUM.hash", package_name, "is_Subscription", "Product_Type", "Sub_Type"]
        ].drop_duplicates(subset=["ACCOUNT_NUM.hash", package_name]),
        how="left",
        left_on="User",
        right_on="ACCOUNT_NUM.hash",
    )
    .drop(columns="ACCOUNT_NUM.hash")
)

In [13]:
# Columns of scored_df that hold the recommended items, in rank order.
recommender_cols = [
    "Recommended Item 1",
    "Recommended Item 2",
    "Recommended Item 3",
    "Recommended Item 4",
    "Recommended Item 5",
]

### Applying Rules

In [14]:
# Rule 1: blank out (NaN/None) every recommended item that is not allowed for
# the user's Sub_Type.
for col in recommender_cols:
    scored_df[col] = scored_df.apply(
        lambda row: filter_packages_by_sub_type(
            row[package_name],
            row[col],
            row["Sub_Type"],
        ),
        axis=1,
    )

In [15]:
# Rule 2: blank out (None) every recommended item that does not fit the
# user's Product_Type.
for col in recommender_cols:
    scored_df[col] = scored_df.apply(
        lambda row: filter_packages_by_name(
            row["Product_Type"],
            row[col],
        ),
        axis=1,
    )

In [16]:
# Keep only rows in which every one of the five recommendations survived the rules.
scored_df = scored_df.dropna(subset=recommender_cols, how="any")

In [17]:
# The merged user attributes were only needed by the rules; remove them again.
scored_df = scored_df.drop(
    columns=[package_name, "is_Subscription", "Product_Type", "Sub_Type"],
)

In [18]:
# Sanity check: (rows, columns) left after the rule-based filtering.
scored_df.shape

(2045, 11)

## Filter by Confidence of Recommendations

In [19]:
# Summary statistics of scored_df. filter_by_confidence reads row 1 of this
# frame by position — presumably the "mean" row over the Predicted Rating
# columns; TODO confirm that no other numeric column shifts the positions.
rated_df = scored_df.describe()

In [20]:
def filter_by_confidence(data_df, col_list):
    """
        Flag a row whose ratings beat the reference values in ``data_df``.

        The i-th value of ``col_list`` is compared with ``data_df.iloc[1, i]``
        (row 1, column i, by position). The caller passes the output of
        ``DataFrame.describe()``, so row 1 is presumably the mean — confirm.

        Returns "YES" as soon as one value is strictly greater than its
        reference, otherwise "NO" (also for an empty ``col_list``).
    """
    beats_reference = any(
        rating > data_df.iloc[1, position]
        for position, rating in enumerate(col_list)
    )
    return "YES" if beats_reference else "NO"

In [21]:
# "YES" when at least one of the five predicted ratings exceeds its reference
# value in rated_df, otherwise "NO".
scored_df["Best_Recommendations"] = scored_df.apply(
    lambda row: filter_by_confidence(
        rated_df,
        [
            row["Predicted Rating 1"],
            row["Predicted Rating 2"],
            row["Predicted Rating 3"],
            row["Predicted Rating 4"],
            row["Predicted Rating 5"],
        ],
    ),
    axis=1,
)

In [22]:
# How many rows were flagged "YES" vs "NO" by the confidence filter.
scored_df["Best_Recommendations"].value_counts()

YES    1685
NO      360
Name: Best_Recommendations, dtype: int64

In [23]:
# The raw ratings are summarised by Best_Recommendations; drop them from the export.
scored_df = scored_df.drop(
    columns=[
        "Predicted Rating 1",
        "Predicted Rating 2",
        "Predicted Rating 3",
        "Predicted Rating 4",
        "Predicted Rating 5",
    ],
)

In [24]:
# Preview the final frame that is written to disk in the next cell.
scored_df.head()

Unnamed: 0,User,Recommended Item 1,Recommended Item 2,Recommended Item 3,Recommended Item 4,Recommended Item 5,Best_Recommendations
0,000eee57a6c7a02c8aca8b410ea2e287,eChanneling Subcription,BB_ Entertainment Unlimited,Package Rental Through ADSL,Meet Max,Meet Lite,YES
3,00151cfd733cd1a97ae7fa72dc49907a,eChanneling Subcription,BB_ Entertainment Unlimited,Package Rental Through ADSL,Meet Max,Meet Lite,YES
9,004084d588fbe768fe77660e2cad665d,eChanneling Subcription,BB_ Entertainment Unlimited,Package Rental Through ADSL,Meet Max,Meet Lite,YES
27,00a76224057f47154cf94a2119c7464a,eChanneling Subcription,BB_ Entertainment Unlimited,Package Rental Through ADSL,Meet Max,Meet Lite,YES
30,00b28798952fc029099208632c4b2afe,eChanneling Subcription,BB_ Entertainment Unlimited,Package Rental Through ADSL,Meet Max,Meet Lite,YES


In [25]:
# Persist the filtered recommendations. The row index is written as the first
# column because `index` is left at its default.
scored_df.to_csv(path_or_buf="data/azure/scored/vas_scored_filterd.csv")