In [1]:
import pandas as pd
from math import nan

In [2]:
user_content = pd.read_csv("data/azure/peoTV_user_content.csv")

In [3]:
scored_df = pd.read_parquet("data/azure/scored/peotv_scored.parquet")

### reules Definition

In [4]:
package_name = "Peo_TV_Package"
peo_order = ["GOLD", "PLATINUM", "SILVER", "UNNATHAM", "FAMILY", "SILVER PLUS", "TITANIUM", "ENTERTAINMENT"]

In [7]:
"""
    Package Filtration by type
"""
def filter_packages_by_name(pack_name,col):
    if not isinstance(col, float) and not isinstance (pack_name, float):
        if "LTE" in pack_name:
            if "LTE" in col or "FTTH" not in col:
                return col
        if "Megaline" in pack_name:
            if "LTE" not in col:
                return col
        if "FTTH" in pack_name:
            if "LTE" not in col and "FTTH" in col:
                return col
        if "NO_INFO" in pack_name or "NO INFO" in pack_name:
            return col

"""
    Check subscription 
"""
def filter_packages_by_subscription(is_sub,col):
    if not isinstance(is_sub, float):
        if "YES" in is_sub:
            return col
        if "NO_INFO" in is_sub or "NO INFO" in is_sub:
            return col

"""
    Check if suspended
"""
def filter_packages_if_suspended(is_suspended,col):
    if not isinstance(is_suspended, float):
        if "INSERVICE" in is_suspended:
            return col
        if "NO_INFO" in is_suspended or "NO INFO" in is_suspended:
            return col

"""
    Filter SLT STAFF packages
    PeoTV
"""
def filter_slt_staff_packages(ori_pack,col):
    if not isinstance(ori_pack, float) and not isinstance (col, float):
        if "STAFF" not in ori_pack or "Staff" not in ori_pack or "staff" not in ori_pack:
            if "STAFF" not in col or "Staff" not in col or "staff" not in col:
                return col
        else:
            return col


"""
    Filter by most prefrable PeoTV package
"""
def filter_packages_by_list(ori_pack, col, ord_list):
    if not isinstance(col, float) and not isinstance(ori_pack, float):
        pack_pos = 0
        for item in ord_list:
            if item in ori_pack:
                pack_pos = ord_list.index(item)
        comp_list = ord_list[:pack_pos]
        for item in comp_list:
            if item in col:
                return nan
        return col 

In [8]:
scored_df = scored_df.merge(user_content[["ACCOUNT_NUM.hash",package_name,"is_Subscription","Product_Type"]].drop_duplicates(["ACCOUNT_NUM.hash",package_name]),left_on="User", right_on="ACCOUNT_NUM.hash", how="left").drop("ACCOUNT_NUM.hash",axis=1)

In [9]:
recommender_cols = ["Recommended Item 1","Recommended Item 2","Recommended Item 3","Recommended Item 4","Recommended Item 5"]

### Applying Rules

In [10]:
for col in recommender_cols:
    scored_df[col] = scored_df.apply(lambda x: filter_slt_staff_packages(x[package_name],x[col]), axis=1)

In [11]:
for col in recommender_cols:
    scored_df[col] = scored_df.apply(lambda x: filter_packages_by_list(x[package_name],x[col],peo_order), axis=1)

In [191]:
# for col in recommender_cols:
#     scored_df[col] = scored_df.apply(lambda x: filter_packages_by_subscription(x["is_Subscription"],x[col]), axis=1)

In [None]:
# for col in recommender_cols:
#     scored_df[col] = scored_df.apply(lambda x: filter_packages_if_suspended(x["Service_Status"],x[col]), axis=1)

In [12]:
for col in recommender_cols:
    scored_df[col] = scored_df.apply(lambda x: filter_packages_by_name(x["Product_Type"],x[col]), axis=1)

In [13]:
scored_df.dropna(subset=recommender_cols,inplace=True)

In [14]:
scored_df.drop([package_name,"is_Subscription","Product_Type"],axis=1, inplace=True)

In [15]:
# test
scored_df.shape

(4849, 11)

## Filter by Confidence of Recommendations

In [18]:
rated_df = scored_df.describe()

In [24]:
def filter_by_confidence(data_df, col_list):
    i = 0
    for col in col_list:
        if col > data_df.iloc[1,i]:
            return "YES"
        i = i +1
    return "NO"

In [25]:
scored_df["Best_Recommendations"] = scored_df.apply(lambda x: filter_by_confidence(rated_df,[x["Predicted Rating 1"],x["Predicted Rating 2"],x["Predicted Rating 3"],x["Predicted Rating 4"],x["Predicted Rating 5"]]), axis=1)

In [27]:
scored_df["Best_Recommendations"].value_counts()

YES    2847
NO     2002
Name: Best_Recommendations, dtype: int64

In [28]:
scored_df.drop(["Predicted Rating 1","Predicted Rating 2","Predicted Rating 3","Predicted Rating 4","Predicted Rating 5"],axis=1, inplace=True)

In [33]:
scored_df.head()

Unnamed: 0,User,Recommended Item 1,Recommended Item 2,Recommended Item 3,Recommended Item 4,Recommended Item 5,Best_Recommendations
0,0013519e648e9718c56e1e6ccfb3b17d,PEO_SILVER,PEO_VIRUSARA_VARAPPRASADA,PEO_UNNATHAM,SLT_SHARE_HOLDER_PACKAGE,PEO_UTHAYAM,YES
3,002b0ab25152c9566b7441848f117593,PEO_SILVER,PEO_VIRUSARA_VARAPPRASADA,PEO_UNNATHAM,PEO_UTHAYAM,SLT_SHARE_HOLDER_PACKAGE,NO
4,003b9fb01f296b23d6727c322011db66,PEO_SILVER,PEO_UNNATHAM,PEO_SILVER_PLUS,SLTStaff_PEO_SILVER,PEO_UTHAYAM,YES
5,003ffd7c83136dcd2ffe06d88170defe,PEO_SILVER,PEO_UNNATHAM,PEO_VIRUSARA_VARAPPRASADA,SLT_STAFF_TRIPLE_PLAY,PRANAMA,YES
7,004c94de02a14064088638b77c84a739,PEO_SILVER,PEO_UNNATHAM,PEO_VIRUSARA_VARAPPRASADA,PEO_SILVER_PLUS,PEO_UTHAYAM,YES


In [32]:
scored_df.to_csv("data/azure/scored/peotv_scored_filterd.csv")