In [158]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import seaborn as sns

### Columns - common
- Product_ID - product code
- Base_Type (BB/VOICE/PEOTV)
- Pricing_Type (PAID/FREE)
- Package_Type (ADSL/Fibre/4G/Telephone/PeoTV)
- VAS (YES/NO) - value added service
- Title
- Description
- Included_Packages - packages shipped with a product
- Price (Rs.) - total cost, down payment or first installment
- Monthly_Rental (Rs.)
- Subscription_Type (SINGLE_PLAY/DOUBLE_PLAY/TRIPLE_PLAY)
- Minimum_Subscription_Period (years)
- Recidence_Type (Home/Office)
- Tax_Status (INCLUDED/EXCLUDED)
- Conditions - list of conditions for package
- **Available_Regions** - list of available regions: MSAN or related level
- **Dependent_Packages** (other products it depends on)

### Columns- BB
- BB_Data_Standard (GB) - Standard data for a Time-based package
- BB_Data_Free (GB) - Free data for a Time-based package
- BB_Data_Anytime (GB) - data for anytime package
- BB_Data_Unlimited (GB) - data for unlimited package
- BB_Connection_Type (Time-based/Anytime/Unlimited)
- BB_Connection_Speed (Download Speed/ Upload Speed) 

### Columns- VOICE
- VOICE_Home_SLT_Instrument_Rental (Rs.) - Home Telephone rental (with SLT provided telephone)
- VOICE_Home_Customer_Instrument_Rental (Rs.) - Home Telephone rental (with Customer provided telephone)
- VOICE_Charge_Active_Hours (SLT-SLT, SLT-Other) (Rs.) - Voice call charges for Active hours
- VOICE_Charge_Leisure_Hours (SLT-SLT, SLT-Other) (Rs.) - Voice call charges for Leisure hours
- VOICE_Free_Minutes - Free voice call minutes given per package
- VOICE_Telehelth_Insurance_Benefits (Rs.) - Benefit awarded at fulfilment for SLT Telehealth Insurance packages
- VOICE_Tele_Life_Insurance_Benefits (Rs.) - Benefit awarded at fulfilment for SLT Tele Life Insurance packages

### Columns- PEOTV
- PEOTV_No_of_Channels - No of channels in a PEO TV package

In [159]:
# Base product catalog; its columns are the ones described in the "Columns" sections above.
product_catalog = pd.read_csv("data/product_catalog/Product_Profile_CSv.csv")

## Merging Actual Product Information Datasets

### VAS Info dataset for merging

In [160]:
# VAS product details; later cells read the columns VAS, VAS_Type, Rental and One_Time_Charge.
vas_all_df = pd.read_csv("data/product_catalog/VAS_Products_Info.csv")

In [161]:
# Remove the "Unnamed: 0" column (presumably a row index saved into the CSV).
# Reassigning with errors="ignore" makes the cell idempotent: re-running it after
# the column is already gone is a no-op instead of a KeyError.
vas_all_df = vas_all_df.drop(columns=["Unnamed: 0"], errors="ignore")

In [162]:
# Distinct raw VAS_Type labels, checked before they are normalised below.
vas_all_df["VAS_Type"].unique()

array(['Voice', 'Other', 'BB', 'PeoTV'], dtype=object)

In [163]:
def update_vas_ype(vas):
    """Normalise a raw VAS type label to the upper-case Base_Type spelling.

    Parameters
    ----------
    vas : str or float
        Raw label. Float inputs (e.g. NaN for a missing value) are not mapped.

    Returns
    -------
    str or None
        "VOICE", "OTHER" or "PEOTV" when the label contains "Voice", "Other"
        or "PeoTV" (checked in that order); the label unchanged when none of
        them occurs; None for float input.
    """
    if isinstance(vas, float):
        return None

    # First matching substring wins, so the order of this table matters.
    replacements = (
        ("Voice", "VOICE"),
        ("Other", "OTHER"),
        ("PeoTV", "PEOTV"),
    )
    for fragment, label in replacements:
        if fragment in vas:
            return label
    return vas

In [164]:
# Normalise every VAS_Type label; the function is passed directly, no lambda wrapper needed.
vas_all_df["VAS_Type"] = vas_all_df["VAS_Type"].apply(update_vas_ype)

In [165]:
# Confirm the labels after normalisation.
vas_all_df["VAS_Type"].unique()

array(['VOICE', 'OTHER', 'BB', 'PEOTV'], dtype=object)

In [166]:
# Rename the VAS columns to the names used by the product catalog.
vas_all_df = vas_all_df.rename(
    columns={
        "VAS": "Product_ID",
        "VAS_Type": "Base_Type",
        "Rental": "Monthly_Rental",
        "One_Time_Charge": "Price",
    }
)

In [167]:
# Distinct product identifiers present in the VAS info table.
vas_all_df["Product_ID"].unique()

array(['V-Basic4_TeleLife', 'V-Basic2_TeleLife', 'C_Sisu Connect',
       'P-Basic4_Tele Life', 'C-Basic1_Tele Life', 'P-Plus4_Tele Life',
       'V-Plus4_TeleLife', 'P-Basic2_Tele Life', 'P_Sisu Connect',
       'V_E-channeling Subscription', 'V_Detailed Bill',
       'P_Call Transfer Three way', 'P_Call holding',
       'P_Outgoing Call Memory', 'P_Call back on busy',
       'P_Call Forwarding Offline', 'P_Incoming Call Transfer',
       'P_Megaline New Connection',
       'P_Additional features -Double VAS Bundle',
       'P_Call Forwarding by time', 'P_Incoming Call Memory',
       'P_Anonymous call barring', 'P_Call park',
       'P_CLI presentation in call waiting', 'V_One-Time Detailed Bill',
       'P-Plus1_Tele Life', 'BB_SLT Film Hall Service',
       'V_Cordeless Phone - Installment', 'V_Basic Phone',
       'V_Referral offer Discount', 'V_Short Message Service',
       'V_Call Forwarding - No Answer', 'V_Call Forwarding - On Busy',
       'AB_Additional Distance', 'V_Sisu C

In [168]:
# Column dtypes and non-null counts of the prepared VAS table.
vas_all_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 128 entries, 0 to 127
Data columns (total 4 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Product_ID      128 non-null    object 
 1   Monthly_Rental  26 non-null     float64
 2   Price           3 non-null      float64
 3   Base_Type       128 non-null    object 
dtypes: float64(2), object(2)
memory usage: 4.1+ KB


### Products from BSS merging

In [169]:
# Product/tariff information exported from BSS; later cells read PRODUCT_NAME, CATEGORY and MEDIUM.
products_bss_df = pd.read_csv("data/product_catalog/Product_info_from_BSS.csv")

In [170]:
# Remove the "Unnamed: 0" column (presumably a row index saved into the CSV).
# Reassigning with errors="ignore" makes the cell idempotent: re-running it after
# the column is already gone is a no-op instead of a KeyError.
products_bss_df = products_bss_df.drop(columns=["Unnamed: 0"], errors="ignore")

In [171]:
def update_cat(cat):
    """Map a BSS category label to a Subscription_Type code.

    Parameters
    ----------
    cat : str or float
        Raw CATEGORY value. Float inputs (e.g. NaN) are not mapped.

    Returns
    -------
    str or None
        "SINGLE_PLAY" if the label contains "Single Play-Voice", otherwise
        "TRIPLE_PLAY" if it contains "Triple Play", otherwise "DOUBLE_PLAY"
        if it contains "Double Play". None for float input and for any label
        that matches none of these.
    """
    if isinstance(cat, float):
        return None

    # NOTE(review): labels that match nothing (e.g. a single-play category other
    # than "Single Play-Voice") become None - confirm that this is intended.
    rules = (
        ("Single Play-Voice", "SINGLE_PLAY"),
        ("Triple Play", "TRIPLE_PLAY"),
        ("Double Play", "DOUBLE_PLAY"),
    )
    for fragment, code in rules:
        if fragment in cat:
            return code
    return None

In [172]:
# Convert every CATEGORY label to its Subscription_Type code; no lambda wrapper needed.
products_bss_df["CATEGORY"] = products_bss_df["CATEGORY"].apply(update_cat)

In [173]:
# Discard the hashed identifiers and the category detail column.
products_bss_df = products_bss_df.drop(
    columns=["PRODUCT_ID.hash", "TARIFF_ID.hash", "CATEGORY_DETAILS"]
)

In [174]:
# Rename the BSS columns to the names used by the product catalog.
products_bss_df = products_bss_df.rename(
    columns={
        "PRODUCT_NAME": "Product_ID",
        "CATEGORY": "Subscription_Type",
        "MEDIUM": "Transfer_Medium",
    }
)

In [175]:
# Distinct product identifiers in the BSS table (the output includes NaN for rows without a name).
products_bss_df["Product_ID"].unique()

array([nan, 'V-Plus4_TeleLife', 'V_E-channeling Subscription',
       'V-Basic2_TeleLife', 'V-Basic4_TeleLife', 'V-Basic1_TeleLife',
       'V_Sisu Connect', 'V_Telehealth', 'V-Plus1_TeleLife',
       'V_E-channeling Registration', 'BB_ Entertainment Unlimited',
       'OTT_SLT PeoTV Go', 'AB_WireLess Access',
       'BB_SLT Film Hall Service', 'BB_Detailed Reports ',
       'AB_Fiber Access Bearer', 'BB_PeoTVGO', 'BB_Extra GB',
       'BB_SLT BroadBand Service', 'BB_Personal Storage',
       'E_SLT PeoTV Service', 'V_SLT Voice Service',
       'AB_Copper Access Bearer'], dtype=object)

In [176]:
# Column dtypes and non-null counts of the prepared BSS table.
products_bss_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 23 entries, 0 to 22
Data columns (total 6 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Product_ID         22 non-null     object 
 1   Transfer_Medium    19 non-null     object 
 2   Subscription_Type  17 non-null     object 
 3   TARIFF_NAME        22 non-null     object 
 4   TARIFF_DESC        22 non-null     object 
 5   Monthly_Rental     10 non-null     float64
dtypes: float64(1), object(5)
memory usage: 1.2+ KB


### Merging DFs into the product catalog

In [177]:
# Stack the base catalog, the VAS table and the BSS table row-wise under a fresh 0..n-1 index.
# Columns missing from a source table are filled with NaN.
final_product_catalog = pd.concat(
    [product_catalog, vas_all_df, products_bss_df],
    ignore_index=True,
)

In [178]:
# Keep one row per Product_ID. keep="last" means the row from the table stacked
# latest wins when the same ID occurs in more than one source table.
final_product_catalog = final_product_catalog.drop_duplicates(
    subset=["Product_ID"],
    keep="last",
)

In [179]:
# Column dtypes and non-null counts of the merged, de-duplicated catalog.
final_product_catalog.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 264 entries, 0 to 392
Data columns (total 45 columns):
 #   Column                                 Non-Null Count  Dtype  
---  ------                                 --------------  -----  
 0   Product_ID                             263 non-null    object 
 1   Base_Type                              241 non-null    object 
 2   Pricing_Type                           134 non-null    object 
 3   Package_Type                           134 non-null    object 
 4   VAS                                    134 non-null    object 
 5   Title                                  134 non-null    object 
 6   Description                            47 non-null     object 
 7   BB_Data_Standard                       43 non-null     float64
 8   BB_Data_Free                           43 non-null     object 
 9   BB_Data_Anytime                        32 non-null     float64
 10  BB_Data_Unlimited                      11 non-null     object 
 11  BB_Con

In [155]:
# Preview the merged catalog (pandas elides the middle rows and columns).
# NOTE(review): this cell's execution count (155) is lower than its neighbours',
# so the output shown may come from an earlier run - re-run before relying on it.
final_product_catalog

Unnamed: 0,Product_ID,Base_Type,Pricing_Type,Package_Type,VAS,Title,Description,BB_Data_Standard,BB_Data_Free,BB_Data_Anytime,...,Subscription_Type,Minimum_Subscription_Period,Recidence_Type,Tax_Status,Conditions,Available_Regions,Dependent_Packages,Transfer_Medium,TARIFF_NAME,TARIFF_DESC
0,BB_Higher_Education,BB,PAID,ADSL,,HIGHER EDUCATION,,4.0,6,,...,"DOUBLE_PLAY, TRIPLE_PLAY",,Home,EXCLUDED,Speeds may vary depending on the line distance...,,,,,
1,BB_Web_Lite,BB,PAID,ADSL,,WEB LITE,,6.0,9,,...,"DOUBLE_PLAY, TRIPLE_PLAY",,Home,EXCLUDED,Speeds may vary depending on the line distance...,,,,,
2,BB_Entree,BB,PAID,ADSL,,ENTREE,,,,7.0,...,"DOUBLE_PLAY, TRIPLE_PLAY",,Home,EXCLUDED,Speeds may vary depending on the line distance...,,,,,
3,BB_Web_Starter_4G,BB,PAID,4G,,WEB STARTER 4G,,11.0,17,,...,"DOUBLE_PLAY, TRIPLE_PLAY",,Home,EXCLUDED,Download and upload speed will be reduced to 6...,,,,,
4,BB_Web_Starter_ADSL,BB,PAID,ADSL,,WEB STARTER ADSL,,11.0,17,,...,"DOUBLE_PLAY, TRIPLE_PLAY",,Home,EXCLUDED,Speeds may vary depending on the line distance...,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
388,BB_SLT BroadBand Service,,,,,,,,,,...,DOUBLE_PLAY,,,,,,,MC,Entree,Entrée
389,BB_Personal Storage,,,,,,,,,,...,DOUBLE_PLAY,,,,,,,MC,BB Storage_Charge,BB Storage_Charge
390,E_SLT PeoTV Service,,,,,,,,,,...,,,,,,,,,Cu_Peo Silver,Cu_Peo Silver
391,V_SLT Voice Service,,,,,,,,,,...,,,,,,,,,z Sales End_Add. Line with SLT Phone,Additional Line with SLT Phone


### Mark VAS Services

In [180]:
# Distinct Product_ID values of the merged catalog (may include NaN for a row without an ID).
original_list = final_product_catalog["Product_ID"].unique()

In [181]:
# Reference list of VAS names: load the file and keep the distinct values of its "VAS" column.
vas_list_df = pd.read_csv("data/product_catalog/VAS_Names_Only_List.csv")
vas_list = vas_list_df["VAS"].unique()

In [182]:
# Product IDs of the merged catalog that also appear in the VAS name list.
# The result comes from a set, so its order carries no meaning.
matches = list(set(original_list) & set(vas_list))
matches

['AB_Copper Access Bearer',
 'BB_YouTube Bundle',
 'E_PeoTV Initiation',
 'P_Sisu Connect',
 'P_Anonymous call barring',
 'BB_ Entertainment Unlimited',
 'AB_Fiber Access Bearer',
 'OTT_SLT PeoTV Go',
 'V_Caller Line Identification',
 'P_IDD',
 'P_Call Transfer Three way',
 'V_Call Forwarding-On Busy',
 'BB_PeoTVGO Revenue code',
 'P-Basic4_Tele Life',
 'Meet Max',
 'V-Basic1_TeleLife',
 'V_Anonymous call barring',
 'V_Installment',
 'PeoTVGo_Channel Package',
 'Meet Lite',
 'M_Activation Charge',
 'BB_Report Subscription Charge',
 'BB_Personal Storage',
 'V-Basic4_TeleLife',
 'V-Basic2_TeleLife',
 'AB_Citylink Loyalty Rewards',
 'V_Call holding',
 'V_Single VAS Bundle',
 'V_Absentee service',
 'V_Call Forwarding - No Answer',
 'PEO TV GO_OTT_A-la carte channels',
 'V_Incoming Call Memory',
 'V_Do not disturb service',
 'V_One-Time Detailed Bill',
 'P-Plus4_Tele Life',
 'V_Password call barring (secret code)',
 'BB Personal Storage',
 'BB_Detailed Reports ',
 'P_Outgoing Call Memory',


In [184]:
def mark_vas(vas):
    """Flag a product as a value added service.

    Reads the module-level list ``matches``.

    Parameters
    ----------
    vas : str or float
        A Product_ID. Float inputs (e.g. NaN for a missing ID) are not flagged.

    Returns
    -------
    str or None
        "YES" when any entry of ``matches`` occurs as a substring of ``vas``;
        None otherwise, including for float input.
    """
    if isinstance(vas, float):
        return None

    # NOTE(review): this is a substring test, not exact equality, so an ID that
    # merely contains a VAS name is flagged too - confirm that this is intended.
    # NOTE(review): non-VAS products get None although the column is documented
    # as YES/NO - confirm whether "NO" should be returned instead.
    contains_vas_name = any(name in vas for name in matches)
    return "YES" if contains_vas_name else None

In [185]:
# Overwrite the VAS column with the flag derived from each Product_ID; no lambda wrapper needed.
final_product_catalog["VAS"] = final_product_catalog["Product_ID"].apply(mark_vas)

In [186]:
# Count the VAS flags; value_counts leaves out missing values, so unflagged rows are not listed.
final_product_catalog["VAS"].value_counts()

YES    128
Name: VAS, dtype: int64

In [187]:
# Persist the finalised catalog.
# NOTE(review): index=False is not passed, so the row index is written as an unnamed
# first column and shows up as "Unnamed: 0" when the file is read back - confirm
# whether downstream readers expect that column before changing this.
final_product_catalog.to_csv("data/product_catalog/Product_Profile_Finalised_CSV.csv")