## CHECK_MB_INVOICE

### Data price

In [3]:
import datetime as dt
import os
from collections import namedtuple
from os.path import join, normpath

import numpy as np
import pandas as pd
from pandas import Series, DataFrame

import roamability as rb

In [6]:
# =====================================================================================
# Notebook parameters: every value a reader may want to tune lives in this cell
# =====================================================================================

# Folder and file name of the usage CSV that is loaded further down
folder_name = r'c:\Users\balob\Downloads\new1'
file_name_data = r'202006 data_usage.csv'

# Column of the BSS data that gets pivoted: 'Cost', 'Revenue', 'Profit'
use_value = 'Cost'

# Multiplier applied to BILL_AMT when the usage CSV is loaded
# (HKD to USD, going by the name)
hkd_usd_rate = 0.13

# Day range used in the BSS query, both ends inclusive, formatted YYYYMMDD
start_date = '20200601'
end_date = '20200730'

# Lightweight record describing one sponsor
sponsor_object = namedtuple('Sponsor', ['name', 'mccmnc', 'sponsor_id'])

s1_sponsor = sponsor_object(name='S1', mccmnc=42501, sponsor_id=1)
s2_sponsor = sponsor_object(name='S2', mccmnc=26006, sponsor_id=2)
s4_sponsor = sponsor_object(name='S4', mccmnc=45403, sponsor_id=4)
s5_sponsor = sponsor_object(name='S5', mccmnc=51503, sponsor_id=5)

# All known sponsors, in sponsor_id order
sponsors = [
    s1_sponsor,
    s2_sponsor,
    s4_sponsor,
    s5_sponsor,
]

# =====================================================================================
# End of notebook parameters
# =====================================================================================

In [4]:
# Load the TADIG lookup: one row per TADIG code with its network id and country name.
sql_srt="""SELECT t.network_id AS VisitedNetworkId, c.country_name AS Country, t.tadig_code AS Tadig
FROM RDB_TADIG_CODES t
LEFT JOIN RDB_NETWORKS n ON t.network_id = n.network_id
LEFT JOIN RDB_COUNTRIES c ON n.country_id = c.country_id;"""

# The connection secret must not be stored in the notebook: it is read from the
# BSS_MSSQL_PASSWORD environment variable, and a missing variable raises KeyError
# before any connection attempt.
# NOTE(review): the fourth positional argument of rb.MssqlConnect is presumed to be
# the password; confirm against the roamability helper.
with rb.MssqlConnect('172.18.11.82', '10028', 'BSS', os.environ['BSS_MSSQL_PASSWORD']) as cnxn:
    df_tadig = pd.read_sql_query(sql_srt, cnxn, coerce_float=False)
display(df_tadig.head(3))

Unnamed: 0,VisitedNetworkId,Country,Tadig
0,4,Afghanistan,AFGAW
1,5,Afghanistan,AFGAR
2,6,Afghanistan,AFG55


In [8]:
# Load the usage CSV, scale the billed amount by hkd_usd_rate, rename the columns
# to the names used in the rest of the notebook and attach the TADIG lookup.
df_data = (
    pd.read_csv(
        join(folder_name, file_name_data),
        usecols=['TAGID', 'TRAN_DATE', 'USAGE_BYTE_BILL', 'BILL_AMT'],
        parse_dates=['TRAN_DATE'],
    )
    .assign(BILL_AMT=lambda usage: usage.BILL_AMT * hkd_usd_rate)
    .rename(columns={
        'TAGID': 'Tadig',
        'TRAN_DATE': 'Day',
        'USAGE_BYTE_BILL': 'BYTES',
        'BILL_AMT': 'DATA',
    })
    # Left join: usage rows without a matching TADIG keep NaN network id / country
    .merge(df_tadig, how='left', on='Tadig')
    # Year-month label used for the monthly comparisons
    .assign(Month=lambda usage: usage.Day.dt.strftime('%Y-%m'))
)

df_data.head()

Unnamed: 0,Tadig,Day,BYTES,DATA,VisitedNetworkId,Country,Month
0,MYSMI,2020-06-22,4699167.0,0.0455,588,Malaysia,2020-06
1,THAWN,2020-06-01,17389143.0,0.0325,952,Thailand,2020-06
2,THAWN,2020-06-02,50223049.0,0.0949,952,Thailand,2020-06
3,THAWN,2020-06-03,61868981.0,0.117,952,Thailand,2020-06
4,THAWN,2020-06-04,13785614.0,0.026,952,Thailand,2020-06


In [9]:
# Total BYTES and DATA over the rows whose TADIG code starts with 'SAUAJ'
# (rows with a missing TADIG are excluded via na=False)
df_data[df_data['Tadig'].str.startswith('SAUAJ', na=False)][['BYTES', 'DATA']].sum()

BYTES    1.119656e+10
DATA     4.744480e+01
dtype: float64

In [10]:
# Fetch the usage data from BSS.
# The three aggregate tables (GPRS, MOC, MTC) are stacked with UNION ALL and each
# row is tagged with a UsageType; for MOC/MTC the Sms flag selects call vs. SMS.
# start_date / end_date come from the parameters cell and bound Day inclusively.

sql_srt="""
SELECT * FROM
(
SELECT
Day,SponsorPrefix,VisitedCountryId,VisitedNetworkId,Customer,Cost,Revenue
,'DATA' AS UsageType
FROM aggreg.GPRS_AGR
WHERE `Day` BETWEEN {start_date} AND {end_date}
UNION ALL
SELECT
Day,SponsorPrefix,VisitedCountryId,VisitedNetworkId,Customer,Cost,Revenue
,CASE WHEN Sms = 0 THEN 'MOC' WHEN Sms = 1 THEN 'MOSMS' END AS UsageType
FROM aggreg.MOC_AGR
WHERE `Day` BETWEEN {start_date} AND {end_date}
UNION ALL
SELECT
Day,SponsorPrefix,VisitedCountryId,VisitedNetworkId,Customer,Cost,Revenue
,CASE WHEN Sms = 0 THEN 'MTC' WHEN Sms = 1 THEN 'MTSMS' END AS UsageType
FROM aggreg.MTC_AGR
WHERE `Day` BETWEEN {start_date} AND {end_date}
) t
""".format(start_date=start_date, end_date=end_date)

# The connection secret must not be stored in the notebook: it is read from the
# BSS_MYSQL_PASSWORD environment variable, and a missing variable raises KeyError
# before any connection attempt.
# NOTE(review): the fourth positional argument of rb.MySqlConnect is presumed to be
# the password; confirm against the roamability helper.
with rb.MySqlConnect('172.18.11.40', 'BSS', 'noc', os.environ['BSS_MYSQL_PASSWORD']) as cnxn:
    df_bss_source = pd.read_sql_query(sql_srt, cnxn)
# df_bss_source.to_csv(join(downloads, output_bss_usage_file), index=False)
df_bss_source.head(3)

Unnamed: 0,Day,SponsorPrefix,VisitedCountryId,VisitedNetworkId,Customer,Cost,Revenue,UsageType
0,20200601,23450,37,150,172844,0.035395,0.070769,DATA
1,20200601,23450,37,167,172844,2.2297,4.45932,DATA
2,20200601,26006,14,50,172720,0.351444,0.741292,DATA


In [11]:
# Prepare the BSS data: map sponsor prefixes to names, pivot usage types into
# columns, add date helpers and a TOTAL, then keep a single sponsor.

df_bss_processed = df_bss_source.copy()

# Replace the numeric sponsor prefix with its symbolic name (S1, S2, ...).
# The result is assigned back to the column: an in-place replace on an
# attribute-accessed column is a chained operation that does not update the
# frame when pandas Copy-on-Write is active.
sponsor_dict = {sponsor.mccmnc: sponsor.name for sponsor in sponsors}
df_bss_processed['SponsorPrefix'] = df_bss_processed['SponsorPrefix'].replace(sponsor_dict)

# Usage types expected as columns after the pivot
usage_types = ['MOC', 'MTC', 'MOSMS', 'MTSMS', 'DATA']

# Build the wide frame for the selected value (use_value).
# NOTE(review): df_bss_source carries Cost and Revenue but no Profit column, so
# use_value='Profit' would raise here; confirm whether Profit should be derived.
# reindex guarantees all five usage columns exist even when a usage type has no
# rows in the selected period (otherwise the TOTAL line below raises KeyError).
df_bss_processed = (
    df_bss_processed
    .pivot_table(values=use_value,
                 index=['Day', 'SponsorPrefix', 'VisitedNetworkId'],
                 columns='UsageType', aggfunc='sum')
    .reindex(columns=usage_types)
    .reset_index()
)

# Parse the YYYYMMDD day value and add a year-month label
df_bss_processed['Day'] = pd.to_datetime(df_bss_processed['Day'], format='%Y%m%d')
df_bss_processed['Month'] = df_bss_processed['Day'].dt.strftime('%Y-%m')

# Add TOTAL (row sum over the usage columns, NaN skipped) and keep only the
# columns used further down; missing usage values become 0
df_bss_processed['TOTAL'] = df_bss_processed[usage_types].sum(axis=1)
df_bss_processed = df_bss_processed[
    ['Day', 'Month', 'SponsorPrefix', 'VisitedNetworkId'] + usage_types + ['TOTAL']].fillna(0)

# Keep only the S4 sponsor rows (name taken from the parameters cell)
df_bss_processed = df_bss_processed.loc[df_bss_processed['SponsorPrefix'] == s4_sponsor.name]
df_bss_processed.head(3)

UsageType,Day,Month,SponsorPrefix,VisitedNetworkId,MOC,MTC,MOSMS,MTSMS,DATA,TOTAL
207,2020-06-01,2020-06,S4,802,0.0,0.0,0.0,0.0,0.87842,0.87842
208,2020-06-01,2020-06,S4,996,0.516149,0.257393,0.0,0.0,0.3786,1.152142
421,2020-06-02,2020-06,S4,802,0.0,0.0,0.0,0.0,0.281063,0.281063


In [12]:
# Compare DATA amounts from the usage CSV (suffix _MB) with the BSS amounts
# (suffix _BSS) per month and visited network. The outer join keeps networks
# that appear on only one side, leaving NaN on the other.
df_temp = pd.merge(
    df_data.groupby(['Month','Country','Tadig','VisitedNetworkId'], as_index=False)['DATA'].sum(),
    df_bss_processed.groupby(['Month','VisitedNetworkId'], as_index=False)['DATA'].sum(),
    how='outer', on=['Month','VisitedNetworkId'], suffixes=('_MB','_BSS')
)

# df_temp.drop('VisitedNetworkId', axis=1, inplace=True)
display(df_temp.sort_values(by=['Country','Tadig']).round(2))
# Monthly totals of both sides. The columns are selected with a list: selecting
# several groupby columns with a bare tuple raises on pandas 2.x.
df_temp.groupby(['Month'], as_index=False)[['DATA_MB','DATA_BSS']].sum().round(2)

Unnamed: 0,Month,Country,Tadig,VisitedNetworkId,DATA_MB,DATA_BSS
0,2020-06,Israel,ISRPL,470,0.0,
1,2020-06,Japan,JPNDO,499,1.58,0.01
2,2020-06,Malaysia,MYSMI,588,0.05,0.02
3,2020-06,Saudi Arabia,SAUAJ,802,47.44,24.6
4,2020-06,Thailand,THAWN,952,7.96,0.28
5,2020-06,United Arab Emirates,AREDU,996,48.84,0.7
6,2020-07,,,261,,0.37
7,2020-07,,,499,,0.39
8,2020-07,,,588,,0.0
9,2020-07,,,802,,14.67


Unnamed: 0,Month,DATA_MB,DATA_BSS
0,2020-06,105.86,25.59
1,2020-07,0.0,18.51


In [20]:
# Monthly totals of the voice and SMS usage columns. The columns are selected
# with a list: selecting several groupby columns with a bare tuple raises on
# pandas 2.x.
df_bss_processed.groupby(['Month'], as_index=False)[['MOC','MTC','MOSMS','MTSMS']].sum()

UsageType,Month,MOC,MTC,MOSMS,MTSMS
0,2020-06,12.922769,1.54436,1.716965,0.0


In [21]:
# Grand total of the voice and SMS usage columns over all months.
# Columns are selected with a list (tuple selection raises on pandas 2.x); the
# first sum() aggregates per month, the second per column, the third yields the scalar.
df_bss_processed.groupby(['Month'])[['MOC','MTC','MOSMS','MTSMS']].sum().sum().sum()

16.184093999999998

In [34]:
# Scale a fixed amount by the configured rate.
# NOTE(review): 5699.12 is presumably an HKD figure typed in by hand; confirm its
# source and consider moving it to the parameters cell.
hkd_usd_rate * 5699.12

740.8856000000001