## CHECK_MB_INVOICE

### Data price

In [1]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame
import datetime as dt
from os.path import join, normpath
from collections import namedtuple

import roamability as rb

In [12]:
# =====================================================================================
# Notebook configuration -- adjust the values in this cell before running the others
# =====================================================================================

# Folder and file name of the usage export (CSV) that gets loaded further down
folder_name = r'c:\Users\balob\Downloads\new1'
file_name_data = r'202011 data_usage.csv'

# Which BSS measure to aggregate: 'Cost', 'Revenue' or 'Profit'
use_value = 'Cost'

# Multiplier used to express HKD amounts in USD
hkd_usd_rate = 0.13

# Reporting window as YYYYMMDD strings; both bounds are inclusive
start_date = '20201101'
end_date = '20201130'

# Lightweight record describing one sponsor
sponsor_object = namedtuple('Sponsor', 'name mccmnc sponsor_id')

# Sponsors that are currently switched off (uncomment and add to `sponsors` to enable):
# s1_sponsor = sponsor_object(name='S1', mccmnc=42501, sponsor_id=1)
# s2_sponsor = sponsor_object(name='S2', mccmnc=26006, sponsor_id=2)
# s5_sponsor = sponsor_object(name='S5', mccmnc=51503, sponsor_id=5)
s4_sponsor = sponsor_object(name='S4', mccmnc=45403, sponsor_id=4)

# Sponsors taken into account by the cells below
sponsors = [s4_sponsor]

# =====================================================================================
# End of configuration
# =====================================================================================

In [13]:
# Load the TADIG reference table: one row per TADIG code with the id of the
# network it belongs to and that network's country name. Both joins are LEFT
# joins, so a TADIG code without a matching network/country row is still
# returned (with NULLs in the unmatched columns).
sql_srt="""SELECT t.network_id AS VisitedNetworkId, c.country_name AS Country, t.tadig_code AS Tadig
FROM RDB_TADIG_CODES t
LEFT JOIN RDB_NETWORKS n ON t.network_id = n.network_id
LEFT JOIN RDB_COUNTRIES c ON n.country_id = c.country_id;"""

# NOTE(review): the connection arguments (presumably host, port, user and
# password -- confirm against rb.MssqlConnect) are hardcoded here, including
# what looks like a plain-text password. Move them to environment variables or
# a secrets store and rotate the credential.
with rb.MssqlConnect('172.18.11.82', '10028', 'BSS', 'iKQVm40AZAmyRaw72LeY') as cnxn:
    # coerce_float=False keeps non-float values (e.g. decimals) as returned by the driver
    df_tadig = pd.read_sql_query(sql_srt, cnxn, coerce_float=False)
# Quick visual sanity check of the first rows (display is provided by IPython)
display(df_tadig.head(3))

Unnamed: 0,VisitedNetworkId,Country,Tadig
0,4,Afghanistan,AFGAW
1,5,Afghanistan,AFGAR
2,6,Afghanistan,AFG55


In [14]:
# Load the usage export, convert the billed amount with `hkd_usd_rate`,
# switch to the column names used in the rest of the notebook and attach the
# network id / country from the TADIG reference table.
df_data = (
    pd.read_csv(
        join(folder_name, file_name_data),
        usecols=['TAGID', 'TRAN_DATE', 'USAGE_BYTE_BILL', 'BILL_AMT'],
        parse_dates=['TRAN_DATE'],
    )
    .assign(BILL_AMT=lambda frame: frame['BILL_AMT'] * hkd_usd_rate)
    .rename(columns={
        'TAGID': 'Tadig',
        'TRAN_DATE': 'Day',
        'USAGE_BYTE_BILL': 'BYTES',
        'BILL_AMT': 'DATA',
    })
    # Left join: usage rows are kept even when their Tadig is not in df_tadig
    .merge(df_tadig, how='left', on='Tadig')
)

# 'YYYY-MM' label used as the grouping key for monthly totals
df_data['Month'] = df_data['Day'].dt.strftime('%Y-%m')

df_data.head()

Unnamed: 0,Tadig,Day,BYTES,DATA,VisitedNetworkId,Country,Month
0,EGYAR,2020-11-01,16588622,0.160416,261,Egypt,2020-11
1,EGYAR,2020-11-02,33330196,0.322312,261,Egypt,2020-11
2,EGYAR,2020-11-03,29653236,0.286754,261,Egypt,2020-11
3,EGYAR,2020-11-04,177282549,1.714368,261,Egypt,2020-11
4,EGYAR,2020-11-05,811919343,7.851469,261,Egypt,2020-11


In [15]:
# Total bytes and amount for Tadig codes starting with 'SAUAJ'
# (na=False makes rows with a missing Tadig count as non-matching)
df_data[df_data['Tadig'].str.startswith('SAUAJ', na=False)][['BYTES', 'DATA']].sum()

BYTES    5.363958e+09
DATA     2.272987e+01
dtype: float64

In [16]:
# Fetch data from BSS
#
# The query stacks three aggregate tables (GPRS_AGR, MOC_AGR, MTC_AGR) into one
# result with a common column set and a UsageType label:
#   GPRS_AGR          -> 'DATA'
#   MOC_AGR, Sms = 0  -> 'MOC',  Sms = 1 -> 'MOSMS'
#   MTC_AGR, Sms = 0  -> 'MTC',  Sms = 1 -> 'MTSMS'
# The CASE expressions have no ELSE branch, so any other Sms value yields a
# NULL UsageType. Every branch is restricted to Day BETWEEN start_date AND
# end_date (both bounds inclusive).
#
# NOTE(review): start_date / end_date are interpolated into the SQL text with
# str.format rather than passed as query parameters. That is only acceptable
# while they are constants set inside this notebook.

sql_srt="""
SELECT * FROM
(
SELECT
Day,SponsorPrefix,VisitedCountryId,VisitedNetworkId,Customer,Cost,Revenue
,'DATA' AS UsageType
FROM aggreg.GPRS_AGR
WHERE `Day` BETWEEN {start_date} AND {end_date}
UNION ALL
SELECT
Day,SponsorPrefix,VisitedCountryId,VisitedNetworkId,Customer,Cost,Revenue
,CASE WHEN Sms = 0 THEN 'MOC' WHEN Sms = 1 THEN 'MOSMS' END AS UsageType
FROM aggreg.MOC_AGR
WHERE `Day` BETWEEN {start_date} AND {end_date}
UNION ALL
SELECT
Day,SponsorPrefix,VisitedCountryId,VisitedNetworkId,Customer,Cost,Revenue
,CASE WHEN Sms = 0 THEN 'MTC' WHEN Sms = 1 THEN 'MTSMS' END AS UsageType
FROM aggreg.MTC_AGR
WHERE `Day` BETWEEN {start_date} AND {end_date}
) t
""".format(start_date=start_date, end_date=end_date)

# NOTE(review): connection arguments (presumably host, database, user and
# password -- confirm against rb.MySqlConnect) are hardcoded, including what
# looks like a plain-text password. Move them out of the notebook and rotate
# the credential.
with rb.MySqlConnect('172.18.11.40', 'BSS', 'noc', 'WcQUzkXiXwoxnFfGnRxb') as cnxn:
    df_bss_source = pd.read_sql_query(sql_srt, cnxn)
# Optional export of the raw result, currently disabled:
# df_bss_source.to_csv(join(downloads, output_bss_usage_file), index=False)
df_bss_source.head(3)

Unnamed: 0,Day,SponsorPrefix,VisitedCountryId,VisitedNetworkId,Customer,Cost,Revenue,UsageType
0,20201101,23450,18,67,172868,0.206425,0.41284,DATA
1,20201101,23450,44,201,172901,0.012655,0.025313,DATA
2,20201101,23450,168,770,172901,2.8e-05,5.4e-05,DATA


In [17]:
# Prepare the BSS data: map sponsor prefixes to names, pivot the usage types
# into columns, add date helper columns and a TOTAL, then keep only the
# configured sponsors.

df_bss_processed = df_bss_source.copy()

# Replace the sponsor prefix (MCCMNC) with its symbolic name: S1, S2, ...
# The result is assigned back to the column. The previous
# `df.SponsorPrefix.replace(..., inplace=True)` acted on a temporary Series,
# which leaves the frame unchanged once pandas Copy-on-Write is active.
sponsor_dict = {sponsor.mccmnc: sponsor.name for sponsor in sponsors}
df_bss_processed['SponsorPrefix'] = df_bss_processed['SponsorPrefix'].replace(sponsor_dict)

# 'Profit' is listed as a valid `use_value`, but the BSS query only returns
# Cost and Revenue, so the pivot below would fail with a KeyError.
# NOTE(review): Profit is derived here as Revenue - Cost -- confirm this is the
# intended definition.
if use_value == 'Profit' and 'Profit' not in df_bss_processed.columns:
    df_bss_processed['Profit'] = df_bss_processed['Revenue'] - df_bss_processed['Cost']

# Usage types that must always be present as columns after the pivot
usage_types = ['MOC', 'MTC', 'MOSMS', 'MTSMS', 'DATA']

# Wide frame holding the selected measure (Cost, Revenue or Profit) per usage type.
# - aggfunc='sum' replaces np.sum, which newer pandas versions deprecate here.
# - reindex() guarantees all five usage columns exist even when one of them has
#   no records in the period (previously a KeyError further down).
df_bss_processed = (
    df_bss_processed
    .pivot_table(values=use_value,
                 index=['Day', 'SponsorPrefix', 'VisitedNetworkId'],
                 columns='UsageType', aggfunc='sum')
    .reindex(columns=usage_types)
    .reset_index()
)

# Parse the YYYYMMDD day value and add the 'YYYY-MM' month label
df_bss_processed['Day'] = pd.to_datetime(df_bss_processed['Day'], format='%Y%m%d')
df_bss_processed['Month'] = df_bss_processed['Day'].dt.strftime('%Y-%m')

# Add the TOTAL column (missing values are skipped by sum) and keep only the
# columns used further down; remaining gaps become 0
df_bss_processed['TOTAL'] = df_bss_processed[usage_types].sum(axis=1)
df_bss_processed = df_bss_processed[
    ['Day', 'Month', 'SponsorPrefix', 'VisitedNetworkId'] + usage_types + ['TOTAL']
].fillna(0)

# Keep only the sponsors enabled in the configuration cell (previously the
# name 'S4' was hard-coded here, ignoring the `sponsors` list)
sponsor_names = [sponsor.name for sponsor in sponsors]
df_bss_processed = df_bss_processed.loc[df_bss_processed['SponsorPrefix'].isin(sponsor_names)]
df_bss_processed.head(3)

UsageType,Day,Month,SponsorPrefix,VisitedNetworkId,MOC,MTC,MOSMS,MTSMS,DATA,TOTAL
276,2020-11-01,2020-11,S4,261,0.0,0.0,0.0,0.0,0.254905,0.254905
277,2020-11-01,2020-11,S4,499,0.0,0.0,0.0,0.0,0.008376,0.008376
278,2020-11-01,2020-11,S4,802,0.0,0.0,0.0,0.0,0.0,0.0


In [18]:
# Compare the DATA amount from the usage export (suffix _MB) with the BSS
# figure (suffix _BSS) per month and visited network. The outer join keeps
# networks that appear on only one side.
# NOTE(review): groupby drops rows whose key columns are NaN by default, so
# usage rows with a missing Country or VisitedNetworkId are left out of this
# comparison -- confirm that is intended.
mb_by_network = df_data.groupby(
    ['Month', 'Country', 'Tadig', 'VisitedNetworkId'], as_index=False)['DATA'].sum()
bss_by_network = df_bss_processed.groupby(
    ['Month', 'VisitedNetworkId'], as_index=False)['DATA'].sum()

df_temp = pd.merge(
    mb_by_network,
    bss_by_network,
    how='outer', on=['Month', 'VisitedNetworkId'], suffixes=('_MB', '_BSS')
)

# Per-network comparison table (display is provided by IPython)
display(df_temp.sort_values(by=['Country', 'Tadig']).round(2))

# Monthly totals of both sides. The columns are selected with a list: the
# previous tuple form ['DATA_MB','DATA_BSS'] is rejected by pandas >= 2.0.
df_temp.groupby(['Month'], as_index=False)[['DATA_MB', 'DATA_BSS']].sum().round(2)

Unnamed: 0,Month,Country,Tadig,VisitedNetworkId,DATA_MB,DATA_BSS
0,2020-11,Egypt,EGYAR,261,137.79,171.27
1,2020-11,Japan,JPNDO,499,2.71,2.61
2,2020-11,Malaysia,MYSMI,588,6.66,2.68
3,2020-11,Qatar,QATB1,753,0.36,0.35
4,2020-11,Saudi Arabia,SAUAJ,802,22.73,17.35
5,2020-11,Thailand,THAWN,952,3.29,5.14


Unnamed: 0,Month,DATA_MB,DATA_BSS
0,2020-11,173.54,199.39


In [22]:
# Comparison rows for the EGYAR Tadig only, rounded to 2 decimals
df_temp[df_temp['Tadig'].isin(['EGYAR'])].sort_values(by=['Country', 'Tadig']).round(2)

Unnamed: 0,Month,Country,Tadig,VisitedNetworkId,DATA_MB,DATA_BSS
0,2020-11,Egypt,EGYAR,261,137.79,171.27


In [21]:
# Monthly BSS totals per usage type. The columns are selected with a list: the
# previous tuple form ['MOC','MTC',...] is rejected by pandas >= 2.0.
df_bss_processed.groupby(['Month'], as_index=False)[['MOC', 'MTC', 'MOSMS', 'MTSMS', 'DATA']].sum()

UsageType,Month,MOC,MTC,MOSMS,MTSMS,DATA
0,2020-11,0.0,0.0,20.040855,0.0,199.394304


In [24]:
# Peek at the first five rows of the processed BSS frame
df_bss_processed.head(n=5)

UsageType,Day,Month,SponsorPrefix,VisitedNetworkId,MOC,MTC,MOSMS,MTSMS,DATA,TOTAL
267,2020-10-01,2020-10,S4,261,0.0,0.0,2.43192,0.0,1.15619,3.58811
268,2020-10-01,2020-10,S4,499,0.0,0.0,0.0,0.0,0.334341,0.334341
269,2020-10-01,2020-10,S4,588,0.0,0.0,0.0,0.0,0.0,0.0
270,2020-10-01,2020-10,S4,802,0.0,0.0,0.0,0.0,0.129865,0.129865
271,2020-10-01,2020-10,S4,952,0.0,0.0,0.0,0.0,0.145308,0.145308


In [34]:
# Ad-hoc conversion of a single amount (presumably an HKD total -- confirm)
# using the configured rate
hkd_usd_rate * 5699.12

740.8856000000001