## CHECK_MB_INVOICE

### Data price

In [1]:
import datetime as dt
import os
from collections import namedtuple
from getpass import getpass
from os.path import join, normpath

import numpy as np
import pandas as pd
from pandas import Series, DataFrame

import roamability as rb

In [2]:
# =====================================================================================
# Notebook configuration - every value a reader may need to tune lives in this cell
# =====================================================================================

# --- Input file ----------------------------------------------------------------------
folder_name = r'c:\Users\balob\Downloads\new1'
file_name_data = r'202003 data_usage(RB 202003).csv'

# --- BSS measure that is pivoted further down: 'Cost', 'Revenue' or 'Profit' ---------
use_value = 'Cost'

# --- Factor applied to the invoice BILL_AMT column -----------------------------------
hkd_usd_rate = 0.13

# --- Reporting period, both bounds inclusive (YYYYMMDD) ------------------------------
start_date = '20200301'
end_date = '20200331'

# --- Sponsors: symbolic name, MCCMNC prefix and numeric sponsor id -------------------
sponsor_object = namedtuple('Sponsor', ['name', 'mccmnc', 'sponsor_id'])

s1_sponsor = sponsor_object(name='S1', mccmnc=42501, sponsor_id=1)
s2_sponsor = sponsor_object(name='S2', mccmnc=26006, sponsor_id=2)
s4_sponsor = sponsor_object(name='S4', mccmnc=45403, sponsor_id=4)
s5_sponsor = sponsor_object(name='S5', mccmnc=51503, sponsor_id=5)

sponsors = [s1_sponsor, s2_sponsor, s4_sponsor, s5_sponsor]

# =====================================================================================

In [3]:
# Reference lookup: one row per TADIG code with its visited-network id and country name.
sql_srt="""SELECT t.network_id AS VisitedNetworkId, c.country_name AS Country, t.tadig_code AS Tadig
FROM RDB_TADIG_CODES t
LEFT JOIN RDB_NETWORKS n ON t.network_id = n.network_id
LEFT JOIN RDB_COUNTRIES c ON n.country_id = c.country_id;"""

# The connection secret must not live in the notebook source: read it from the
# BSS_MSSQL_PASSWORD environment variable and fall back to an interactive prompt.
# The value that used to be committed here should be rotated.
# NOTE(review): the fourth positional argument of rb.MssqlConnect is presumably the
# password - confirm against the helper's signature.
mssql_password = os.environ.get('BSS_MSSQL_PASSWORD') or getpass('BSS MSSQL password: ')

with rb.MssqlConnect('172.18.11.82', '10028', 'BSS', mssql_password) as cnxn:
    # coerce_float=False keeps non-string, non-numeric values (e.g. decimals) unconverted.
    df_tadig = pd.read_sql_query(sql_srt, cnxn, coerce_float=False)
display(df_tadig.head(3))

Unnamed: 0,VisitedNetworkId,Country,Tadig
0,4,Afghanistan,AFGAW
1,5,Afghanistan,AFGAR
2,6,Afghanistan,AFG55


In [4]:
# Load the invoice usage file and bring it to the notebook's common column names.
invoice_path = join(folder_name, file_name_data)
invoice_columns = {'TAGID':'Tadig','TRAN_DATE':'Day','USAGE_BYTE_BILL':'BYTES','BILL_AMT':'DATA'}

df_data = (
    pd.read_csv(invoice_path, usecols=list(invoice_columns), parse_dates=['TRAN_DATE'])
    # Scale the billed amount by the configured rate before it becomes the DATA column.
    .assign(BILL_AMT=lambda frame: frame.BILL_AMT * hkd_usd_rate)
    .rename(invoice_columns, axis=1)
    # Left join: invoice rows are kept even when the TADIG has no reference entry.
    .merge(df_tadig, how='left', on='Tadig')
    .assign(Month=lambda frame: frame.Day.dt.strftime('%Y-%m'))
)

df_data.head()

Unnamed: 0,Tadig,Day,BYTES,DATA,VisitedNetworkId,Country,Month
0,EGYAR,2020-03-01,889844418,8.605025,261,Egypt,2020-03
1,EGYAR,2020-03-02,557670275,5.392815,261,Egypt,2020-03
2,EGYAR,2020-03-03,539102507,5.21326,261,Egypt,2020-03
3,EGYAR,2020-03-04,285573390,2.761568,261,Egypt,2020-03
4,EGYAR,2020-03-05,483371115,4.674323,261,Egypt,2020-03


In [5]:
# Show the invoice rows whose TADIG code starts with 'TUR'; missing codes count as no match.
tur_mask = df_data['Tadig'].str.startswith('TUR', na=False)
df_data.loc[tur_mask]

Unnamed: 0,Tadig,Day,BYTES,DATA,VisitedNetworkId,Country,Month
91,TURTS,2020-03-01,11110,0.0,972,Turkey,2020-03


In [6]:
# Fetch usage data from BSS: data, voice and SMS aggregates for the configured period,
# stacked into one result set with a UsageType label per row.

sql_srt="""
SELECT * FROM
(
SELECT
Day,SponsorPrefix,VisitedCountryId,VisitedNetworkId,Customer,Cost,Revenue
,'DATA' AS UsageType
FROM aggreg.GPRS_AGR
WHERE `Day` BETWEEN {start_date} AND {end_date}
UNION ALL
SELECT
Day,SponsorPrefix,VisitedCountryId,VisitedNetworkId,Customer,Cost,Revenue
,CASE WHEN Sms = 0 THEN 'MOC' WHEN Sms = 1 THEN 'MOSMS' END AS UsageType
FROM aggreg.MOC_AGR
WHERE `Day` BETWEEN {start_date} AND {end_date}
UNION ALL
SELECT
Day,SponsorPrefix,VisitedCountryId,VisitedNetworkId,Customer,Cost,Revenue
,CASE WHEN Sms = 0 THEN 'MTC' WHEN Sms = 1 THEN 'MTSMS' END AS UsageType
FROM aggreg.MTC_AGR
WHERE `Day` BETWEEN {start_date} AND {end_date}
) t
""".format(start_date=start_date, end_date=end_date)

# The connection secret must not live in the notebook source: read it from the
# BSS_MYSQL_PASSWORD environment variable and fall back to an interactive prompt.
# The value that used to be committed here should be rotated.
# NOTE(review): the fourth positional argument of rb.MySqlConnect is presumably the
# password - confirm against the helper's signature.
mysql_password = os.environ.get('BSS_MYSQL_PASSWORD') or getpass('BSS MySQL password: ')

with rb.MySqlConnect('172.18.11.40', 'BSS', 'noc', mysql_password) as cnxn:
    df_bss_source = pd.read_sql_query(sql_srt, cnxn)
df_bss_source.head(3)

Unnamed: 0,Day,SponsorPrefix,VisitedCountryId,VisitedNetworkId,Customer,Cost,Revenue,UsageType
0,20200301,26006,14,50,172720,1.04174,2.21832,DATA
1,20200301,26006,14,50,172854,1.27884,2.72324,DATA
2,20200301,26006,14,51,172854,5.15409,10.9754,DATA


In [7]:
# Prepare the BSS data: label sponsors, pivot usage types into columns, add date fields.

# Replace the sponsor prefix with its symbolic name (S1, S2, ...).
# The result is assigned back to the frame; a chained ``inplace=True`` on the column
# accessor is not guaranteed to update the frame under pandas copy-on-write.
sponsor_dict = {sponsor.mccmnc: sponsor.name for sponsor in sponsors}
df_bss_processed = df_bss_source.assign(
    SponsorPrefix=df_bss_source['SponsorPrefix'].replace(sponsor_dict)
)

# The config cell offers 'Profit', but the BSS query only returns Cost and Revenue.
# NOTE(review): Profit is derived here as Revenue - Cost - confirm this definition.
if use_value == 'Profit' and 'Profit' not in df_bss_processed.columns:
    df_bss_processed['Profit'] = df_bss_processed['Revenue'] - df_bss_processed['Cost']

# Build the wide frame for the selected measure (Cost, Revenue, Profit).
# reindex() guarantees a column for every usage type, so the TOTAL and column selection
# below do not raise KeyError when a usage type has no rows in the period.
usage_types = ['MOC','MTC','MOSMS','MTSMS','DATA']
df_bss_processed = (
    df_bss_processed
    .pivot_table(values=use_value,
                 index=['Day','SponsorPrefix','VisitedNetworkId'],
                 columns='UsageType', aggfunc='sum')
    .reindex(columns=usage_types)
    .reset_index()
)

# Parse the YYYYMMDD day value and add the derived month field.
df_bss_processed['Day'] = pd.to_datetime(df_bss_processed['Day'], format='%Y%m%d')
df_bss_processed['Month'] = df_bss_processed['Day'].dt.strftime('%Y-%m')

# Add the TOTAL field and keep only the columns used further down.
df_bss_processed['TOTAL'] = df_bss_processed[usage_types].sum(axis=1)
df_bss_processed = df_bss_processed[
    ['Day', 'Month','SponsorPrefix','VisitedNetworkId', *usage_types, 'TOTAL']].fillna(0)

# Keep only the S4 sponsor; the name comes from the config cell instead of a literal.
df_bss_processed = df_bss_processed.loc[df_bss_processed['SponsorPrefix'] == s4_sponsor.name]
df_bss_processed.head(3)

UsageType,Day,Month,SponsorPrefix,VisitedNetworkId,MOC,MTC,MOSMS,MTSMS,DATA,TOTAL
244,2020-03-01,2020-03,S4,261,0.0,0.0,0.0,0.0,7.66764,7.66764
245,2020-03-01,2020-03,S4,499,0.0,0.0,1.3744,0.0,4.498815,5.873215
246,2020-03-01,2020-03,S4,588,0.0,0.0,0.0,0.0,0.0,0.0


In [8]:
# Compare monthly DATA amounts per visited network: invoice file (_MB) vs. BSS (_BSS).
mb_monthly = df_data.groupby(['Month','Country','Tadig','VisitedNetworkId'], as_index=False)['DATA'].sum()
bss_monthly = df_bss_processed.groupby(['Month','VisitedNetworkId'], as_index=False)['DATA'].sum()

# Outer join so that networks present on only one side still show up (with NaN on the other).
df_temp = pd.merge(
    mb_monthly,
    bss_monthly,
    how='outer', on=['Month','VisitedNetworkId'], suffixes=('_MB','_BSS')
)

display(df_temp.sort_values(by=['Country','Tadig']).round(2))

# Monthly totals of both sides. The columns are selected with a list: selecting several
# groupby columns with a bare tuple raises in pandas >= 2.0.
df_temp.groupby(['Month'], as_index=False)[['DATA_MB','DATA_BSS']].sum().round(2)

Unnamed: 0,Month,Country,Tadig,VisitedNetworkId,DATA_MB,DATA_BSS
0,2020-03,Egypt,EGYAR,261,46.74,45.55
1,2020-03,Israel,ISRPL,470,1.19,1.21
2,2020-03,Japan,JPNDO,499,126.58,117.52
3,2020-03,Malaysia,MYSMI,588,8.47,4.04
4,2020-03,Saudi Arabia,SAUAJ,802,3.85,0.5
5,2020-03,Thailand,THAWN,952,168.55,252.49
6,2020-03,Turkey,TURTS,972,0.0,
7,2020-03,United Arab Emirates,AREDU,996,633.3,625.94
8,2020-03,United States,USACG,1036,0.91,0.63
9,2020-03,,,770,,0.0


Unnamed: 0,Month,DATA_MB,DATA_BSS
0,2020-03,989.59,1047.89


In [27]:
# Monthly BSS sums of the voice and SMS columns.
# The columns are selected with a list: selecting several groupby columns with a bare
# tuple raises in pandas >= 2.0.
df_bss_processed.groupby(['Month'], as_index=False)[['MOC','MTC','MOSMS','MTSMS']].sum().unstack(1)

UsageType,Month,MOC,MTC,MOSMS,MTSMS
0,2020-03,300.007126,251.335544,234.767738,0.0


In [31]:
# Single grand total of the BSS voice and SMS columns over all months.
# The columns are selected with a list: selecting several groupby columns with a bare
# tuple raises in pandas >= 2.0.
df_bss_processed.groupby(['Month'])[['MOC','MTC','MOSMS','MTSMS']].sum().unstack(1).sum()

786.11040728

In [34]:
# Scale a hand-entered amount by the configured rate.
# NOTE(review): 5699.12 is presumably the invoice's voice/SMS total in HKD, to be compared
# with the BSS voice/SMS total computed in the previous cell - confirm the source of this figure.
amount_hkd = 5699.12
amount_hkd * hkd_usd_rate

740.8856000000001