In [1]:
import pandas as pd
from schema import schema

In [2]:
# Read data
df = pd.read_csv(r'data.csv', dtype=schema, encoding='utf-8')

In [3]:
# Pre-processing
df.drop(['state'], axis=1, inplace=True)

In [4]:
df.head()

Unnamed: 0,iccid,used,plan_c,left,ac_time,product,company,carrier,plan_s,month
0,898607B4071790000000,3.931,10.0,6.069,2018-01-16 15:21:11,SPB132,挚享科技（上海）有限公司,北京华虹,10.0,2
1,898607B4071790000001,0.0,10.0,10.0,2018-01-16 15:21:44,SPB132,挚享科技（上海）有限公司,北京华虹,10.0,2
2,898607B4071790000002,4.877,10.0,5.123,2018-01-16 15:19:09,SPB132,挚享科技（上海）有限公司,北京华虹,10.0,2
3,898607B4071790000003,4.259,10.0,5.741,2018-02-09 11:10:04,SPB132,挚享科技（上海）有限公司,北京华虹,10.0,2
4,898607B4071790000004,1.74,10.0,8.26,2018-01-16 15:21:28,SPB132,挚享科技（上海）有限公司,北京华虹,10.0,2


In [5]:
counts = df['product'].value_counts()

In [6]:
# Filter out non-frequent items
df_flt = df[df['product'].isin(counts[counts >= 100].index)]
# Filter out plan_c == 0 (usage pool)
#df_flt = df_flt[df_flt['plan_c']!=0]

In [7]:
df_flt['product'].value_counts()

SPB132       992065
智能车锁          39578
智能柜           37640
洗衣机           16145
电动摩托车         10729
充电宝            1684
定位器             615
2GWiFi传感器       412
商米POS机          278
Name: product, dtype: int64

In [8]:
# Average usage by device
df_flt.groupby(['product', 'month'])['used'].mean()

product    month
2GWiFi传感器  4         60.042253
           5        266.991063
           6        139.981088
SPB132     2          3.888193
           3          4.418869
           4          4.377335
           5          3.696993
           6          2.676808
充电宝        4          2.956967
           5          2.997328
           6          2.344063
商米POS机     4          0.170667
           5          9.128721
           6          4.017312
定位器        2          2.062519
           3          2.027930
           4          0.912810
           5          1.130042
           6          0.970373
智能柜        2          6.763185
           3          8.455480
           4          7.542408
           5          7.570744
           6          5.472348
智能车锁       2          3.696198
           3          4.710720
           4          4.991057
           5          5.263135
           6          2.820215
洗衣机        2          2.411543
           3          2.983239
           4          

In [9]:
# Split product SPB132
df_noplan = df_flt[df_flt['plan_c']==0]
df_hasplan = df_flt[df_flt['plan_c']!=0]

In [10]:
df_hasplan.head()

Unnamed: 0,iccid,used,plan_c,left,ac_time,product,company,carrier,plan_s,month
0,898607B4071790000000,3.931,10.0,6.069,2018-01-16 15:21:11,SPB132,挚享科技（上海）有限公司,北京华虹,10.0,2
1,898607B4071790000001,0.0,10.0,10.0,2018-01-16 15:21:44,SPB132,挚享科技（上海）有限公司,北京华虹,10.0,2
2,898607B4071790000002,4.877,10.0,5.123,2018-01-16 15:19:09,SPB132,挚享科技（上海）有限公司,北京华虹,10.0,2
3,898607B4071790000003,4.259,10.0,5.741,2018-02-09 11:10:04,SPB132,挚享科技（上海）有限公司,北京华虹,10.0,2
4,898607B4071790000004,1.74,10.0,8.26,2018-01-16 15:21:28,SPB132,挚享科技（上海）有限公司,北京华虹,10.0,2


In [11]:
df_hasplan.describe()

Unnamed: 0,used,plan_c,left,plan_s,month
count,910452.0,910452.0,910452.0,910452.0,910452.0
mean,3.999888,11.383774,7.523382,11.178294,4.01508
std,6.693716,5.075498,5.405439,4.709302,1.412036
min,0.0,10.0,0.0,10.0,2.0
25%,0.0,10.0,4.105,10.0,3.0
50%,4.313,10.0,6.114,10.0,4.0
75%,6.131,10.0,10.0,10.0,5.0
max,789.941,30.0,30.0,30.0,6.0


In [12]:
# Compute paid cost
from metric import get_best_plan_and_cost, compute_cost_with_plan_and_usage
df_hasplan['best_plan'], df_hasplan['best_cost'], df_hasplan['cost_with_plan_c'] = \
    zip(*df_hasplan.apply(lambda row: get_best_plan_and_cost(row['used'], row['plan_c']), axis=1))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


In [13]:
# User perceived cost
_, _, df_hasplan['cost_with_plan_s'] = \
    zip(*df_hasplan.apply(lambda row: get_best_plan_and_cost(row['used'], row['plan_s']), axis=1))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app


In [14]:
df_hasplan.tail()

Unnamed: 0,iccid,used,plan_c,left,ac_time,product,company,carrier,plan_s,month,best_plan,best_cost,cost_with_plan_c,cost_with_plan_s
1043414,898602B9211770016451,3.304,30.0,26.696,2018-03-08 00:31:57,智能柜,杭州云造科技有限公司,中国移动物联网平台广西分公司,30.0,6,0,0.95816,5.0,5.0
1043415,898602B9211770016452,0.0,30.0,30.0,2018-03-08 00:22:20,智能柜,杭州云造科技有限公司,中国移动物联网平台广西分公司,30.0,6,0,0.0,5.0,5.0
1043416,898602B9211770016453,0.0,30.0,30.0,2018-03-08 00:20:46,智能柜,杭州云造科技有限公司,中国移动物联网平台广西分公司,30.0,6,0,0.0,5.0,5.0
1043417,898602B9211770016454,8.103,30.0,21.897,2018-03-01 08:57:26,智能车锁,杭州云造科技有限公司,中国移动物联网平台广西分公司,30.0,6,0,2.34987,5.0,5.0
1043418,898602B9211770016459,0.122,30.0,29.878,2018-03-08 00:20:39,智能柜,杭州云造科技有限公司,中国移动物联网平台广西分公司,30.0,6,0,0.03538,5.0,5.0


In [15]:
df_hasplan.describe()

Unnamed: 0,used,plan_c,left,plan_s,month,best_plan,best_cost,cost_with_plan_c,cost_with_plan_s
count,910452.0,910452.0,910452.0,910452.0,910452.0,910452.0,910452.0,910452.0,910452.0
mean,3.999888,11.383774,7.523382,11.178294,4.01508,0.398593,1.121513,3.178831,3.178591
std,6.693716,5.075498,5.405439,4.709302,1.412036,8.061034,1.026953,1.626659,1.702394
min,0.0,10.0,0.0,10.0,2.0,0.0,0.0,3.0,3.0
25%,0.0,10.0,4.105,10.0,3.0,0.0,0.0,3.0,3.0
50%,4.313,10.0,6.114,10.0,4.0,0.0,1.25077,3.0,3.0
75%,6.131,10.0,10.0,10.0,5.0,0.0,1.77799,3.0,3.0
max,789.941,30.0,30.0,30.0,6.0,1000.0,40.0,225.38289,229.18289


In [16]:
df_diff_plan = df_hasplan[df_hasplan['plan_c']!=df_hasplan['plan_s']]

In [17]:
len(df_diff_plan)

9354

In [18]:
# Strategy 1: use last month's best plan as next month's plan_c
df_apr = df_hasplan[df_hasplan['month']==4]
df_may = df_hasplan[df_hasplan['month']==5]
# for simplicity, select only devices which exists in both months
iccid_apr = df_apr.iccid.tolist()
df_may = df_may[df_may['iccid'].isin(iccid_apr)]

In [19]:
len(df_may)

183247

In [20]:
df_may.describe()

Unnamed: 0,used,plan_c,left,plan_s,month,best_plan,best_cost,cost_with_plan_c,cost_with_plan_s
count,183247.0,183247.0,183247.0,183247.0,183247.0,183247.0,183247.0,183247.0,183247.0
mean,4.101045,11.45454,7.534969,11.249679,5.0,0.424836,1.143621,3.198081,3.193722
std,7.696244,5.193771,5.691589,4.840663,0.0,9.046418,1.200516,1.852226,1.93293
min,0.0,10.0,0.0,10.0,5.0,0.0,0.0,3.0,3.0
25%,0.0,10.0,2.521,10.0,5.0,0.0,0.0,3.0,3.0
50%,3.846,10.0,8.096,10.0,5.0,0.0,1.11534,3.0,3.0
75%,7.691,10.0,10.0,10.0,5.0,0.0,2.23039,3.0,3.0
max,675.345,30.0,30.0,30.0,5.0,700.0,32.0,192.15005,195.95005


In [21]:
# Apr's best strategy
d = df_apr.set_index('iccid')['best_plan'].to_dict()

In [22]:
df_may['strategy_1'] = df_may['iccid'].map(d)
df_may['cost_with_strategy_1'] = df_may.apply(lambda row: compute_cost_with_plan_and_usage(row['strategy_1'], row['used']), axis=1)

In [23]:
df_may.head()

Unnamed: 0,iccid,used,plan_c,left,ac_time,product,company,carrier,plan_s,month,best_plan,best_cost,cost_with_plan_c,cost_with_plan_s,strategy_1,cost_with_strategy_1
626150,898607B4071790000000,7.533,10.0,2.467,2018-01-16 15:21:11,SPB132,挚享科技（上海）有限公司,北京华虹,10.0,5,0,2.18457,3.0,3.0,0,2.18457
626151,898607B4071790000001,7.957,10.0,2.043,2018-01-16 15:21:44,SPB132,挚享科技（上海）有限公司,北京华虹,10.0,5,0,2.30753,3.0,3.0,0,2.30753
626152,898607B4071790000002,0.0,10.0,10.0,2018-01-16 15:19:09,SPB132,挚享科技（上海）有限公司,北京华虹,10.0,5,0,0.0,3.0,3.0,0,0.0
626153,898607B4071790000003,0.0,10.0,10.0,2018-02-09 11:10:04,SPB132,挚享科技（上海）有限公司,北京华虹,10.0,5,0,0.0,3.0,3.0,0,0.0
626154,898607B4071790000004,0.0,10.0,10.0,2018-01-16 15:21:28,SPB132,挚享科技（上海）有限公司,北京华虹,10.0,5,0,0.0,3.0,3.0,0,0.0


In [24]:
df_may.describe()

Unnamed: 0,used,plan_c,left,plan_s,month,best_plan,best_cost,cost_with_plan_c,cost_with_plan_s,strategy_1,cost_with_strategy_1
count,183247.0,183247.0,183247.0,183247.0,183247.0,183247.0,183247.0,183247.0,183247.0,183247.0,183247.0
mean,4.101045,11.45454,7.534969,11.249679,5.0,0.424836,1.143621,3.198081,3.193722,0.470349,1.17509
std,7.696244,5.193771,5.691589,4.840663,0.0,9.046418,1.200516,1.852226,1.93293,8.229794,1.714331
min,0.0,10.0,0.0,10.0,5.0,0.0,0.0,3.0,3.0,0.0,0.0
25%,0.0,10.0,2.521,10.0,5.0,0.0,0.0,3.0,3.0,0.0,0.0
50%,3.846,10.0,8.096,10.0,5.0,0.0,1.11534,3.0,3.0,0.0,1.11737
75%,7.691,10.0,10.0,10.0,5.0,0.0,2.23039,3.0,3.0,0.0,2.23184
max,675.345,30.0,30.0,30.0,5.0,700.0,32.0,192.15005,195.95005,700.0,150.81664


In [25]:
# where does strategy 1 performs worst?
df_may['diff_strategy_1'] = df_may['cost_with_strategy_1'] - df_may['best_cost']
df_may.sort_values('diff_strategy_1', ascending=False).drop(['ac_time','left','month','plan_s','carrier'],axis=1).head()

Unnamed: 0,iccid,used,plan_c,product,company,best_plan,best_cost,cost_with_plan_c,cost_with_plan_s,strategy_1,cost_with_strategy_1,diff_strategy_1
736954,898607B4071790061036,532.816,30.0,SPB132,挚享科技（上海）有限公司,700,32.0,150.81664,154.61664,30,150.81664,118.81664
736359,898607B4071790060425,503.447,30.0,SPB132,挚享科技（上海）有限公司,500,24.99963,142.29963,146.09963,100,124.99963,100.0
736236,898607B4071790060293,418.358,30.0,SPB132,挚享科技（上海）有限公司,500,24.0,117.62382,121.42382,30,117.62382,93.62382
737092,898607B4071790061183,457.97,30.0,SPB132,挚享科技（上海）有限公司,500,24.0,129.1113,132.9113,100,111.8113,87.8113
736182,898607B4071790060238,381.362,30.0,SPB132,挚享科技（上海）有限公司,500,24.0,106.89498,110.69498,0,110.59498,86.59498


In [26]:
df_may.describe()

Unnamed: 0,used,plan_c,left,plan_s,month,best_plan,best_cost,cost_with_plan_c,cost_with_plan_s,strategy_1,cost_with_strategy_1,diff_strategy_1
count,183247.0,183247.0,183247.0,183247.0,183247.0,183247.0,183247.0,183247.0,183247.0,183247.0,183247.0,183247.0
mean,4.101045,11.45454,7.534969,11.249679,5.0,0.424836,1.143621,3.198081,3.193722,0.470349,1.17509,0.031469
std,7.696244,5.193771,5.691589,4.840663,0.0,9.046418,1.200516,1.852226,1.93293,8.229794,1.714331,0.933096
min,0.0,10.0,0.0,10.0,5.0,0.0,0.0,3.0,3.0,0.0,0.0,0.0
25%,0.0,10.0,2.521,10.0,5.0,0.0,0.0,3.0,3.0,0.0,0.0,0.0
50%,3.846,10.0,8.096,10.0,5.0,0.0,1.11534,3.0,3.0,0.0,1.11737,0.0
75%,7.691,10.0,10.0,10.0,5.0,0.0,2.23039,3.0,3.0,0.0,2.23184,0.0
max,675.345,30.0,30.0,30.0,5.0,700.0,32.0,192.15005,195.95005,700.0,150.81664,118.81664


In [28]:
# Usage pool
df_flt_may = df_flt[df_flt['month']==5]

In [29]:
df_flt_may.describe()

Unnamed: 0,used,plan_c,left,plan_s,month
count,233179.0,233179.0,233179.0,233179.0,233179.0
mean,4.068857,9.030187,5.94385,13.843828,5.0
std,10.975288,6.576714,5.923328,14.769959,0.0
min,0.0,0.0,0.0,10.0,5.0
25%,0.0,10.0,1.293,10.0,5.0
50%,2.952,10.0,4.718,10.0,5.0
75%,7.411,10.0,10.0,10.0,5.0
max,1131.039,30.0,30.0,500.0,5.0


In [30]:
sum(df_flt_may['used'])

948771.8970000054

In [31]:
sum(df_flt_may['plan_c'])

2105650.0

In [32]:
sum(df_flt_may['plan_s'])

3228090.0

In [33]:
df_may.tail(3)

Unnamed: 0,iccid,used,plan_c,left,ac_time,product,company,carrier,plan_s,month,best_plan,best_cost,cost_with_plan_c,cost_with_plan_s,strategy_1,cost_with_strategy_1,diff_strategy_1
809911,898602B9211770016453,2.383,30.0,27.617,2018-03-08 00:20:46,智能柜,杭州云造科技有限公司,中国移动物联网平台广西分公司,30.0,5,0,0.69107,5.0,5.0,0,0.69107,0.0
809912,898602B9211770016454,0.006,30.0,29.994,2018-03-01 08:57:26,智能车锁,杭州云造科技有限公司,中国移动物联网平台广西分公司,30.0,5,0,0.00174,5.0,5.0,0,0.00174,0.0
809913,898602B9211770016459,0.08,30.0,29.92,2018-03-08 00:20:39,智能柜,杭州云造科技有限公司,中国移动物联网平台广西分公司,30.0,5,0,0.0232,5.0,5.0,0,0.0232,0.0


In [34]:
# No usage pool
cnt_device = len(df_may)
cnt_device_unused = len(df_may[df_may['used']==0])
print("Total Devices in May that are activated (and have a plan) before May: {}".format(cnt_device))
print("Device with zero used data: {}".format(cnt_device_unused))
print("May cost - best plan: ${0:.2f}".format(sum(df_may['best_cost'])))
print("May cost - plan_c   : ${0:.2f}".format(sum(df_may['cost_with_plan_c'])))
print("May cost - strategy1: ${0:.2f}".format(sum(df_may['cost_with_strategy_1'])))

Total Devices in May that are activated (and have a plan) before May: 183247
Device with zero used data: 71685
May cost - best plan: $209565.20
May cost - plan_c   : $586038.71
May cost - strategy1: $215331.79


In [60]:
# Usage pool, plan cost
from importlib import reload
import metric
from metric import get_plan_fix_cost
reload(metric)
#reload(get_plan_fix_cost)
df_may_full['plan_cost'] = df_may_full['plan_c'].apply(get_plan_fix_cost)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [65]:
sum(df_may_full['plan_cost'])

578211.0

In [None]:
# With usage pool
#df_may_full = df[df['month']==5]
print("Total devices in May: {}".format(len(df_may_full)))
print("Total usage in May  : {0:.0f}".format(sum(df_may_full['used'])))
print("Total plan_c in May : {0:.0f}".format(sum(df_may_full['plan_c'])))
print("Extra cost vs. best : ${0:.2f}".format(0.29 * (sum(df_may_full['plan_cost']) - sum(df_may_full['used'])) ))
print("Total plan_s in May : {0:.0f}".format(sum(df_may_full['plan_s'].fillna(0))))  # note some N/A in plan_s

In [None]:
df_may_full[df_may_full['plan_s'].isna()]

In [None]:
df.groupby('month')['iccid'].count()

In [None]:
df.groupby('month')['iccid'].nunique()

In [None]:
df.groupby('month')['used'].sum()

In [97]:
from importlib import reload
reload(metric)

<module 'metric' from 'D:\\device_usage\\metric.py'>

In [88]:
# Read data
df = pd.read_csv(r'data.csv', dtype=schema, encoding='utf-8')
# Pre-processing
df.drop(['state'], axis=1, inplace=True)
df.fillna(0, inplace=True)

In [93]:
df['plan_c'].value_counts()

10.0      847459
0.0       188760
30.0       63016
6.0            5
2048.0         2
1024.0         2
Name: plan_c, dtype: int64

In [99]:
df['plan_s'].value_counts()

10.0      956321
30.0      142399
500.0        412
3072.0        30
5.0           27
0.0           23
2048.0        16
4096.0         9
1024.0         2
Name: plan_s, dtype: int64

In [94]:
# Remove outlier  (plan_c == 6)
df = df[df['plan_c']!=6]

In [95]:
from metric import *
df['best_plan'], df['best_cost'], df['cost_with_plan_c'] = \
    zip(*df.apply(lambda row: get_best_plan_and_cost(row['used'], row['plan_c']), axis=1))

In [98]:
# User perceived cost
_, _, df['cost_with_plan_s'] = \
    zip(*df.apply(lambda row: get_best_plan_and_cost(row['used'], row['plan_s']), axis=1))

In [100]:
# Load Apr and May data
df_apr = df[df['month']==4]
df_may = df[df['month']==5]

In [101]:
# Actual cost
df_may['actual_cost'] = df_may['plan_c'].apply(get_plan_fix_cost)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app


In [102]:
print("Acutal cost in May: ${}".format(sum(df_may['actual_cost'])))

Acutal cost in May: $578211.0


In [121]:
# Strategy 1: use last month's best plan as next month's strategy (plan_c)
# for simplicity, select only devices which exists in both months
iccid_apr = df_apr.iccid.tolist()
df_may_flt = df_may[df_may['iccid'].isin(iccid_apr)]
d = df_apr.set_index('iccid')['best_plan'].to_dict()
df_may_flt['strategy_1'] = df_may_flt['iccid'].map(d)
df_may_flt['cost_with_strategy_1'] = df_may_flt.apply(lambda row: compute_cost_with_plan_and_usage(row['strategy_1'], row['used']), axis=1)
df_may_flt['fixed_cost_strategy_1'] = df_may_flt['strategy_1'].apply(get_plan_fix_cost)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [122]:
df_may_flt.head()

Unnamed: 0,iccid,used,plan_c,left,ac_time,product,company,carrier,plan_s,month,best_plan,best_cost,cost_with_plan_c,cost_with_plan_s,actual_cost,strategy_1,cost_with_strategy_1,fixed_cost_strategy_1
626150,898607B4071790000000,7.533,10.0,2.467,2018-01-16 15:21:11,SPB132,挚享科技（上海）有限公司,北京华虹,10.0,5,10,3.0,3.0,3.0,3.0,10,3.0,3.0
626151,898607B4071790000001,7.957,10.0,2.043,2018-01-16 15:21:44,SPB132,挚享科技（上海）有限公司,北京华虹,10.0,5,10,3.0,3.0,3.0,3.0,2,7.099968,1.0
626152,898607B4071790000002,0.0,10.0,10.0,2018-01-16 15:19:09,SPB132,挚享科技（上海）有限公司,北京华虹,10.0,5,0,0.0,3.0,3.0,3.0,0,0.0,0.0
626153,898607B4071790000003,0.0,10.0,10.0,2018-02-09 11:10:04,SPB132,挚享科技（上海）有限公司,北京华虹,10.0,5,0,0.0,3.0,3.0,3.0,0,0.0,0.0
626154,898607B4071790000004,0.0,10.0,10.0,2018-01-16 15:21:28,SPB132,挚享科技（上海）有限公司,北京华虹,10.0,5,0,0.0,3.0,3.0,3.0,0,0.0,0.0


In [145]:
# Cost
cnt_device = len(df_may)
cnt_device_unused = len(df_may_flt[df_may_flt['used']==0])
print("Total Devices in May (activated before May): {}".format(cnt_device))
print("Device with zero used data: {}".format(cnt_device_unused))

print("\nNo data pool cost:")
print("May cost - plan_c   : ${0:.2f}".format(sum(df_may_flt['cost_with_plan_c'])))
print("May cost - strategy1: ${0:.2f}".format(sum(df_may_flt['cost_with_strategy_1'])))
print("May cost - best plan: ${0:.2f}".format(sum(df_may_flt['best_cost'])))

print("\nWith data pool:")
print("May cost - plan_c   : ${0:.2f}".format(sum(df_may_flt['actual_cost'])))
print("May cost - strategy1: ${0:.2f}".format(sum(df_may_flt['fixed_cost_strategy_1'])))

Total Devices in May (activated before May): 233203
Device with zero used data: 83410

No data pool cost:
May cost - plan_c   : $746939.47
May cost - strategy1: $453354.54
May cost - best plan: $340103.69

With data pool:
May cost - plan_c   : $576415.00
May cost - strategy1: $376744.00


In [141]:
# Data usage
print("Total usage in May  : {0:.0f} GB".format(sum(df_may_flt['used'])/1024))
print("Total plan_c in May : {0:.0f} GB".format(sum(df_may_flt['plan_c'])/1024))
print("Total plan_s in May : {0:.0f} GB".format(sum(df_may_flt['plan_s'])/1024))
print("Total quota with strategy1 in May : {0:.0f} GB".format(sum(df_may_flt['strategy_1'])/1024))
print("Total best plan quota in May : {0:.0f} GB".format(sum(df_may_flt['best_plan'])/1024))

Total usage in May  : 887 GB
Total plan_c in May : 2050 GB
Total plan_s in May : 2641 GB
Total quota with strategy1 in May : 1208 GB
Total best plan quota in May : 1081 GB
