# MySingtel Dart vs LBS

In [1]:
import pandas as pd
from glob import glob
from bokeh.io import output_file, output_notebook, show
from bokeh.models import(
    GMapPlot, GMapOptions, ColumnDataSource, Circle, Triangle, LogColorMapper, BasicTicker, ColorBar,
    DataRange1d, PanTool, WheelZoomTool, BoxZoomTool, ZoomInTool
)
from bokeh.models.mappers import ColorMapper, LinearColorMapper
from bokeh.palettes import Viridis5

In [30]:
# doi: date of interest
doi = '20170823'
files = glob('data/weekday/merged_top20cid_'+doi+'.csv')
df_from_each_file = (pd.read_csv(f, header=0) for f in files)
df = pd.concat(df_from_each_file, ignore_index=True)

In [31]:
df.drop(df.columns[0],axis=1,inplace=True)

In [32]:
print(df.imsi.count())
df.columns = ['imsi', 'time_lbs', 'cellid', 'event', 'lat_lbs', 'lon_lbs',
              'time_dart', 'lon_dart', 'lat_dart', 'diff_sec']
df.head(2)

943895


Unnamed: 0,imsi,time_lbs,cellid,event,lat_lbs,lon_lbs,time_dart,lon_dart,lat_dart,diff_sec
0,52501602470C20002AF77E3337200CAC8146F9,2017-08-23 18:45:34,525-1-746-7369675,200,1.440519762,103.8032032,2017-08-23 18:47:20,103.800926,1.440365,106
1,52501602470C20002AF77E3337200CAC8146F9,2017-08-23 18:45:53,525-1-746-7369675,200,1.440519762,103.8032032,2017-08-23 18:47:20,103.800926,1.440365,87


In [33]:
df = df[(df.diff_sec<=10) & (df.diff_sec >=-10)]

In [34]:
print(df.imsi.count())
df_sortcell = pd.DataFrame({'count' : df.groupby(["lat_lbs", "lon_lbs", "cellid"]).size()}).reset_index()

102785


In [35]:
df_sortcell = df_sortcell.sort_values(['count'], ascending=False).reset_index(drop=True)

In [36]:
df_sortcell.head(20)

Unnamed: 0,lat_lbs,lon_lbs,cellid,count
0,1.343174,103.86069,525-1-747-7316971,2832
1,1.45013459,103.8100161,525-1-710-7362835,2502
2,1.428977,103.834083,525-1-710-7371472,2482
3,1.330331,103.741301,525-1-748-7334573,1983
4,1.317568052,103.8509492,525-1-713-7390973,1953
5,1.315338,103.765611,525-1-748-7487372,1940
6,1.350719201,103.852005,525-1-714-7383831,1902
7,1.384033633,103.743502,525-1-746-7364732,1883
8,1.325046,103.890315,525-1-747-7311373,1760
9,1.349346586,103.7404333,525-1-746-7483872,1652


In [11]:
cells = list(df['cellid'].value_counts()[:10].index)
print(cells)

['525-1-747-7316971', '525-1-742-7341676', '525-1-718-7350634', '525-1-744-7337635', '525-1-710-7366676', '525-1-746-7369675', '525-1-746-7483872', '525-1-738-95406791', '525-1-716-9561535', '525-1-746-7364732']


In [12]:
df_use = df[df['cellid'].isin(cells)]

In [13]:
import random
def get_spaced_colors(n):
    def rgb_to_hex(rgb):
        return '#%02x%02x%02x' % rgb
    max_value = 16500000 #255**3
    interval = int(max_value / n)
    # (0,0,0) is black, so we want to avoid black by starting from 10
    colors = [hex(I)[2:].zfill(6) for I in range(1000, max_value, interval)]
    return [rgb_to_hex((int(i[:2], 16), int(i[2:4], 16), int(i[4:], 16))) for i in colors]

colors = get_spaced_colors(len(cells))
cells_colors = dict(zip(cells, colors))
df_use['colors'] = df_use['cellid'].map(lambda x: cells_colors[x])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [14]:
df_lbs = df_use[['cellid', 'lat_lbs', 'lon_lbs', 'colors']]

In [15]:
map_options = GMapOptions(lat=1.353, lng=103.83, map_type="roadmap", zoom=12)

def plot_df(df_dart, df_lbs, title='Dart plot'):
    p_day = GMapPlot(
        x_range=DataRange1d(), y_range=DataRange1d(), map_options=map_options,
        plot_width=950, plot_height=700
    )
    p_day.title.text = title

    p_day.api_key = "AIzaSyDpUETNSv04jmgUFKfv-fGqX2U1e1fwUsw"
    source_dart = ColumnDataSource(
        data=dict(
            lat=df_dart.lat_dart.tolist(),
            lon=df_dart.lon_dart.tolist(),
            color=df_dart.colors
        )
    )

    circle = Circle(x="lon", y="lat", size=5, fill_color='color', fill_alpha=0.3, line_color=None)
    source_lbs = ColumnDataSource(
        data=dict(
            lat=df_lbs.lat_lbs.tolist(),
            lon=df_lbs.lon_lbs.tolist(),
            color=df_lbs.colors
        )
    )

    triangle = Triangle(x="lon", y="lat", size=14, fill_color='color', fill_alpha=0.8, line_color='black')
    p_day.add_glyph(source_dart, circle)
    p_day.add_glyph(source_lbs, triangle)
    p_day.add_tools(PanTool(), WheelZoomTool(), ZoomInTool())
    #output_file("gmap_plot.html")
    output_notebook()
    show(p_day)

plot_df(df_use, df_lbs, 'MySingtel Dart Data vs. LBS on '+doi)


In [65]:
import datetime
def within_time(start, end, time_str):
    curr_time = time_str.split(' ')[1].strip()
    curr_time = datetime.datetime.strptime(curr_time,'%H:%M:%S').time()
    if start <= end:
        return start <= curr_time <= end
    else:
        return start <= curr_time or curr_time <= end

start_time = datetime.time(10, 0, 0)
end_time = datetime.time(16, 0, 0)

In [66]:
df_daytime = df_use[df_use.apply(lambda x: within_time(start_time, end_time, x['time_dart']), axis=1)]

In [72]:
map_options = GMapOptions(lat=1.34, lng=103.75, map_type="roadmap", zoom=14)
plot_df(df_daytime, df_lbs, title='Dart in Daytime 10am - 4pm')

In [74]:
start_time = datetime.time(22, 0, 0)
end_time = datetime.time(4, 0, 0)
df_nighttime = df_use[df_use.apply(lambda x: within_time(start_time, end_time, x['time_dart']), axis=1)]
plot_df(df_nighttime, df_lbs, title='Dart in Nighttime 10pm - 4am')

In [77]:
df_daytime.imsi.count()

32599

In [76]:
df_nighttime.imsi.count()

6717