# Размещение баннеров

In [76]:
import pandas as pd
import numpy as np
import numpy.linalg
from io import StringIO
from collections import defaultdict

import folium as fl
from folium.plugins import MarkerCluster

In [77]:
# Load the check-in data, preferring the trimmed CSV cache written by a previous run.
try:
    df = pd.read_csv('checkins.trimmed.csv')
except OSError:
    # Cache is missing: rebuild it from the raw pipe-delimited dump.
    with open('checkins.dat') as f:
        # Every space is removed before parsing (presumably to strip column
        # padding in the raw dump -- TODO confirm against checkins.dat).
        # NOTE(review): this also removes the space inside `created_at`
        # (values come out as "2012-04-2117:43:47"); harmless as long as only
        # the coordinates are used.
        checkins = f.read().replace(" ", "")

    # Drop rows with any missing field, keep the first 100000 remaining rows and
    # renumber them so a fresh build and a cache reload share the same index.
    df = (
        pd.read_csv(StringIO(checkins), delimiter='|')
        .dropna()[:100000]
        .reset_index(drop=True)
    )
    # index=False: otherwise the row index is saved as an unnamed column and
    # comes back as a spurious "Unnamed: 0" column when the cache is reloaded.
    df.to_csv('checkins.trimmed.csv', index=False)

# Feature matrix for clustering: one (latitude, longitude) row per check-in.
X = df[['latitude', 'longitude']]

In [78]:
# Preview the first five rows of the loaded check-ins.
df.head(n=5)

Unnamed: 0.1,Unnamed: 0,id,user_id,venue_id,latitude,longitude,created_at
0,2,984222,15824.0,5222.0,38.895112,-77.036366,2012-04-2117:43:47
1,4,984234,44652.0,5222.0,33.800745,-84.41052,2012-04-2117:43:43
2,8,984291,105054.0,5222.0,45.523452,-122.676207,2012-04-2117:39:22
3,10,984318,2146539.0,5222.0,40.764462,-111.904565,2012-04-2117:35:46
4,11,984232,93870.0,380645.0,33.448377,-112.074037,2012-04-2117:38:18


In [79]:
# Preview the first five (latitude, longitude) rows used for clustering.
X.head(n=5)

Unnamed: 0,latitude,longitude
0,38.895112,-77.036366
1,33.800745,-84.41052
2,45.523452,-122.676207
3,40.764462,-111.904565
4,33.448377,-112.074037


In [80]:
from sklearn.cluster import MeanShift

# Cluster the check-in coordinates with mean shift. The bandwidth is expressed
# in the units of X's columns (degrees of latitude/longitude).
clf = MeanShift(bandwidth=0.1)
clf.fit(X)

# Cluster label of every row of X, as stored on the fitted estimator.
pred = clf.labels_

In [81]:
# Show the cluster label assigned to each check-in (numpy abbreviates long arrays).
print(pred)

[ 5  7 30 ..., 25 19  4]


In [82]:
# Show the fitted cluster centres, one (latitude, longitude) row per cluster.
print(clf.cluster_centers_)

[[  40.7177164   -73.99183542]
 [  33.44943805 -112.00213969]
 [  33.44638027 -111.90188756]
 ..., 
 [  28.3647248   -82.1959178 ]
 [  37.684167    -77.885556  ]
 [  40.7694969   -83.8227148 ]]


In [83]:
# Count how many check-ins were assigned to each cluster label.
freq = defaultdict(int)
for label in pred:
    freq[label] += 1

# Keep (cluster index, centre) pairs for clusters holding at least
# `min_points` check-ins.
min_points = 15
filtered = [
    (idx, center)
    for idx, center in enumerate(clf.cluster_centers_)
    if freq[idx] >= min_points
]

print('clusters count: {}'.format(len(clf.cluster_centers_)))
print('clusters with >= 15 points: {}'.format(len(filtered)))

clusters count: 3231
clusters with >= 15 points: 624


In [84]:
def save_map_with(clusters, offices, outfile):
    """Draw cluster centres and office locations on a map and save it as HTML.

    Parameters
    ----------
    clusters : sequence of (latitude, longitude) pairs
        Locations handed to a single ``MarkerCluster`` layer.
    offices : iterable of (latitude, longitude) pairs
        Each one gets its own red ``info-sign`` marker with the popup 'office'.
    outfile : str
        Path of the HTML file the map is written to.
    """
    map_ = fl.Map()
    # Attach the layer with add_to(), exactly like the office markers below;
    # add_children() is a deprecated spelling of the same operation.
    MarkerCluster(clusters).add_to(map_)

    for office in offices:
        fl.Marker(office, popup='office',
                  icon=fl.Icon(color='red', icon='info-sign')).add_to(map_)

    map_.save(outfile)

In [86]:
# Office coordinates as (latitude, longitude), keyed by city name.
offices = {
    'Los Angeles': (33.751277, -118.188740),
    'Miami': (25.867736, -80.324116),
    'London': (51.503016, -0.075479),
    'Amsterdam': (52.378894, 4.885084),
    'Beijing': (39.366487, 117.036146),
    'Sydney': (-33.868457, 151.205134)
}

# Plot the centres of all sufficiently large clusters next to the offices.
filtered_centers = [center for _idx, center in filtered]
save_map_with(filtered_centers, offices.values(), 'map_filtered.html')

In [87]:
%%HTML
<!-- iframe is not a void element: it needs an explicit closing tag -->
<iframe width="100%" height="500" src="map_filtered.html"></iframe>

In [93]:
# For every retained cluster centre, find the closest office.
# NOTE(review): the distance is the plain Euclidean norm of the
# (latitude, longitude) difference in degrees, not a geodesic distance.
centers_with_dist = []
for _idx, center in filtered:
    center_vec = np.asarray(center)
    candidates = (
        (np.linalg.norm(center_vec - np.asarray(coords)), name)
        for name, coords in offices.items()
    )
    # The default reproduces the sentinel pair used when there are no offices.
    min_dist, nearest_office = min(
        candidates, key=lambda pair: pair[0], default=(1e20, None)
    )
    centers_with_dist.append((min_dist, nearest_office, center))

# The 20 centres closest to any office, nearest first.
top_20 = sorted(centers_with_dist, key=lambda entry: entry[0])[:20]
top_20_centers = [center for _dist, _office, center in top_20]
save_map_with(top_20_centers, offices.values(), 'map_top20.html')

In [94]:
%%HTML
<!-- iframe is not a void element: it needs an explicit closing tag -->
<iframe width="100%" height="500" src="map_top20.html"></iframe>

In [90]:
# Map only the closest cluster centre together with the office it is nearest to.
_best_dist, best_office, best_center = top_20[0]
save_map_with([best_center], [offices[best_office]], 'map_top1.html')

In [91]:
%%HTML
<!-- iframe is not a void element: it needs an explicit closing tag -->
<iframe width="100%" height="500" src="map_top1.html"></iframe>

In [92]:
# Persist the coordinates of the closest cluster centre as "<latitude> <longitude>".
with open('result.txt', 'w+') as f:
    coords = top_20[0][2]
    f.write(' '.join(str(value) for value in coords))