In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import CountVectorizer
from nltk.tag import pos_tag
from nltk.tokenize import WordPunctTokenizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split,cross_val_score
from collections import Counter
import json

from sklearn.preprocessing import StandardScaler
from sklearn.manifold import TSNE


from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from sklearn import cluster, preprocessing, metrics

from encryptedpickle import encryptedpickle
import pickle
from facepy import GraphAPI
#import credentials
from geotext import GeoText

from datetime import datetime
import calendar
import string
import time
import hashlib

import matplotlib.pyplot as plt
%matplotlib inline

from IPython.display import IFrame
from IPython.core.display import display


In [23]:
class geoloc(object):

    def __init__(self,geolocfile):
        try:
            self.g_loc_raw = pd.read_json(geolocfile)

        except IOError:
            print "File not found or could not be read."

        gmaps_columns = ['timestamp','lat','lng','acc','vel','heading','altitude','v_acc']
        gmaps_coord_disp = 10.**7

        loc_hist = []

        for datapoint in self.g_loc_raw['locations']:

            try:
                timestamp = datetime.fromtimestamp((int(datapoint['timestampMs'])/1000)).strftime('%Y-%m-%d %H:%M:%S')
            except:
                timestamp = ''
            try:
                lat = datapoint['latitudeE7']/(gmaps_coord_disp)
            except:
                lat = '0'
            try:
                lng = datapoint['longitudeE7']/(gmaps_coord_disp)
            except:
                lng = '0'
            try:
                alt = datapoint['altitude']
            except:
                alt = '0'
            try:
                v_acc = datapoint['verticalAccuracy']
            except:
                v_acc = '0'
            try:
                vel = datapoint['velocity']
            except:
                vel = '0'
            try:
                heading = datapoint['heading']
            except:
                heading = '0'
            try:
                acc = datapoint['accuracy']
            except:
                acc = '0'

            loc_hist.append([timestamp, lat, lng, acc, vel, heading, alt, v_acc])
            
        self.gloc_hist = pd.DataFrame(loc_hist, columns=gmaps_columns)

    def cluster_proc(self):
        geoc = np.array(self.gloc_hist[['lat','lng']])

        kmlist = []

        for n in range(2,100):
            lkm = KMeans(n_clusters=n)
            ltkm = lkm.fit(geoc)
            ltkm_score = metrics.silhouette_score(geoc, ltkm.labels_, metric='euclidean')
            kmlist.append([n,ltkm_score])

        klist = np.array(kmlist)

        for n in klist:
            if n[1]==np.amax(klist[3:100,1]):
                n_max =n[0]
                sc_max = np.amax(klist[3:100,1])

        lkm = KMeans(n_clusters=int(n_max))
        ltkm = lkm.fit(geoc)

        self.gloc_hist = self.gloc_hist.drop(['vel'], axis=1).drop(['altitude'], axis=1)
        self.gloc_hist['poi_id'] = ltkm.labels_
        
        self.gloc_hist['lat4'] = [round(glat,4) for glat in self.gloc_hist['lat']]
        self.gloc_hist['lng4'] = [round(glng,4) for glng in self.gloc_hist['lng']]
        self.gloc_hist['lat5'] = [round(glat,5) for glat in self.gloc_hist['lat']]
        self.gloc_hist['lng5'] = [round(glng,5) for glng in self.gloc_hist['lng']]

        self.poi_id_list = self.gloc_hist['poi_id'].value_counts().to_frame(name='Count').reset_index()
        self.poi_sig = self.poi_id_list['Count'].describe()

        self.min_sig = self.poi_sig.ix['mean']+self.poi_sig.ix['std']

        if len(self.poi_id_list[(self.poi_id_list['Count']>self.min_sig)]['index'].values) < 2:
            self.min_sig = self.poi_sig.ix['mean']+self.poi_sig.ix['std']
        else:
            self.min_sig = self.poi_sig.ix['mean']+(self.poi_sig.ix['std']*2)

        self.poi_points = self.poi_id_list[(self.poi_id_list['Count']>self.min_sig)]['index'].values

        self.gloc_hist['timestamp'] = pd.to_datetime(self.gloc_hist['timestamp'])
        self.gloc_hist['dotw'] = [gldt.dayofweek for gldt in self.gloc_hist['timestamp']]
        self.gloc_hist['hotd'] = [gldt.hour for gldt in self.gloc_hist['timestamp']]

        self.poi_dotw_df_list = []

        for n_poi in range(0,len(self.poi_points)):
            self.poi_dotw_df_list.append(self.gloc_hist[(self.gloc_hist['poi_id']==self.poi_points[n_poi])]['dotw'].describe().values)

        poi_freq_cols = ['count','mean','std','min','25p','50p','75p','max']

        self.poi_freq_df = pd.DataFrame(self.poi_dotw_df_list,columns=poi_freq_cols, index=[self.poi_points])
        self.selected_poi_for_freq_query = self.poi_freq_df['std'].idxmin()
        self.most_freq_dotw_for_pot = self.gloc_hist[(self.gloc_hist['poi_id']==self.selected_poi_for_freq_query)]['dotw'].value_counts().index[0]

        self.valid_answer = calendar.day_name[self.most_freq_dotw_for_pot].lower()

    def mean_lat(self):
        return self.gloc_hist[(self.gloc_hist['poi_id']==self.selected_poi_for_freq_query)]['lat'].mean()

    def mean_lng(self):
        return self.gloc_hist[(self.gloc_hist['poi_id']==self.selected_poi_for_freq_query)]['lng'].mean()

    def get_glhist(self):
        return self.gloc_hist

    def showmap(self,maplat,maplng,zoom=16):
        maps_url = "http://maps.google.com/maps?q={0}+{1}&z={2}&output=embed&iwloc=near".format(maplat,maplng,zoom)
        display(IFrame(maps_url, '400px', '300px'))

In [24]:
testgeo = geoloc('LocationHistory-20161107.json')

In [25]:
#testgeo.get_glhist()

In [26]:
testgeo.cluster_proc()

In [27]:
testgeo.mean_lat()

34.06018050000001

In [28]:
testgeo.mean_lng()

-118.28115463749998

In [29]:
testgeo.showmap(testgeo.mean_lat(),testgeo.mean_lng())

http://maps.google.com/maps?q=34.0601805+-118.281154637&z=16&output=embed&iwloc=near


In [30]:
class lbc(object):

    testdevid = '1425272220649281'


    def __init__(self,filename):
        self.token = 'a'
        self.geoprocdata = geoloc(filename)
        self.geoprocdata.cluster_proc()

    def tokemon(self,devid = testdevid):
        self.to_encode = devid + str(int(round(time.time())))
        self.encoded = hashlib.sha224(self.to_encode).hexdigest()
        self.token = self.encoded
        return self.token

#     def verify_token(self):
#         return self.token

    def genlbc(self):

        hint = "none"
        question = "What day do you visit this place most often?"

        self.challenge = [question, hint]
        self.gen_token = self.tokemon()
        
        return self.challenge, self.gen_token

    def passlbc(self,answer,passed_token):

        # Validate token first

        if passed_token != self.token:
            return 0
        else:
            if answer==self.geoprocdata.valid_answer:
                return 1
            if answer!=self.geoprocdata.valid_answer:
                return -1

    def lbc_auth(self,challenge_response,token):
        
        self.tries=3

        if self.tries>0:
            self.tries -= 1
            response = chal_answer.lower().translate(None, string.punctuation)
            attempt = self.passlbc(response,token)
            if attempt==1:
                print "Login OK - You have been identified as an authorized user."
                break
            elif attempt==-1:
                print "Authenticated failed. Please try again."
            elif attempt==0:
                print "Insecure authentication attempt detected. System may be compromised."
                break
        else:
            print "Authentication Failed"


In [31]:
newauth = lbc('LocationHistory-20161107.json')

In [108]:
newauth.tokemon()

'f36700d2d67987786754e0ae06a09fed579cafe05671833cc8713029'

In [32]:
newauth.genlbc()

(['What day do you visit this place most often?', 'none'],
 '26862a38cf4360ba91f3508285d70b7fd0989a77ee9cbe39fc214563')

In [33]:
newauth.verify_token()

'26862a38cf4360ba91f3508285d70b7fd0989a77ee9cbe39fc214563'

In [35]:
newauth.validate_answer()

'sunday'

In [36]:
newauth.lbc_auth('geoloc')

http://maps.google.com/maps?q=34.0601805+-118.281154637&z=16&output=embed&iwloc=near


What day do you visit this place most often?Sunday
Login OK - You have been identified as an authorized user.
