In [1]:
import pickle
import json
import numpy as np
import pandas as pd
import math

from bokeh.plotting import figure, show, output_notebook
from bokeh.tile_providers import CARTODBPOSITRON

In [2]:
def fix_JSON(json_message=None):
    '''This function fixes errors in the JSON if any and return a json dictionary'''
    result = None
    try:        
        result = json.loads(json_message)
    except Exception as e:      
        # Find the offending character index:
        e_message = str(e)
        idx_to_replace = int(e_message.split(' ')[-1].replace(')',''))   
        
        #print(e_message,idx_to_replace)
        # Remove the offending character:
        json_message = list(json_message)
        json_message[idx_to_replace] = ' '
        new_message = ''.join(json_message)     
        return fix_JSON(json_message=new_message)
    return result

In [3]:
def get_lat_long(bbox):
    bbox = bbox['coordinates']
    try:
        bb = np.array(bbox[0])
        lat = np.average(bb[:,0])
        lon = np.average(bb[:,1])
    except:
        lat,lon = None,None
    return lon,lat

In [4]:
def get_x(f):
    def g(*args):
        rv = f(*args)
        return rv[0]
    return g

def get_y(f):
    def g(*args):
        rv = f(*args)
        return rv[1]
    return g

In [25]:
@get_y
def merc(row):
    lat = row['lat']
    lon = row['lon']
    if lat is None or lon is None:
        return (0,0)
    r_major = 6378137.000
    x = r_major * math.radians(lon)
    scale = x/lon
    y = 180.0/math.pi * math.log(math.tan(math.pi/4.0 + lat * (math.pi/180.0)/2.0)) * scale
    return (x, y)

## Processinf file : neighborhoods.json

In [6]:
BASE_PATH = 'E:\Datasets\TWITTER_SOCG\CONTROL\%s'
file_name = 'neighborhoods.json'
file = BASE_PATH%(file_name)

In [7]:
neigh = []

In [8]:
with open(file) as f:
    for line in f:
        tmp_all = fix_JSON(line)
        lat,lon = get_lat_long(tmp_all.get('bounding_box',None))
        tmp_all['lat'] = lat
        tmp_all['lon'] = lon
        neigh.append(tmp_all)

In [9]:
df = pd.DataFrame.from_records(neigh)

In [10]:
df.columns

Index(['attributes', 'bounding_box', 'contained_within', 'country',
       'country_code', 'full_name', 'id', 'lat', 'lon', 'name', 'place_type',
       'url'],
      dtype='object')

In [12]:
df['coord_x'] = df.apply (lambda row: merc(row),axis=1) ## Toggle decorator

In [14]:
df['coord_y'] = df.apply (lambda row: merc(row),axis=1) ## Toggle decorator

In [15]:
df.head()

Unnamed: 0,attributes,bounding_box,contained_within,country,country_code,full_name,id,lat,lon,name,place_type,url,coord_x,coord_y
0,{},"{'type': 'Polygon', 'coordinates': [[[-122.406...",[],United States,US,"The Embarcadero, San Francisco",90942366be65cd2c,37.792149,-122.395603,The Embarcadero,neighborhood,https://api.twitter.com/1.1/geo/id/90942366be6...,-13625020.0,4550105.0
1,{},"{'type': 'Polygon', 'coordinates': [[[-73.9126...",[],United States,US,"Jackson Heights, Queens",183671f841b462d5,40.756939,-73.896411,Jackson Heights,neighborhood,https://api.twitter.com/1.1/geo/id/183671f841b...,-8226111.0,4976556.0
2,{},"{'type': 'Polygon', 'coordinates': [[[-84.4049...",[],United States,US,"Lakewood Heights, Atlanta",4cab0cff30803a20,33.705999,-84.38597,Lakewood Heights,neighborhood,https://api.twitter.com/1.1/geo/id/4cab0cff308...,-9393803.0,3989393.0
3,{},"{'type': 'Polygon', 'coordinates': [[[-122.414...",[],United States,US,"Chinatown, San Francisco",e181b00c2f52bb2d,37.797589,-122.408835,Chinatown,neighborhood,https://api.twitter.com/1.1/geo/id/e181b00c2f5...,-13626490.0,4550871.0
4,{},"{'type': 'Polygon', 'coordinates': [[[-73.9386...",[],United States,US,"Fort George, Manhattan",255794afe77688b1,40.856943,-73.928499,Fort George,neighborhood,https://api.twitter.com/1.1/geo/id/255794afe77...,-8229683.0,4991264.0


In [16]:
p = figure(x_axis_type="mercator", y_axis_type="mercator")
p.add_tile(CARTODBPOSITRON)
p.circle(x = df['coord_x'], 
         y = df['coord_y'],
         size=10,
         line_color="#FF0000", 
         fill_color="#FF0000",
         fill_alpha=0.05)
output_notebook()
show(p)

## Processing file : points.json

In [17]:
file_name = 'points.json'
file = BASE_PATH%(file_name)

In [19]:
points = []

In [20]:
with open(file) as f:
    for line in f:
        tmp_all = fix_JSON(line)
        lat,lon = get_lat_long(tmp_all.get('bounding_box',None))
        tmp_all['lat'] = lat
        tmp_all['lon'] = lon
        points.append(tmp_all)

In [21]:
df1 = pd.DataFrame.from_records(points)

In [22]:
df1.columns

Index(['attributes', 'bounding_box', 'contained_within', 'country',
       'country_code', 'full_name', 'id', 'lat', 'lon', 'name', 'place_type',
       'url'],
      dtype='object')

In [24]:
df1['coord_x'] = df.apply (lambda row: merc(row),axis=1) ## Toggle decorator

In [26]:
df1['coord_y'] = df.apply (lambda row: merc(row),axis=1) ## Toggle decorator

In [30]:
df1.head()

Unnamed: 0,attributes,bounding_box,contained_within,country,country_code,full_name,id,lat,lon,name,place_type,url,coord_x,coord_y
0,{},"{'type': 'Polygon', 'coordinates': [[[-93.2362...",[],United States,US,Loring Bar & Restaurant,0ce294aa05d79000,44.98042,-93.23625,Loring Bar & Restaurant,poi,https://api.twitter.com/1.1/geo/id/0ce294aa05d...,-13625020.0,4550105.0
1,{},"{'type': 'Polygon', 'coordinates': [[[-112.083...",[],United States,US,Unexpected Art Gallery,091ec127c9d72001,33.453708,-112.083795,Unexpected Art Gallery,poi,https://api.twitter.com/1.1/geo/id/091ec127c9d...,-8226111.0,4976556.0
2,{},"{'type': 'Polygon', 'coordinates': [[[-111.739...",[],United States,US,Raising Cane's,0c2ed9b30156b000,33.381074,-111.739979,Raising Cane's,poi,https://api.twitter.com/1.1/geo/id/0c2ed9b3015...,-9393803.0,3989393.0
3,{},"{'type': 'Polygon', 'coordinates': [[[-85.3361...",[],United States,US,The Field House,07d9f459fa887002,35.086167,-85.336188,The Field House,poi,https://api.twitter.com/1.1/geo/id/07d9f459fa8...,-13626490.0,4550871.0
4,{},"{'type': 'Polygon', 'coordinates': [[[-72.9171...",[],United States,US,Canton High School,07d9ee7ad3c80001,41.825261,-72.917181,Canton High School,poi,https://api.twitter.com/1.1/geo/id/07d9ee7ad3c...,-8229683.0,4991264.0


In [31]:
p = figure(x_axis_type="mercator", y_axis_type="mercator")
p.add_tile(CARTODBPOSITRON)
p.circle(x = df1['coord_x'], 
         y = df1['coord_y'],
         size=10,
         line_color="#FF0000", 
         fill_color="#FF0000",
         fill_alpha=0.05)
output_notebook()
show(p)