In [1]:
import numpy as np
from numpy.core.numeric import NaN
from numpy.lib.type_check import _imag_dispatcher
import pandas as pd
from pandas.core.base import DataError
from qwikidata.entity import WikidataItem
from qwikidata.linked_data_interface import get_entity_dict_from_api
from GPSPhoto.gpsphoto import getGPSData
import exifread
import os
from datetime import datetime
from get_map import *
# NOTE(review): `py` is not imported by name in this cell; presumably it is
# made available by `from get_map import *` above — confirm.
py.init_notebook_mode(connected=True)


In [2]:
def fill(X):
    """Return a display name for one amenity record, falling back to its tags.

    Parameters
    ----------
    X : mapping or pandas.Series
        A record exposing ``X['name']`` and ``X['tags']``. ``tags`` is
        expected to be a dict of tag key -> value.

    Returns
    -------
    The existing name when it is present. Otherwise the first available of:
    ``official_name``, ``operator``, the label of the ``brand:wikidata``
    entity, or the page title from ``brand:wikipedia``. If none of these
    exist (or ``tags`` is not a dict) the missing name is returned unchanged
    so the caller can still detect and drop it.
    """
    name = X['name']
    # An identity check against one particular NaN object misses NaN values
    # created elsewhere (and None); pd.isna recognises every missing scalar.
    if not pd.isna(name):
        return name

    tags = X['tags']
    # A missing tags cell is not a dict; `in` on it would raise TypeError.
    if not isinstance(tags, dict):
        return name

    if 'official_name' in tags:
        return tags['official_name']
    if 'operator' in tags:
        return tags['operator']
    if 'brand:wikidata' in tags:
        # Resolve the entity id to its label through qwikidata.
        q_dict = get_entity_dict_from_api(tags['brand:wikidata'])
        return WikidataItem(q_dict).get_label()
    if 'brand:wikipedia' in tags:
        # Values look like "<lang>:<Title>"; drop the language prefix
        # whatever its length instead of assuming exactly two letters.
        return tags['brand:wikipedia'].split(':', 1)[-1]

    return name

def clean_data(df):
    """Tidy an amenities frame in place and hand the same object back.

    Steps, in order: replace ``name`` with the result of ``fill`` for every
    row, discard rows that still contain any missing value, remove the
    ``timestamp`` and ``tags`` columns, and renumber the index from zero.

    The frame passed in is modified; the return value is that same frame.
    """
    # Row-wise pass: each record's name becomes whatever `fill` returns.
    df['name'] = df.apply(fill, axis=1)

    # Rows are filtered while `timestamp` and `tags` are still present, so a
    # missing value in either of those also removes the row.
    df.dropna(inplace=True)

    df.drop(columns=['timestamp', 'tags'], inplace=True)
    df.reset_index(inplace=True, drop=True)
    return df

def load_img_exif(path):
    """Read capture time and GPS position from every file in a directory.

    Parameters
    ----------
    path : str
        Directory whose entries are all opened as images.

    Returns
    -------
    pandas.DataFrame
        Columns ``img`` (file name), ``lat``, ``lon`` and ``datetime``,
        ordered by ``datetime``. Row labels are not renumbered after the
        sort. An empty directory yields an empty frame with these columns.

    Raises
    ------
    KeyError
        If a file has no ``EXIF DateTimeOriginal`` tag, or its GPS data has
        no ``Latitude`` / ``Longitude`` entry.
    """
    columns = ['img', 'lat', 'lon', 'datetime']
    records = []
    # os.listdir returns entries in arbitrary order; sorting makes the row
    # labels reproducible from run to run.
    for file in sorted(os.listdir(path)):
        img_file = os.path.join(path, file)
        # Close the handle as soon as the tags are parsed instead of leaking it.
        with open(img_file, 'rb') as f:
            tags = exifread.process_file(f)
        date_str = str(tags['EXIF DateTimeOriginal'])
        date = datetime.strptime(date_str, '%Y:%m:%d %H:%M:%S')
        exif = getGPSData(img_file)
        records.append({
            'img': file,
            'lat': exif['Latitude'],
            'lon': exif['Longitude'],
            'datetime': date,
        })
    # Build the frame once: DataFrame.append in a loop is quadratic and no
    # longer exists in pandas 2.x.
    df = pd.DataFrame(records, columns=columns)
    df.sort_values(by='datetime', inplace=True)
    return df

In [4]:
def haversine(lat1, lon1, lat2, lon2):
    """Great-circle distance between two points given in decimal degrees.

    Inputs may be scalars or array-likes that broadcast under numpy
    arithmetic; the result has the broadcast shape. The scale factor
    12742000 is twice 6371000, so the result is in metres for a sphere of
    radius 6371 km.

    Formula reference:
    https://stackoverflow.com/questions/27928/calculate-distance-between-two-latitude-longitude-points-haversine-formula/21623206
    """
    half_dlat = np.radians(lat2 - lat1) / 2
    half_dlon = np.radians(lon2 - lon1) / 2

    lat_term = np.sin(half_dlat)**2
    lon_term = (np.cos(np.radians(lat1))
                * np.cos(np.radians(lat2))
                * np.sin(half_dlon)**2)

    sphere_diameter = 12742000
    return sphere_diameter * np.arcsin(np.sqrt(lat_term + lon_term))

def find_amenity(img, osm):
    """Locate the amenity closest to one photo.

    ``img`` supplies a single ``lat``/``lon`` pair; ``osm`` supplies ``lat``
    and ``lon`` collections. The return value is whatever ``np.argmin``
    reports for the distances from the photo to every amenity.
    """
    distances = haversine(img['lat'], img['lon'], osm['lat'], osm['lon'])
    nearest = np.argmin(distances)
    return nearest

In [5]:
# Load the Vancouver amenities file; lines=True makes pandas parse one JSON
# record per line.
osm_file = './Data/osm/amenities-vancouver.json.gz'
osm_df = pd.read_json(osm_file, lines=True)
#osm_df.info()
# clean_data fills in missing names, drops rows with missing values and
# removes the timestamp/tags columns (it modifies the frame it is given).
osm_df = clean_data(osm_df)
#osm_df.info()
osm_df

Unnamed: 0,lat,lon,amenity,name
0,49.260812,-123.125736,cafe,Starbucks
1,49.260953,-123.125704,fast_food,Salad Loop
2,49.370898,-123.280448,place_of_worship,St. Monica's Anglican Church
3,49.219983,-122.988481,post_box,Canada Post
4,49.264041,-123.153407,fuel,Shell
...,...,...,...,...
8894,49.250408,-123.076261,restaurant,House of Dosas
8895,49.278424,-122.806704,cafe,Creekside Coffee
8896,49.278770,-122.797628,restaurant,Togo Sushi
8897,49.282666,-122.826978,pub,Brown's Social House


In [None]:
# NOTE(review): scatterMap is not defined in this notebook; presumably it is
# provided by `from get_map import *` — confirm.
scatterMap(osm_df, 'OSM Location in Vancouver', osm_df['name'])

In [6]:
# Build one row per photo (file name, GPS position, capture time); the frame
# returned by load_img_exif is sorted by capture time.
img_path = './Data/image'
img_df = load_img_exif(img_path)
#print(img_df)
img_df

Unnamed: 0,img,lat,lon,datetime
0,IMG_7006.JPG,49.287875,-123.142333,2019-12-14 10:09:25
1,IMG_7013.JPG,49.291225,-123.134508,2019-12-14 12:18:33
2,IMG_7014.JPG,49.300792,-123.130922,2019-12-14 13:44:04
3,IMG_7015.JPG,49.272847,-123.122017,2019-12-14 15:55:21
4,IMG_7016.JPG,49.263247,-123.125833,2019-12-14 18:42:11
5,IMG_7017.JPG,49.284311,-123.114117,2019-12-14 21:06:13
6,IMG_7018.JPG,49.289161,-123.117953,2019-12-14 21:37:53


In [None]:
# NOTE(review): scatterMap is not defined in this notebook; presumably it is
# provided by `from get_map import *` — confirm.
scatterMap(img_df, 'Images Location', img_df['img'])

In [7]:
# For every photo, get the np.argmin result over its distances to all amenities.
index = img_df.apply(find_amenity, osm=osm_df, axis=1)
# Join each photo to its nearest amenity by matching that value against
# osm_df's index labels. This relies on osm_df having a 0..n-1 index, which
# clean_data produces via reset_index. Columns present in both frames
# (lat, lon) come out with _x (photo) and _y (amenity) suffixes.
merged_df = pd.merge(img_df,osm_df,left_on=index,right_index=True)
merged_df

Unnamed: 0,key_0,img,lat_x,lon_x,datetime,lat_y,lon_y,amenity,name
0,369,IMG_7006.JPG,49.287875,-123.142333,2019-12-14 10:09:25,49.288028,-123.14218,bicycle_rental,Morton & Denman
1,1497,IMG_7013.JPG,49.291225,-123.134508,2019-12-14 12:18:33,49.291032,-123.134521,restaurant,Flower & Horse In Spring
2,1360,IMG_7014.JPG,49.300792,-123.130922,2019-12-14 13:44:04,49.299893,-123.131214,cafe,Ocean Wise Cafe
3,6865,IMG_7015.JPG,49.272847,-123.122017,2019-12-14 15:55:21,49.273342,-123.12218,community_centre,Round House Community Centre
4,2645,IMG_7016.JPG,49.263247,-123.125833,2019-12-14 18:42:11,49.263244,-123.12554,fast_food,Subway
5,2931,IMG_7017.JPG,49.284311,-123.114117,2019-12-14 21:06:13,49.284188,-123.113921,restaurant,Caveman Cafe
6,1575,IMG_7018.JPG,49.289161,-123.117953,2019-12-14 21:37:53,49.288669,-123.118213,bicycle_rental,Shaw Tower


In [None]:
# Map of the amenity matched to each photo, labelled with the amenity name.
fig = px.scatter_mapbox(merged_df, lat="lat_y", lon="lon_y", text='name')

# Applied before the route trace is added, so only the amenity markers get
# the text position.
fig.update_traces(textposition='top center')

# Overlay the photo positions, joined by lines in row order.
fig.add_trace(go.Scattermapbox(mode='lines+markers',
                               lat=merged_df["lat_x"],
                               lon=merged_df["lon_x"]))

fig.update_layout(
    # The previous title was left over from an unrelated example and did not
    # describe this figure.
    title='Nearest OSM Amenity to Each Photo',
    autosize=True,
    showlegend=False,
    mapbox_style="open-street-map")
fig.show()

In [8]:
# Pair every photo with the one that follows it: shift(-1) moves the next
# row's coordinates up by one, so each row holds the start and end of one leg.
# The last row has no successor and gets NaN in both new columns.
img_df['next_lat'] = img_df['lat'].shift(-1)
img_df['next_lon'] = img_df['lon'].shift(-1)
img_df

Unnamed: 0,img,lat,lon,datetime,next_lat,next_lon
0,IMG_7006.JPG,49.287875,-123.142333,2019-12-14 10:09:25,49.291225,-123.134508
1,IMG_7013.JPG,49.291225,-123.134508,2019-12-14 12:18:33,49.300792,-123.130922
2,IMG_7014.JPG,49.300792,-123.130922,2019-12-14 13:44:04,49.272847,-123.122017
3,IMG_7015.JPG,49.272847,-123.122017,2019-12-14 15:55:21,49.263247,-123.125833
4,IMG_7016.JPG,49.263247,-123.125833,2019-12-14 18:42:11,49.284311,-123.114117
5,IMG_7017.JPG,49.284311,-123.114117,2019-12-14 21:06:13,49.289161,-123.117953
6,IMG_7018.JPG,49.289161,-123.117953,2019-12-14 21:37:53,,


In [264]:
def find_amenities(img, osm, max_dist=100):
    """List the amenities lying close to the line through two consecutive photos.

    For each amenity a triangle is formed by the photo (``lat``/``lon``), the
    next photo (``next_lat``/``next_lon``) and the amenity. Its area comes
    from Heron's formula, and ``2 * area / a`` is the height over side ``a``,
    i.e. the distance from the amenity to the (unbounded) line through the
    two photos.

    Parameters
    ----------
    img : row with ``lat``, ``lon``, ``next_lat``, ``next_lon``
    osm : pandas.DataFrame with ``lat`` and ``lon`` columns
    max_dist : number, default 100
        Distance threshold, in the units returned by ``haversine``.

    Returns
    -------
    list
        Index labels of ``osm`` whose distance is below ``max_dist``. Empty
        when the next position is NaN, because every distance is then NaN.

    Note: this definition is replaced by a later redefinition of the same
    name further down the notebook.
    """
    a = haversine(img['lat'], img['lon'], img['next_lat'], img['next_lon'])
    b = haversine(img['lat'], img['lon'], osm['lat'], osm['lon'])
    c = haversine(img['next_lat'], img['next_lon'], osm['lat'], osm['lon'])
    semi_p = (a + b + c) / 2
    radicand = semi_p * (semi_p - a) * (semi_p - b) * (semi_p - c)
    # For amenities (almost) on the line the product can round to a tiny
    # negative number; sqrt would give NaN and the amenity would be dropped.
    # Clamp at zero. NaN inputs stay NaN.
    area = np.sqrt(np.maximum(radicand, 0))
    dis = (2 * area) / a
    return list(dis[dis < max_dist].index)

# Work on a copy so the list-valued column is not added to img_df itself.
img_amenities = img_df.copy()
# One list of osm_df index labels per photo (see find_amenities).
img_amenities['index'] = img_df.apply(find_amenities, osm=osm_df, axis=1)
img_amenities

Unnamed: 0,img,lat,lon,datetime,next_lat,next_lon,index
0,IMG_7006.JPG,49.287875,-123.142333,2019-12-14 10:09:25,49.291225,-123.134508,"[195, 252, 307, 367, 369, 370, 411, 413, 592, ..."
1,IMG_7013.JPG,49.291225,-123.134508,2019-12-14 12:18:33,49.300792,-123.130922,"[43, 187, 195, 410, 413, 592, 593, 594, 627, 6..."
2,IMG_7014.JPG,49.300792,-123.130922,2019-12-14 13:44:04,49.272847,-123.122017,"[47, 65, 146, 147, 181, 223, 224, 225, 226, 22..."
3,IMG_7015.JPG,49.272847,-123.122017,2019-12-14 15:55:21,49.263247,-123.125833,"[0, 1, 73, 174, 203, 229, 237, 248, 299, 360, ..."
4,IMG_7016.JPG,49.263247,-123.125833,2019-12-14 18:42:11,49.284311,-123.114117,"[0, 1, 149, 152, 237, 240, 289, 299, 360, 482,..."
5,IMG_7017.JPG,49.284311,-123.114117,2019-12-14 21:06:13,49.289161,-123.117953,"[50, 203, 290, 362, 782, 1065, 1070, 1071, 138..."
6,IMG_7018.JPG,49.289161,-123.117953,2019-12-14 21:37:53,,,[]


In [272]:
def find_amenities(img, osm, max_dist=100):
    """List the amenities lying close to the leg between two consecutive photos.

    With ``a`` the leg length and ``b``/``c`` the distances from the amenity
    to the leg's start/end, an amenity is selected when either

    * its distance to the line through both photos (triangle height from
      Heron's formula) is below ``max_dist`` and ``|b² - c²| <= a²``, i.e.
      neither angle at the leg's end points is obtuse, so the nearest point
      on the line falls between the two photos; or
    * it is within ``max_dist`` of either photo.

    Parameters
    ----------
    img : row with ``lat``, ``lon``, ``next_lat``, ``next_lon``
    osm : pandas.DataFrame with ``lat`` and ``lon`` columns
    max_dist : number, default 100
        Distance threshold, in the units returned by ``haversine``.

    Returns
    -------
    list
        Index labels of the selected rows of ``osm``. When the next position
        is NaN only the "within ``max_dist`` of this photo" test can match.
    """
    a = haversine(img['lat'], img['lon'], img['next_lat'], img['next_lon'])
    b = haversine(img['lat'], img['lon'], osm['lat'], osm['lon'])
    c = haversine(img['next_lat'], img['next_lon'], osm['lat'], osm['lon'])
    semi_p = (a + b + c) / 2
    radicand = semi_p * (semi_p - a) * (semi_p - b) * (semi_p - c)
    # For amenities (almost) on the line the product can round to a tiny
    # negative number; sqrt would give NaN and the amenity would be dropped.
    # Clamp at zero. NaN inputs stay NaN.
    area = np.sqrt(np.maximum(radicand, 0))
    dis = (2 * area) / a
    triangle = abs(b**2 - c**2) - a**2
    # Parenthesised explicitly: `&` binds tighter than `|`.
    near_leg = (dis < max_dist) & (triangle <= 0)
    near_endpoint = (b < max_dist) | (c < max_dist)
    return list(dis[near_leg | near_endpoint].index)


In [275]:
# One list of nearby osm_df index labels per photo.
osm_index = img_df.apply(find_amenities, osm=osm_df, axis=1)

# Flatten to (img_index, osm_index) pairs. Iterating with .items() uses the
# actual row labels, so it does not depend on img_df being labelled 0..n-1.
# The frames are concatenated once, because DataFrame.append in a loop is
# quadratic and no longer exists in pandas 2.x.
pair_frames = [
    pd.DataFrame({'img_index': [i] * len(matches), 'osm_index': matches})
    for i, matches in osm_index.items()
    if len(matches) > 0
]
if pair_frames:
    near_df = pd.concat(pair_frames, ignore_index=True)
else:
    # No photo has any nearby amenity: keep the expected columns.
    near_df = pd.DataFrame(columns=['img_index', 'osm_index'])

In [276]:
# Attach each photo's columns to its (img_index, osm_index) pairs by matching
# img_df's index labels against img_index.
near_df = pd.merge(img_df, near_df, left_index=True, right_on='img_index',how='inner')
# An amenity near several legs is kept once; drop_duplicates retains its first row.
near_df = near_df.drop_duplicates('osm_index').reset_index(drop=True)

In [277]:
# Look up each amenity's details in osm_df by index label. Columns present in
# both frames (lat, lon) come out with _x (photo) and _y (amenity) suffixes.
near_df = pd.merge(near_df, osm_df, left_on='osm_index', right_index=True, how='left')
near_df

Unnamed: 0,img,lat_x,lon_x,datetime,next_lat,next_lon,img_index,osm_index,lat_y,lon_y,amenity,name
0,IMG_7006.JPG,49.287875,-123.142333,2019-12-14 10:09:25,49.291225,-123.134508,0,195,49.291196,-123.135216,bicycle_rental,Robson & Denman
1,IMG_7006.JPG,49.287875,-123.142333,2019-12-14 10:09:25,49.291225,-123.134508,0,307,49.290935,-123.136044,restaurant,Sushiholic
2,IMG_7006.JPG,49.287875,-123.142333,2019-12-14 10:09:25,49.291225,-123.134508,0,367,49.287469,-123.142078,bicycle_rental,Davie & Beach
3,IMG_7006.JPG,49.287875,-123.142333,2019-12-14 10:09:25,49.291225,-123.134508,0,369,49.288028,-123.142180,bicycle_rental,Morton & Denman
4,IMG_7006.JPG,49.287875,-123.142333,2019-12-14 10:09:25,49.291225,-123.134508,0,370,49.288518,-123.139333,bicycle_rental,Comox & Denman
...,...,...,...,...,...,...,...,...,...,...,...,...
405,IMG_7017.JPG,49.284311,-123.114117,2019-12-14 21:06:13,49.289161,-123.117953,5,6236,49.285528,-123.115849,bank,National Bank
406,IMG_7017.JPG,49.284311,-123.114117,2019-12-14 21:06:13,49.289161,-123.117953,5,6563,49.287827,-123.116211,ice_cream,Bella Gelateria
407,IMG_7017.JPG,49.284311,-123.114117,2019-12-14 21:06:13,49.289161,-123.117953,5,8158,49.285399,-123.116413,bureau_de_change,Vancouver Bullion & Currency Exchange
408,IMG_7017.JPG,49.284311,-123.114117,2019-12-14 21:06:13,49.289161,-123.117953,5,8494,49.287057,-123.116321,bank,HSBC


In [285]:
# Amenities found along the route, coloured by the photo (leg start) they
# were attributed to.
fig = px.scatter_mapbox(near_df, lat="lat_y", lon="lon_y",
                        hover_name='name',
                        # `labels` is documented as a dict of column name ->
                        # display label; a bare string is not a valid mapping.
                        labels={'img': 'Photo'},
                        zoom=12, color="img",
                        color_discrete_sequence=px.colors.qualitative.G10)

# Overlay the photo positions themselves, joined by lines in row order.
fig.add_trace(go.Scattermapbox(mode='lines+markers',
                               lat=img_df["lat"],
                               lon=img_df["lon"],
                               name='Actual',
                               marker={'size': 20, 'color': 'LightSlateGray', 'opacity': 0.7}
                               ))

fig.update_layout(
    # The previous title was left over from an unrelated example and did not
    # describe this figure.
    title='OSM Amenities Along the Photo Route',
    autosize=True,
    mapbox_style="open-street-map"
    )
fig.show()
 

In [286]:
# NOTE(review): this cell rebinds osm_file and osm_df, replacing the cleaned
# amenities frame that the cells above use — re-running earlier cells after
# this one will see different data.
# NOTE(review): with lines=True the displayed result is a single row whose 20
# columns each hold one record (a dict) or a missing value; presumably the
# file is one JSON array rather than one record per line — confirm whether
# lines=True is intended.
osm_file = './Data/other/addition.json'
osm_df = pd.read_json(osm_file, lines=True)
#osm_df.info()
#osm_df = clean_data(osm_df)
#osm_df.info()
osm_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,"{'name': 'Brockton Point Lighthouse', 'rating'...","{'name': 'Deering Island Park', 'rating': 4.4,...","{'name': 'Dinsmore Bridge', 'rating': 4, 'revi...",,"{'name': 'Vancouver Christmas Market', 'rating...","{'name': 'Grouse Mountain', 'rating': 4.6, 're...","{'name': 'Vancouver Aquarium', 'rating': 4.5, ...","{'name': 'Stanley Park', 'rating': 4.8, 'revie...","{'name': 'Cypress Mountain', 'rating': 4.5, 'r...",,"{'name': 'Vancouver Art Gallery', 'rating': 4....","{'name': 'VanDusen Botanical Garden', 'rating'...","{'name': 'Canadian Memorial United Church', 'r...","{'name': 'Greater Vancouver Zoo', 'rating': 3....","{'name': 'Museum of Vancouver', 'rating': 4.2,...","{'name': 'Harbour Centre', 'rating': 4.3, 'rev...","{'name': 'Museum of Anthropology', 'rating': 4...","{'name': 'Extreme Air Park', 'rating': 4.4, 'r...","{'name': 'Highview Lookout', 'rating': 4.8, 'r...","{'name': 'Queen Elizabeth Park', 'rating': 4.6..."
