This script calculates the distance of the safecast data points from every unique mext location

In [19]:
from math import cos, sqrt, sin, radians
import pandas as pd
import numpy as np
import pickle
from datetime import datetime
import dateutil.parser

mloc_hash = pickle.load(open('mloc_hash.pkl','rb'))
mloc_hash_rev = pickle.load(open('mloc_hash_rev.pkl','rb'))

mrad_hash = {}
for key in mext_loc:
    mrad_hash[tuple(map(radians,key))] = mloc_hash[key]
    
mext_rad = list(mrad_hash.keys())

def equirect_approx(lat1,lng1,lat2,lng2):
    """
    values must be in radians"""
    x = (lng2-lng1)*cos((lat1+lat2)/2.0)
    y = lat2-lat1
    return sqrt(x*x + y*y) * 6371.0 #earth radius in km

def haversine_distance(lat1,lng1,lat2,lng2):
    R = 6731.0 #radius of Earth in km
    dlon = lng2 - lng1
    dlat = lat2 - lat1
    a = (sin(dlat/2))**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2 
    c = 2 * np.arctan2(sqrt(a), sqrt(1-a)) 
    return 6731.0 * c

def dist_from_all_mext(latd,lngd):
    """
    inputs are in degrees"""
    
    lat,lng = map(radians, (latd,lngd))
#     return [round(equirect_approx(m[0],m[1],lat,lng),5) for m in mext_rad] #rounded to save disk space and time, equirect approximation
    return [round(haversine_distance(m[0],m[1],lat,lng),5) for m in mext_rad] #haversine formula
#     return [equirect_approx(m[0],m[1],lat,lng) for m in mext_rad] #unrounded

def min_distance(mext_distances):
    """
    returns the minimum distance from any point, lat of that point, long of that point
    """
    ind = np.argmin(mext_distances)
    mloc_id = mrad_hash[mext_rad[ind]]
#     return [mext_distances[ind],loc[0],loc[1]]
    loc = mloc_hash_rev[mloc_id]
    return [mext_distances[ind],loc[0],loc[1],mloc_id] #saves the mext location as a number corresponding to the index in mext_rad list object

mloc_times_hash = pickle.load(open('mloc_times_hash.pkl','rb'))
def closest_time(mloc_id,sftime):
    """
    returns the closest mext time for that mext location and the time difference from safecast time"""
    diff = [abs(dateutil.parser.parse(sftime) - dateutil.parser.parse(mtime)) for mtime in mloc_times_hash[mloc_id]]
    close_time = mloc_times_hash[mloc_id][np.argmin(diff)]
    return [close_time,diff]

In [21]:
#goes through the safecast data (time filtered) and calculates the closest point of all the mext points
#saves two separate files
#1. all the safecast data plus closest point information
#2. only closest point information

import timeit
import csv
from math import radians
t = timeit.default_timer()

with open('safecast2.csv','w') as wf:
    writer = csv.writer(wf)
    with open('safecast_data_datefiltered.csv','r') as sf:
        i=0
        #write header: distance from point at:, lat, long, mloc_id
        writer.writerow(next(sf).strip().split(',')+['distance','mlat','mlong','mloc_id'])
        for line in sf:
            i+=1
            l=line.split(',')
            l[5] = l[5].strip()
#             print(dist_from_all_mext(*map(float,(l[1:3])))[0])
            try:
#                 writer.writerow(dist_from_all_mext(*map(float,(l[1:3])))) #only write distances
                dist_lat_long = min_distance(dist_from_all_mext(*map(float,(l[1:3]))))
                ctime= closest_time(dist_lat_long[-1],l[-1])
                writer.writerow(l+dist_lat_long+ctime)
            except ValueError:
                raise
                writer.writerow(['nan']*3)
                dwriter.writerow(l+['nan']*3)
#                 if i%1000000==0:
            if i%100000==0:
                print(i,timeit.default_timer()-t)
#                     break


KeyError: 25

In [22]:
mloc_times_hash

{(26.314722, 127.895278): ('2011-04-10 21:00:00',
  '2011-04-06 03:00:00',
  '2011-11-09 22:00:00',
  '2011-06-14 12:00:00',
  '2011-12-06 00:00:00',
  '2011-09-15 02:00:00',
  '2011-06-11 10:00:00',
  '2011-11-26 02:00:00',
  '2011-11-17 03:00:00',
  '2011-08-25 19:00:00',
  '2011-08-20 00:00:00',
  '2011-06-17 10:00:00',
  '2011-04-27 21:00:00',
  '2011-08-21 16:00:00',
  '2011-08-02 05:00:00',
  '2011-07-25 15:00:00',
  '2011-06-06 06:00:00',
  '2011-07-12 07:00:00',
  '2011-12-13 13:00:00',
  '2011-06-01 16:00:00',
  '2011-07-27 08:00:00',
  '2011-05-20 22:00:00',
  '2011-03-23 15:00:00',
  '2011-09-28 23:00:00',
  '2011-10-12 12:00:00',
  '2011-10-03 10:00:00',
  '2011-11-30 20:00:00',
  '2011-10-11 13:00:00',
  '2011-10-31 05:00:00',
  '2011-07-13 16:00:00',
  '2011-05-21 08:00:00',
  '2011-12-31 09:00:00',
  '2011-11-13 09:00:00',
  '2011-10-25 12:00:00',
  '2011-04-09 04:00:00',
  '2011-12-01 08:00:00',
  '2011-11-28 22:00:00',
  '2011-12-05 13:00:00',
  '2011-11-04 08:00:00',
