In [78]:
import pandas as pd
import numpy as np
from haversine import haversine
import sqlite3

In [79]:
# Connection to the SQLite file queried below for stations and measurements.
cn = sqlite3.connect(database='air.db')

In [80]:
# Load the PM25 readings taken at the most recent PM25 timestamp, joined with
# the coordinates of the station that produced each reading.
# The cut-off sub-query is restricted to PM25 on purpose: taking max(t) over
# every metric would return no rows whenever some other metric has a newer
# timestamp than the last PM25 sample.
sql = '''select distinct b.metric, a.name, a.lat, a.long, b.t, b.value
         from stations as a join measurements as b on (a.id = b.station_id)
         where b.metric = 'PM25'
         and b.t >= (select max(t) from measurements where metric = 'PM25')
      '''
df = pd.read_sql(sql, cn)

In [81]:
# Query point (latitude, longitude) at which the PM25 value is estimated.
lat = 37.502834
lng = 127.058756

In [82]:
def interpolate(x,y, xs, ys, vals,  p=2, dist_func = haversine):
    '''Estimate the value at (x, y) by inverse-distance weighting (IDW).

    Sample i receives a weight proportional to 1 / d_i**p, where
    d_i = dist_func((x, y), (xs[i], ys[i])), and the estimate is the
    weighted mean of vals.

    Parameters
    ----------
    x, y : float
        Coordinates of the query point, in the form dist_func expects
        (this notebook passes latitude, longitude to haversine).
    xs, ys : iterable of float
        Coordinates of the known samples, paired element-wise.
    vals : array-like of float
        Observed values, aligned positionally with xs / ys.
    p : float, default 2
        Power applied to the inverse distance (intended to be > 0); larger
        values let the nearest samples dominate.
    dist_func : callable, default haversine
        Called as dist_func((x, y), (a, b)); must return a non-negative
        distance.

    Returns
    -------
    float
        The weighted mean of vals.  If one or more samples lie exactly on
        the query point (distance 0) the mean of their values is returned.
        nan is returned when there are no samples.
    '''
    vals = np.asarray(vals, dtype=float)
    dists = np.array([dist_func((x, y), (a, b)) for a, b in zip(xs, ys)],
                     dtype=float)

    if dists.size == 0:
        # Nothing to average over.
        return float('nan')

    # A sample sitting exactly on the query point would need an infinite
    # weight (1/0); its own value is the IDW limit, so return it directly.
    on_point = dists == 0
    if on_point.any():
        return vals[on_point].mean()

    # Only the ratios between weights matter, so scale by the smallest
    # distance: the nearest sample gets weight 1 and the rest fall in (0, 1].
    # This is mathematically the same as 1/d**p but cannot underflow every
    # weight to 0 (giving 0/0) when p is large.
    w = np.power(dists.min() / dists, p)

    return np.sum(w * vals) / np.sum(w)

In [83]:
# With an exponent as large as 100 the weights fall off so steeply that the
# closest station dominates the estimate.
interpolate(lat, lng, df['lat'], df['long'], df['value'], p=100)

18.0

Select the value of p via leave-one-out optimization

In [89]:
# Module-level aliases for the station coordinates and readings; err() reads
# these names directly.
xs = df['lat']
ys = df['long']
vals = df['value']

In [106]:
def err(p):
    '''Leave-one-out sum of squared errors of interpolate() for a power p.

    Each sample in the module-level xs, ys, vals is held out in turn,
    predicted from all remaining samples, and the squared residuals are
    added up.

    Parameters
    ----------
    p : float or sequence of float
        Power forwarded to interpolate().  scipy.optimize.minimize calls the
        objective with a 1-element array, so only the first element is used;
        a bare scalar is accepted as well.

    Returns
    -------
    float
        Sum over all samples of (actual - predicted) ** 2.
    '''
    power = float(np.ravel(p)[0])

    # Positional NumPy views of the samples: indexing and removal below must
    # go by position, not by pandas index label, so the loop also works when
    # the Series do not carry a default 0..n-1 index.
    px = np.asarray(xs, dtype=float)
    py = np.asarray(ys, dtype=float)
    obs = np.asarray(vals, dtype=float)

    positions = np.arange(len(px))
    error = 0.0

    for i in positions:
        # Boolean mask selecting every sample except the held-out one.
        others = positions != i

        pred = interpolate(px[i], py[i], px[others], py[others], obs[others],
                           p=power)

        error += (obs[i] - pred) ** 2

    return error

In [107]:
from scipy.optimize import minimize

In [108]:
# Start the search from p = 2, the default exponent of interpolate(), and let
# the optimiser look for the power with the smallest leave-one-out error.
p = [2.0]
minimize(err, x0=p)

      fun: 14765.376915956269
 hess_inv: array([[  4.84533302e-08]])
      jac: array([-0.00012207])
  message: 'Desired error not necessarily achieved due to precision loss.'
     nfev: 37
      nit: 7
     njev: 12
   status: 2
  success: False
        x: array([ 2.93865916])

In [109]:
# Estimate at the query point using the exponent reported by the optimiser
# (about 2.9387, entered here as 2.93).
interpolate(lat, lng, df['lat'], df['long'], df['value'], p=2.93)

18.071563103040539