### Imports

In [None]:
import hashlib
import json
import os

import numpy as np
import pandas as pd
import requests
from numpy import pi
from sklearn.neighbors import BallTree

### Air Quality Data

In [None]:
# WAQI API token. Set the WAQI_TOKEN environment variable to override the
# fallback literal below.
# NOTE(review): a token committed inside a notebook is effectively public;
# rotate it and remove the fallback once WAQI_TOKEN is configured everywhere.
WAQI_TOKEN = os.environ.get('WAQI_TOKEN', '5ba0f3024f257b103b04cc3fd569a67994c57e2e')

# Geo-located feed URL for a fixed coordinate (built but not requested in this cell).
link1 = f'https://api.waqi.info/feed/geo:41.402746;2.175653/?token={WAQI_TOKEN}'
# Keyword search URL; this is the request actually sent.
link2 = f'https://api.waqi.info/search/?token={WAQI_TOKEN}&keyword=barcelona'

# A timeout keeps "Run All" from hanging forever on an unresponsive server.
file = requests.get(link2, timeout=10)
# Fail loudly on HTTP errors instead of trying to parse an error page as JSON.
file.raise_for_status()
res = file.json()  # res is now the parsed JSON (a dictionary of dictionaries)

In [None]:
# Display the parsed response to inspect its structure.
res

{'data': [{'aqi': '46',
   'station': {'country': 'ES',
    'geo': [41.41843, 2.1238973],
    'name': 'Barcelona, Catalunya, Spain',
    'url': 'spain/catalunya/barcelona'},
   'time': {'stime': '2022-03-11 11:00:00',
    'tz': '+01:00',
    'vtime': 1646992800},
   'uid': 11762},
  {'aqi': '37',
   'station': {'country': 'ES',
    'geo': [41.42608, 2.1479921],
    'name': "Barcelona (Parc Vall d'Hebrón), Catalunya, Spain",
    'url': 'spain/catalunya/barcelona-parc-vall-dhebron'},
   'time': {'stime': '2022-03-15 04:00:00',
    'tz': '+01:00',
    'vtime': 1647313200},
   'uid': 10533},
  {'aqi': '35',
   'station': {'country': 'ES',
    'geo': [41.387486, 2.1151497],
    'name': 'Barcelona (Palau Reial), Catalunya, Spain',
    'url': 'spain/catalunya/barcelona-palau-reial'},
   'time': {'stime': '2022-03-15 04:00:00',
    'tz': '+01:00',
    'vtime': 1647313200},
   'uid': 10532},
  {'aqi': '35',
   'station': {'country': 'ES',
    'geo': [41.370502, 2.114973],
    'name': "l'Hospita

In [None]:
# One line per station entry (uid, AQI, local sample time), each followed by a rule.
separator = '-'*20
for entry in res['data']:
  sample_time = entry['time']['stime']
  print(entry['uid'], entry['aqi'], sample_time)
  print(separator)

11762 46 2022-03-11 11:00:00
--------------------
10533 37 2022-03-15 03:00:00
--------------------
10532 35 2022-03-15 03:00:00
--------------------
6681 35 2022-03-15 03:00:00
--------------------
6674 24 2022-03-11 11:00:00
--------------------
6677 20 2022-03-15 03:00:00
--------------------
6669 - 2022-03-15 03:00:00
--------------------
10534 - 2022-03-15 03:00:00
--------------------
11758 - 2022-03-15 03:00:00
--------------------


### Stations

In [None]:
# One record per monitoring station: (id, name, latitude, longitude).
# Keeping the fields side by side makes it hard to misalign ids, names and coordinates.
_station_records = [
  (11762, "Barcelona, Catalunya, Spain", 41.41843, 2.1238973),
  (6681, "l'Hospitalet de Llobregat, Catalunya, Spain", 41.370502, 2.114973),
  (6677, "Sant Adrià de Besòs, Catalunya, Spain", 41.42562, 2.2222447),
  (6674, "Montcada i Reixac (Can Sant Joan), Catalunya, Spain", 41.4698, 2.1842334),
  (10532, "Barcelona (Palau Reial), Catalunya, Spain", 41.387486, 2.1151497),
  (10534, "Barcelona (Poblenou), Catalunya, Spain", 41.40388, 2.2045226),
  (6669, "Barcelona (Eixample), Catalunya, Spain", 41.38534, 2.153822),
  (11758, "Barcelona (Gràcia-St.Gervasi), Catalunya, Spain", 41.398743, 2.1533852),
  (10533, "Barcelona (Parc Vall d'Hebrón), Catalunya, Spain", 41.42608, 2.1479921),
]

# Per-field views of the records, exposed under the names the notebook uses.
station_names = [record[1] for record in _station_records]
station_coords = [(record[2], record[3]) for record in _station_records]
station_ids = [record[0] for record in _station_records]

# Split the (lat, lon) pairs into two parallel tuples.
station_lats, station_lons = zip(*station_coords)

# Station lookup table; row order follows the record order above.
stations = pd.DataFrame(dict(
  id=station_ids,
  nom=station_names,
  lat=station_lats,
  lon=station_lons,
))

In [None]:
# Display the station lookup table (id, name, latitude, longitude).
stations

Unnamed: 0,id,nom,lat,lon
0,11762,"Barcelona, Catalunya, Spain",41.41843,2.123897
1,6681,"l'Hospitalet de Llobregat, Catalunya, Spain",41.370502,2.114973
2,6677,"Sant Adrià de Besòs, Catalunya, Spain",41.42562,2.222245
3,6674,"Montcada i Reixac (Can Sant Joan), Catalunya, ...",41.4698,2.184233
4,10532,"Barcelona (Palau Reial), Catalunya, Spain",41.387486,2.11515
5,10534,"Barcelona (Poblenou), Catalunya, Spain",41.40388,2.204523
6,6669,"Barcelona (Eixample), Catalunya, Spain",41.38534,2.153822
7,11758,"Barcelona (Gràcia-St.Gervasi), Catalunya, Spain",41.398743,2.153385
8,10533,"Barcelona (Parc Vall d'Hebrón), Catalunya, Spain",41.42608,2.147992


In [None]:
# Collect one record per station that reports a numeric AQI, then build the
# frame once. Growing a DataFrame row by row is slow and lets the column
# dtypes depend on the pandas version.
aqi_records = []
for v in res['data']:
  try:
    aqi_value = int(v['aqi'])
  except (TypeError, ValueError):
    # '-' (or any other non-numeric placeholder) means "no measurement": skip it.
    continue
  aqi_records.append({
    'id': str(v['uid']),  # kept as a string: later cells look stations up by str(id)
    'aqi': aqi_value,
    'time': v['time']['stime'],
  })

# Explicit columns keep the schema stable even when no station has a reading.
stations_aqi = pd.DataFrame(aqi_records, columns=['id', 'aqi', 'time'])
stations_aqi

Unnamed: 0,id,aqi,time
0,11762,46.0,2022-03-11 11:00:00
1,10533,37.0,2022-03-15 03:00:00
2,10532,35.0,2022-03-15 03:00:00
3,6681,35.0,2022-03-15 03:00:00
4,6674,24.0,2022-03-11 11:00:00
5,6677,20.0,2022-03-15 03:00:00


### Stores

In [None]:
# One record per store: (id, name, latitude, longitude).
_store_records = [
  (1000, 'bodevici', 41.403290, 2.158894),
  (2000, 'Flax & Kale', 41.389024, 2.167796),
]

# Per-field views of the records, exposed under the names the notebook uses.
store_names = [record[1] for record in _store_records]
store_ids = [record[0] for record in _store_records]
store_coords = [(record[2], record[3]) for record in _store_records]

# Split the (lat, lon) pairs into two parallel tuples.
store_lats, store_lons = zip(*store_coords)

# Store lookup table; row order follows the record order above.
stores = pd.DataFrame(dict(
  id=store_ids,
  nom=store_names,
  lat=store_lats,
  lon=store_lons,
))

In [None]:
# Display the store lookup table (id, name, latitude, longitude).
stores

Unnamed: 0,id,nom,lat,lon
0,1000,bodevici,41.40329,2.158894
1,2000,Flax & Kale,41.389024,2.167796


### BallTree

In [None]:
# Earth radius used to scale the haversine distances returned by the tree query
# (presumably metres, i.e. about 6371 km; TODO confirm the intended unit).
earth_radius = 6_371_008

In [None]:
# Station coordinates converted from degrees to radians, columns ordered (lat, lon),
# then indexed in a ball tree that uses the haversine metric.
station_radians = np.deg2rad(stations[['lat', 'lon']])
stations_tree = BallTree(station_radians, metric='haversine')

In [None]:
# First and last row label of the stores to process; both ends are included.
min_store_idx, max_store_idx = 0, 1

In [None]:
# Ask for at most 8 neighbours, but never more than the number of stations:
# the tree query raises when k exceeds the number of indexed points.
n_neighbors = min(8, len(stations))

# Selected stores' (lat, lon) in radians, converted the same way as the
# station coordinates used to build the tree.
store_radians = np.deg2rad(
  stores.loc[min_store_idx:max_store_idx, ['lat', 'lon']].values
)

# TODO(review): consider query_radius if a distance cut-off is wanted instead of a fixed k.
# dist[i] / idx[i] hold, for the i-th selected store, the distances to and row
# positions of its nearest stations, closest first.
dist, idx = stations_tree.query(store_radians, k=n_neighbors)

# Scale the distances returned by the haversine query by the Earth radius.
dist = dist * earth_radius

In [None]:
# Map each selected store id to its nearest stations as (station id, distance
# rounded to 2 decimals) pairs, in the order returned by the tree query.
selected_store_ids = stores.id[min_store_idx:max_store_idx+1]
near_stations = {
  store_id: [
    (stations.at[station_pos, 'id'], np.round(station_dist, 2))
    for station_pos, station_dist in zip(idx[row], dist[row])
  ]
  for row, store_id in enumerate(selected_store_ids)
}
near_stations

{1000: [(11758, 683.19),
  (6669, 2040.3),
  (10533, 2692.27),
  (11762, 3369.28),
  (10534, 3806.18),
  (10532, 4050.03),
  (6681, 5168.96),
  (6677, 5837.24)],
 2000: [(6669, 1235.66),
  (11758, 1616.47),
  (10534, 3480.48),
  (10532, 4395.28),
  (10533, 4439.14),
  (6681, 4864.75),
  (11762, 4908.86),
  (6677, 6097.51)]}

In [None]:
def _distance_weighted_aqi(readings):
  """Combine (aqi, distance) pairs into a single distance-weighted AQI.

  Each reading is weighted by ``1 - distance / total_distance``, so nearer
  stations count more. Those weights add up to ``len(readings) - 1``, which is
  why the weighted sum is divided by that count.

  Returns NaN when there are no readings. With a single reading, or when every
  distance is zero, the weighting formula would evaluate to 0/0, so the plain
  mean of the AQI values is returned instead.
  """
  if not readings:
    return float('nan')
  total_dist = sum(distance for _, distance in readings)
  if len(readings) == 1 or total_dist == 0:
    return float(np.mean([aqi for aqi, _ in readings]))
  weighted_sum = np.sum([aqi*(1-distance/total_dist) for aqi, distance in readings])
  return float(weighted_sum/(len(readings)-1))


# get stores' estimated aqi
# Scalar lookup table: station id (as a string) -> AQI. Built once instead of
# filtering the frame for every neighbouring station.
aqi_by_station = {str(sid): aqi for sid, aqi in zip(stations_aqi.id, stations_aqi.aqi)}
available_stations = set(aqi_by_station)

# Estimates are kept in a dict (store id -> AQI) as well as printed, so that
# later cells can reuse them.
store_aqi_estimates = {}
for key, vals in near_stations.items():
  station_aqis_dist = []
  for station_id, distance in vals:
    if str(station_id) in available_stations:
      station_aqis_dist.append((aqi_by_station[str(station_id)], distance))
    else:
      print(f'Warning: Station {station_id} has no available AQI measure')
  store_aqi = _distance_weighted_aqi(station_aqis_dist)
  store_aqi_estimates[key] = store_aqi
  print(key, store_aqi, sep=': ')


1000: 35.03405211153824
2000: 34.78927631211461
