### Creation of the functions

In [6]:
%run conf_files.ipynb

To let the geocoder work, an API key is needed; it authorizes requests to the OpenCage geocoding service.

In [7]:
# NOTE(review): an API key committed to source should be treated as leaked.
# Prefer supplying it through the OPENCAGE_API_KEY environment variable; the
# literal below is kept only as a fallback so existing runs keep working.
import os

key = os.environ.get('OPENCAGE_API_KEY', 'd1dca54adaf64d419fa1a6a6efef0e43')
geocoder = OpenCageGeocode(key)

<a id='get_nearest'></a>
### get_nearest
This function searches a set of candidate points for the 10 nearest neighbors (the default value of `k_neighbors`) of every source point.
First, it creates a BallTree from the candidate points.
Then, for each element of the source points, it searches the candidate tree for the 10 nearest points and saves their distances and indices.
It returns the radius (the largest of those distances, i.e. the distance to the farthest of the nearest neighbors) and the indices of the 10 nearest neighbors.

In [None]:
def get_nearest(src_points, candidates, k_neighbors=10):
    """Find the ``k_neighbors`` nearest candidate points for every source point.

    A BallTree with the haversine metric is built from ``candidates`` and then
    queried with ``src_points``.

    Parameters
    ----------
    src_points : array-like, one row per source point
        Query coordinates in radians. The haversine metric expects each row
        as (latitude, longitude).
    candidates : array-like, one row per candidate point
        Coordinates to search in, in the same format as ``src_points``.
    k_neighbors : int, default 10
        Number of nearest neighbors to look up for each source point.

    Returns
    -------
    tuple
        ``(radius, closest0, closest1, ..., closest{k-1})`` where ``radius``
        holds, for each source point, the haversine distance (in radians) to
        its farthest requested neighbor, and ``closest{i}`` holds, for each
        source point, the row position in ``candidates`` of its i-th nearest
        neighbor. With the default ``k_neighbors=10`` this is an 11-tuple.
    """
    # Create tree from the candidate points
    tree = BallTree(candidates, leaf_size=15, metric='haversine')

    # For each point in src_points the query yields k_neighbors distances and
    # k_neighbors indices, ordered from nearest to farthest.
    distances, indices = tree.query(src_points, k=k_neighbors)

    # Transpose so that row i holds the i-th nearest neighbor of every point
    distances = distances.transpose()
    indices = indices.transpose()

    # Distance to the farthest of the requested neighbors. Using the last row
    # (instead of a hard-coded row 9) keeps this correct for any k_neighbors.
    radius = distances[-1]

    # One index array per neighbor rank, nearest first
    return (radius, *indices)

<a id='nearest_neighbor'></a>
##### nearest_neighbor
This function returns, for each point in left_gdf, the mean value of its 10 nearest neighbors found in right_gdf.

In detail:
- it takes two GeoDataFrames as input
- it converts the point coordinates of each of them to radians, creating the arrays left_radians and right_radians
- it searches right_radians for the 10 nearest neighbors of each point in left_radians
- it computes the mean of the values of those 10 nearest neighbors for each point in left_gdf
- if return_dist=True, the largest distance (the distance to the 10th neighbor) is also returned, in a `radius` column

In [1]:
def nearest_neighbor(left_gdf, right_gdf, return_dist=False):
    """
    For each point in left_gdf, average the values of its nearest points in right_gdf.

    The nearest neighbors of every left point are looked up with
    ``get_nearest`` (10 neighbors with its default settings). The rows of
    ``right_gdf`` matching those neighbors are averaged column by column
    (the geometry column is excluded), and the averages are appended to
    ``left_gdf`` as new columns.

    NOTICE: Assumes that the input Points are in WGS84 projection (lat/lon).

    Parameters
    ----------
    left_gdf : GeoDataFrame
        Points for which neighbors are searched.
    right_gdf : GeoDataFrame
        Points (and their value columns) that are searched in.
    return_dist : bool, default False
        If True, add a ``radius`` column holding the distance in meters
        between each left point and the farthest of its nearest neighbors.

    Returns
    -------
    GeoDataFrame
        ``left_gdf`` (same rows, same index) with the averaged columns of
        ``right_gdf`` joined on.
    """

    left_geom_col = left_gdf.geometry.name
    right_geom_col = right_gdf.geometry.name

    # Ensure that index in right gdf is formed of sequential numbers, so the
    # positions returned by the tree can be used as labels
    right = right_gdf.copy().reset_index(drop=True)

    # Parse coordinates from points and insert them into a numpy array as
    # RADIANS. The haversine metric expects (latitude, longitude), i.e.
    # (y, x) -- passing (x, y) yields wrong distances and wrong neighbors.
    left_radians = np.radians(
        np.array([(geom.y, geom.x) for geom in left_gdf[left_geom_col]])
    )
    right_radians = np.radians(
        np.array([(geom.y, geom.x) for geom in right[right_geom_col]])
    )

    # Find the nearest points
    # -----------------------
    # radius ==> distance (in radians) to the farthest of the nearest neighbors
    # neighbor_indices ==> one array per neighbor rank with the positions in
    #                      right of that neighbor for every left point
    radius, *neighbor_indices = get_nearest(src_points=left_radians, candidates=right_radians)

    # For each neighbor rank, fetch the matching rows of right; every frame is
    # re-indexed 0..n-1 so that label i always refers to the i-th left point.
    neighbor_rows = [right.loc[idx].reset_index(drop=True) for idx in neighbor_indices]

    # Stack all ranks: first n rows are the nearest neighbors, next n rows the
    # second nearest, and so on.
    df_to_agg = pd.concat(neighbor_rows)
    df_to_agg = df_to_agg.drop(columns=[right_geom_col])  # geometry cannot be averaged

    # Average all rows sharing the same label, i.e. all neighbors of one left point
    df_agg = df_to_agg.groupby(df_to_agg.index).agg('mean')

    # Add distance if requested
    if return_dist:
        # Convert to meters from radians
        earth_radius = 6371000  # meters
        df_agg['radius'] = radius * earth_radius

    # df_agg is ordered like left_gdf but labelled 0..n-1, so join by position
    # and then restore the original index of left_gdf. A plain label join
    # would misalign whenever left_gdf does not carry a 0..n-1 index.
    merged_gpd = left_gdf.reset_index(drop=True).join(df_agg)
    merged_gpd.index = left_gdf.index
    return merged_gpd

<a id='to_gdp_air_dt'></a>
##### to_gpd_air_dt
This function converts a DataFrame into a GeoDataFrame, which is a pandas.DataFrame that has a geometry column (here, points built from longitude and latitude).

In [None]:
def to_gpd_air_dt(input_df):
    '''
    Convert a pandas DataFrame into a GeoDataFrame of points.

    The first column of input_df is used as the x coordinate and the second
    as the y coordinate, so the expected column order is lon, lat, val.
    '''
    x_values = input_df.iloc[:, 0]
    y_values = input_df.iloc[:, 1]
    point_geometry = gpd.points_from_xy(x_values, y_values)
    return gpd.GeoDataFrame(input_df, geometry=point_geometry)