In [11]:
import pandas as pd
from scipy.spatial.distance import cdist


# Reference points: eight (Lat, Lon) coordinates, each labelled with a zone.
data1 = {
    'Lat': pd.Series([
        50.6373473, 50.63740441, 50.63744285, 50.63737839,
        50.6376054, 50.6375896, 50.6374239, 50.6374404,
    ]),
    'Lon': pd.Series([
        3.075029928, 3.075068636, 3.074951754, 3.074913884,
        3.0750528, 3.0751209, 3.0750246, 3.0749554,
    ]),
    'Zone': pd.Series(['A', 'A', 'A', 'A', 'B', 'B', 'B', 'B']),
}

# Query points: coordinates that still need to be assigned a zone.
data2 = {
    'Lat': pd.Series([50.6375524099, 50.6375714407]),
    'Lon': pd.Series([3.07507914474, 3.07508201591]),
}


def closest_point(point, points):
    """Return the entry of ``points`` that lies nearest to ``point``.

    Distances are computed with ``cdist`` using its default metric, so the
    comparison is made directly on the raw coordinate values.

    Parameters
    ----------
    point : sequence of float
        Coordinates of the query point.
    points : sequence of sequences of float
        Candidate points; must be indexable by integer position.

    Returns
    -------
    The element of ``points`` with the smallest distance to ``point``
    (the first one if several are equally close).
    """
    # cdist expects 2-D inputs, so the single query point is wrapped in a list;
    # the result has shape (1, len(points)).
    distances = cdist([point], points)
    # With a single row, the flat argmin is the column (candidate) index.
    nearest_idx = distances.argmin()
    return points[nearest_idx]

def match_value(df, col1, x, col2):
    """Look up the ``col2`` value of the first row whose ``col1`` equals ``x``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to search.
    col1 : str
        Name of the column compared against ``x``.
    x : object
        Value to look for in ``col1``.
    col2 : str
        Name of the column whose value is returned.

    Returns
    -------
    The ``col2`` entry of the first matching row.

    Raises
    ------
    IndexError
        If no row of ``df`` has ``col1`` equal to ``x``.
    """
    is_match = df[col1] == x
    matches = df.loc[is_match, col2]
    # Only the first hit is reported; later duplicates are ignored.
    return matches.values[0]


df1 = pd.DataFrame(data1)
df2 = pd.DataFrame(data2)

# Pack each row's coordinates into a single (Lat, Lon) tuple so that a point
# can be passed around and compared as one value.
df1['point'] = list(zip(df1['Lat'], df1['Lon']))
df2['point'] = list(zip(df2['Lat'], df2['Lon']))

# The candidate list does not change between queries, so build it once
# instead of re-materialising it for every row of df2.
reference_points = list(df1['point'])

# For every query point: find the nearest reference point, then copy over the
# zone label of that reference point.
df2['closest'] = [closest_point(x, reference_points) for x in df2['point']]
df2['zone'] = [match_value(df1, 'point', x, 'Zone') for x in df2['closest']]

df2

Unnamed: 0,Lat,Lon,point,closest,zone
0,50.637552,3.075079,"(50.6375524099, 3.07507914474)","(50.6375896, 3.0751209)",B
1,50.637571,3.075082,"(50.6375714407, 3.07508201591)","(50.6375896, 3.0751209)",B


In [30]:
import xarray as xr
# Open the NetCDF file and flatten it into a table. to_dataframe() puts the
# dataset's coordinates in the index; reset_index() turns them into ordinary
# columns (lon, lat, time in the preview below).
ds_m = xr.open_dataset('monthy.nc')
df_m = (
    ds_m
    .to_dataframe()
    .reset_index()
)
df_m.head()

Unnamed: 0,lon,lat,time,CLOUD_OD_11,ALLSKY_SFC_SW_DWN_19,CLOUD_OD_02,CLOUD_OD_05,CLOUD_AMT_11,ALLSKY_SFC_SW_DWN_04,ALLSKY_SFC_SW_DWN_01,...,CLOUD_OD_17,CLOUD_AMT_09,ALLSKY_SFC_SW_DWN_05,ALLSKY_SFC_SW_DWN_11,ALLSKY_SFC_SW_DWN_21,CLOUD_AMT_15,ALLSKY_SFC_SW_DWN_15,CLOUD_OD_07,ALLSKY_SFC_SW_DWN_13,ALLSKY_SFC_SW_DWN_16
0,-179.5,89.5,0.0,4.859375,0.0,5.304688,4.5625,57.101562,0.0,0.0,...,4.492188,55.132812,0.0,0.0,0.0,61.09375,0.0,4.492188,0.0,0.0
1,-179.5,89.5,1.0,4.375,0.0,4.335938,4.195312,76.171875,0.0,0.0,...,4.28125,78.34375,0.0,0.0,0.0,75.5625,0.0,4.484375,0.0,0.0
2,-179.5,89.5,2.0,3.625,11.875,3.664062,3.3125,56.453125,11.65625,12.734375,...,3.5,55.585938,11.054688,8.960938,13.171875,61.0625,9.148438,3.539062,8.507812,9.90625
3,-179.5,89.5,3.0,2.398438,170.171875,2.539062,2.007812,64.632812,163.648438,156.6875,...,2.28125,67.929688,166.8125,151.695312,174.796875,67.070312,159.53125,1.78125,148.789062,166.0625
4,-179.5,89.5,4.0,2.789062,319.648438,2.90625,3.117188,82.46875,296.09375,317.90625,...,3.679688,80.421875,303.375,289.46875,308.484375,81.960938,294.898438,3.054688,293.960938,309.289062


In [29]:
# Summarise column dtypes and non-null counts of the flattened dataset.
# NOTE(review): the saved output below reports a MultiIndex, which suggests
# this cell was last executed before reset_index() was applied to df_m in the
# loading cell - re-run after a kernel restart to refresh it.
df_m.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 842400 entries, (-179.5, 89.5, 0.0) to (179.5, -89.5, nan)
Data columns (total 72 columns):
 #   Column                Non-Null Count   Dtype  
---  ------                --------------   -----  
 0   CLOUD_OD_11           842400 non-null  float64
 1   ALLSKY_SFC_SW_DWN_19  842400 non-null  float64
 2   CLOUD_OD_02           842400 non-null  float64
 3   CLOUD_OD_05           842400 non-null  float64
 4   CLOUD_AMT_11          842400 non-null  float64
 5   ALLSKY_SFC_SW_DWN_04  842400 non-null  float64
 6   ALLSKY_SFC_SW_DWN_01  842400 non-null  float64
 7   CLOUD_OD_12           842400 non-null  float64
 8   ALLSKY_SFC_SW_DWN_18  842400 non-null  float64
 9   CLOUD_OD_13           842400 non-null  float64
 10  CLOUD_OD_15           842400 non-null  float64
 11  CLOUD_OD_09           842400 non-null  float64
 12  ALLSKY_SFC_SW_DWN_10  842400 non-null  float64
 13  CLOUD_OD_19           842400 non-null  float64
 14  CLOUD_AMT_13         

In [33]:
# Load the city table; comma is read_csv's default separator, so it is not
# spelled out explicitly.
zones_df = pd.read_csv("worldcities.csv")
zones_df.head()

Unnamed: 0,city,city_ascii,lat,lng,country,iso2,iso3,admin_name,capital,population,id
0,Tokyo,Tokyo,35.6897,139.6922,Japan,JP,JPN,Tōkyō,primary,37732000.0,1392685764
1,Jakarta,Jakarta,-6.175,106.8275,Indonesia,ID,IDN,Jakarta,primary,33756000.0,1360771077
2,Delhi,Delhi,28.61,77.23,India,IN,IND,Delhi,admin,32226000.0,1356872604
3,Guangzhou,Guangzhou,23.13,113.26,China,CN,CHN,Guangdong,admin,26940000.0,1156237133
4,Mumbai,Mumbai,19.0761,72.8775,India,IN,IND,Mahārāshtra,admin,24973000.0,1356226629


In [34]:
# Attach a point geometry to every grid row and every city so that the two
# tables can be joined spatially.
# NOTE(review): no CRS is assigned, so sjoin_nearest compares raw lon/lat
# degrees as planar coordinates rather than great-circle distance; matches
# near the poles and the antimeridian should be treated with caution.
df1 = gpd.GeoDataFrame(
    df_m,
    geometry=gpd.points_from_xy(x=df_m["lon"], y=df_m["lat"]),
)
df2 = gpd.GeoDataFrame(
    zones_df,
    geometry=gpd.points_from_xy(x=zones_df["lng"], y=zones_df["lat"]),
)

# Nearest-neighbour join with the library defaults (no how, max_distance,
# suffix or distance_col overrides). Both inputs have a `lat` column, which
# shows up as lat_left / lat_right in the result.
merged = gpd.sjoin_nearest(left_df=df1, right_df=df2)

merged

Unnamed: 0,lon,lat_left,time,CLOUD_OD_11,ALLSKY_SFC_SW_DWN_19,CLOUD_OD_02,CLOUD_OD_05,CLOUD_AMT_11,ALLSKY_SFC_SW_DWN_04,ALLSKY_SFC_SW_DWN_01,...,city_ascii,lat_right,lng,country,iso2,iso3,admin_name,capital,population,id
0,-179.5,89.5,0.0,4.859375,0.000000,5.304688,4.562500,57.101562,0.000000,0.000000,...,Zvezdnyy,70.9333,-179.6000,Russia,RU,RUS,Chukotskiy Avtonomnyy Okrug,,10.0,1643877771
1,-179.5,89.5,1.0,4.375000,0.000000,4.335938,4.195312,76.171875,0.000000,0.000000,...,Zvezdnyy,70.9333,-179.6000,Russia,RU,RUS,Chukotskiy Avtonomnyy Okrug,,10.0,1643877771
2,-179.5,89.5,2.0,3.625000,11.875000,3.664062,3.312500,56.453125,11.656250,12.734375,...,Zvezdnyy,70.9333,-179.6000,Russia,RU,RUS,Chukotskiy Avtonomnyy Okrug,,10.0,1643877771
3,-179.5,89.5,3.0,2.398438,170.171875,2.539062,2.007812,64.632812,163.648438,156.687500,...,Zvezdnyy,70.9333,-179.6000,Russia,RU,RUS,Chukotskiy Avtonomnyy Okrug,,10.0,1643877771
4,-179.5,89.5,4.0,2.789062,319.648438,2.906250,3.117188,82.468750,296.093750,317.906250,...,Zvezdnyy,70.9333,-179.6000,Russia,RU,RUS,Chukotskiy Avtonomnyy Okrug,,10.0,1643877771
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
841589,179.5,-27.5,8.0,3.570312,202.906250,4.640625,2.828125,69.875000,202.242188,674.781250,...,Suva,-18.1416,178.4419,Fiji,FJ,FJI,Rewa,primary,185913.0,1242615095
841590,179.5,-27.5,9.0,2.406250,289.531250,4.390625,3.960938,78.148438,223.351562,667.093750,...,Suva,-18.1416,178.4419,Fiji,FJ,FJI,Rewa,primary,185913.0,1242615095
841591,179.5,-27.5,10.0,2.835938,393.062500,4.710938,11.640625,68.781250,287.500000,759.406250,...,Suva,-18.1416,178.4419,Fiji,FJ,FJI,Rewa,primary,185913.0,1242615095
841592,179.5,-27.5,11.0,3.656250,351.601562,8.671875,13.960938,81.117188,280.523438,705.460938,...,Suva,-18.1416,178.4419,Fiji,FJ,FJI,Rewa,primary,185913.0,1242615095


In [38]:
# Number of distinct city names that were matched to at least one grid row
# (dropna=False keeps a missing name counted, exactly as len(unique()) would).
merged["city"].nunique(dropna=False)

8363

In [39]:
# Show the city-table row(s) whose name is exactly "Bogotá".
zones_df.loc[zones_df["city"].eq("Bogotá")]

Unnamed: 0,city,city_ascii,lat,lng,country,iso2,iso3,admin_name,capital,population,id,geometry
63,Bogotá,Bogota,4.7111,-74.0722,Colombia,CO,COL,Bogotá,primary,7968095.0,1170483426,POINT (-74.07220 4.71110)


In [43]:
# Show every joined row for the grid cell at lat 4.5, lon -74.5 to see which
# city it was matched with.
merged.loc[merged["lat_left"].eq(4.5) & merged["lon"].eq(-74.5)]

Unnamed: 0,lon,lat_left,time,CLOUD_OD_11,ALLSKY_SFC_SW_DWN_19,CLOUD_OD_02,CLOUD_OD_05,CLOUD_AMT_11,ALLSKY_SFC_SW_DWN_04,ALLSKY_SFC_SW_DWN_01,...,city_ascii,lat_right,lng,country,iso2,iso3,admin_name,capital,population,id
246805,-74.5,4.5,0.0,3.953125,536.117188,3.75,3.632812,53.046875,0.0,0.0,...,Anapoima,4.5503,-74.5361,Colombia,CO,COL,Cundinamarca,minor,14519.0,1170450555
246806,-74.5,4.5,1.0,5.15625,500.820312,4.429688,4.828125,71.203125,0.0,0.0,...,Anapoima,4.5503,-74.5361,Colombia,CO,COL,Cundinamarca,minor,14519.0,1170450555
246807,-74.5,4.5,2.0,8.0625,448.109375,5.484375,5.601562,79.53125,0.0,0.0,...,Anapoima,4.5503,-74.5361,Colombia,CO,COL,Cundinamarca,minor,14519.0,1170450555
246808,-74.5,4.5,3.0,14.171875,480.992188,3.773438,5.429688,88.835938,0.0,0.0,...,Anapoima,4.5503,-74.5361,Colombia,CO,COL,Cundinamarca,minor,14519.0,1170450555
246809,-74.5,4.5,4.0,11.898438,490.460938,4.476562,4.75,78.648438,0.0,0.0,...,Anapoima,4.5503,-74.5361,Colombia,CO,COL,Cundinamarca,minor,14519.0,1170450555
246810,-74.5,4.5,5.0,10.734375,485.476562,3.875,5.09375,73.648438,0.0,0.0,...,Anapoima,4.5503,-74.5361,Colombia,CO,COL,Cundinamarca,minor,14519.0,1170450555
246811,-74.5,4.5,6.0,6.664062,472.953125,3.617188,3.6875,73.820312,0.0,0.0,...,Anapoima,4.5503,-74.5361,Colombia,CO,COL,Cundinamarca,minor,14519.0,1170450555
246812,-74.5,4.5,7.0,8.703125,479.523438,3.703125,4.828125,85.460938,0.0,0.0,...,Anapoima,4.5503,-74.5361,Colombia,CO,COL,Cundinamarca,minor,14519.0,1170450555
246813,-74.5,4.5,8.0,9.960938,500.976562,4.117188,4.085938,66.4375,0.0,0.0,...,Anapoima,4.5503,-74.5361,Colombia,CO,COL,Cundinamarca,minor,14519.0,1170450555
246814,-74.5,4.5,9.0,8.0,458.320312,5.179688,4.804688,69.6875,0.0,0.0,...,Anapoima,4.5503,-74.5361,Colombia,CO,COL,Cundinamarca,minor,14519.0,1170450555
