In [1]:
import pandas as pd
import geopandas as gpd
from shapely import wkt

In [2]:
# Read the Ookla CSV into a DataFrame and preview its first five rows.
# NOTE(review): the preview shows an 'Unnamed: 0' column, which looks like an
# index saved by an earlier export — consider index_col=0 once it is confirmed
# that nothing downstream relies on that column.
ookla_df = pd.read_csv(filepath_or_buffer='Ookla_dataset.csv')

ookla_df.head(n=5)

Unnamed: 0,quadkey,avg_d_kbps,avg_u_kbps,avg_lat_ms,tests,devices,geometry
0,22133222312322,5227,2449,54,2,2,"POLYGON ((-160.02685546875 70.6435894914449, -..."
1,22133222330010,9529,3214,44,1,1,"POLYGON ((-160.037841796875 70.6417687358462, ..."
2,22133222330011,9392,3390,42,1,1,"POLYGON ((-160.032348632812 70.6417687358462, ..."
3,22133222330013,961,723,50,2,1,"POLYGON ((-160.032348632812 70.6399478155463, ..."
4,22133222330023,6547,3135,46,5,3,"POLYGON ((-160.043334960938 70.6363054807905, ..."


In [3]:
# Read the cleaned, aggregated VIIRS CSV into a DataFrame and preview its
# first five rows.
# NOTE(review): the preview shows an 'Unnamed: 0' column, which looks like an
# index saved by an earlier export — consider index_col=0 once it is confirmed
# that nothing downstream relies on that column.
viirs_cleaned_agg_df = pd.read_csv(filepath_or_buffer='viirs_cleaned_agg_dataset.csv')

viirs_cleaned_agg_df.head(n=5)

Unnamed: 0,state,longitude,latitude,light_intensity,year,observations
0,bahia,-46.6,-11.295833,0.34,2020,3
1,bahia,-46.6,-11.291667,0.24,2020,3
2,bahia,-46.6,-11.2875,0.303333,2020,3
3,bahia,-46.6,-11.283333,0.296667,2020,3
4,bahia,-46.6,-11.279167,0.29,2020,2


In [4]:
# Convert the Ookla table to a GeoDataFrame by parsing the WKT strings in its
# 'geometry' column into geometry objects.
#
# The parsed geometries are attached to the new frame returned by `assign`, so
# `ookla_df` keeps its original WKT strings. This keeps the cell safe to re-run:
# overwriting `ookla_df['geometry']` in place would feed already-parsed
# geometries back into the WKT parser (which expects strings) on a second run.
# NOTE(review): no CRS is assigned here; the coordinates look like lon/lat
# degrees — confirm, and set the same CRS on both frames used in the join.
ookla_gdf = gpd.GeoDataFrame(
    ookla_df.assign(geometry=gpd.GeoSeries.from_wkt(ookla_df['geometry'])),
    geometry='geometry',
)

ookla_gdf.head()

Unnamed: 0,quadkey,avg_d_kbps,avg_u_kbps,avg_lat_ms,tests,devices,geometry
0,22133222312322,5227,2449,54,2,2,"POLYGON ((-160.02686 70.64359, -160.02136 70.6..."
1,22133222330010,9529,3214,44,1,1,"POLYGON ((-160.03784 70.64177, -160.03235 70.6..."
2,22133222330011,9392,3390,42,1,1,"POLYGON ((-160.03235 70.64177, -160.02686 70.6..."
3,22133222330013,961,723,50,2,1,"POLYGON ((-160.03235 70.63995, -160.02686 70.6..."
4,22133222330023,6547,3135,46,5,3,"POLYGON ((-160.04333 70.63631, -160.03784 70.6..."


In [5]:
# Build one point geometry per VIIRS row from its longitude (x) and latitude (y)
# columns, then wrap the table in a GeoDataFrame that uses those points as its
# geometry.
viirs_cleaned_agg_gdf = gpd.GeoDataFrame(
    viirs_cleaned_agg_df,
    geometry=gpd.points_from_xy(
        x=viirs_cleaned_agg_df['longitude'],
        y=viirs_cleaned_agg_df['latitude'],
    ),
)

viirs_cleaned_agg_gdf.head(n=5)

Unnamed: 0,state,longitude,latitude,light_intensity,year,observations,geometry
0,bahia,-46.6,-11.295833,0.34,2020,3,POINT (-46.6 -11.29583)
1,bahia,-46.6,-11.291667,0.24,2020,3,POINT (-46.6 -11.29167)
2,bahia,-46.6,-11.2875,0.303333,2020,3,POINT (-46.6 -11.2875)
3,bahia,-46.6,-11.283333,0.296667,2020,3,POINT (-46.6 -11.28333)
4,bahia,-46.6,-11.279167,0.29,2020,2,POINT (-46.6 -11.27917)


In [6]:
# Spatial inner join: pair each VIIRS point with every Ookla polygon it
# intersects; rows without a match on either side are dropped.
joined_gdf = viirs_cleaned_agg_gdf.sjoin(ookla_gdf, how='inner', predicate='intersects')

# Write the joined table to CSV without the index column.
joined_gdf.to_csv(path_or_buf='merged_cleaned_agg_datasets.csv', index=False)

print("Merged dataset saved as 'merged_cleaned_agg_datasets.csv'")

Merged dataset saved as 'merged_cleaned_agg_datasets.csv'
