In [None]:
# Data handling
import pandas as pd
import geopandas as gpd

# Static plots and interactive maps
import matplotlib.pyplot as plt
import seaborn as sns
import folium

# Show every column when a DataFrame is displayed
pd.set_option("display.max_columns", None)


In [None]:
# Read the zipped GLOBE mosquito dataset straight from its URL
mosquito_url = 'https://github.com/geo-di-lab/emerge-lessons/raw/refs/heads/main/docs/data/globe_mosquito.zip'
mosquito = gpd.read_file(mosquito_url)

# Preview the first rows
mosquito.head()


In [None]:
# Print a summary of the mosquito table: column names, non-null counts and dtypes
mosquito.info()

In [None]:
# Number of rows in the mosquito table
mosquito.shape[0]

In [None]:
# Number of distinct CountryCode values (a missing value, if present, counts as one)
mosquito['CountryCode'].nunique(dropna=False)

In [None]:
# Broader water source types: number of rows in each WaterSourceType category,
# most frequent first (missing values are not counted)
mosquito['WaterSourceType'].value_counts()

In [None]:
# More specific water source types: number of rows in each WaterSource category,
# most frequent first (missing values are not counted)
mosquito['WaterSource'].value_counts()

In [None]:
# Here are some options for color palettes
for palette_name in ('Set2', 'twilight_shifted', 'tab20'):
    display(sns.color_palette(palette=palette_name))

In [None]:
# Pie chart of water types: count the non-missing SiteId values in each category
types = mosquito.groupby('WaterSourceType', as_index=False)['SiteId'].count()

fig, ax = plt.subplots(figsize=(5, 5))
patches, texts = ax.pie(types['SiteId'], colors=sns.color_palette('Set2'))
ax.set_title("GLOBE Mosquito Sightings: Water Source Types (General)")

# Put the legend to the right of the pie, one entry per wedge
ax.legend(patches, types['WaterSourceType'],
          loc='center left', bbox_to_anchor=(1, 0.5), frameon=False)
plt.show()

In [None]:
# Mean LarvaeCountProcessed for each country code
larvae_by_country = mosquito.groupby('CountryCode')['LarvaeCountProcessed']
mosquito_avg = larvae_by_country.mean()
mosquito_avg

In [None]:
# Country boundaries, reprojected to EPSG:4326
countries = gpd.read_file('https://github.com/geo-di-lab/emerge-lessons/raw/refs/heads/main/docs/data/world_countries_general.geojson').to_crs(epsg=4326)

# Compute the per-country mean from `mosquito` here rather than merging the existing
# `mosquito_avg` back into itself: the result is the same on a fresh run, but the cell
# can now be re-run safely because it no longer consumes the variable it overwrites.
larvae_mean = mosquito.groupby('CountryCode')['LarvaeCountProcessed'].mean()

# Left merge keeps every country; countries without a matching CountryCode get NaN
mosquito_avg = countries.merge(larvae_mean, left_on='iso3', right_on='CountryCode', how='left')

In [None]:
# Static choropleth of the average larvae count per country
fig, ax = plt.subplots(figsize=(10, 4))

mosquito_avg.plot(
    column='LarvaeCountProcessed',
    ax=ax,
    cmap='viridis',
    vmin=0,
    vmax=50,  # the color scale runs from 0 to 50
    legend=True,
    missing_kwds={'color': 'lightgrey'},  # countries with no value are drawn in grey
)
ax.set_title('GLOBE Mosquito Sightings: Average Larvae Count')
ax.set_axis_off()
plt.show()

In [None]:
# Number of rows (observations) per country code
obs_counts = mosquito.groupby('CountryCode').size()
obs_counts = obs_counts.reset_index(name='GLOBE_Observations')

# Left merge keeps every country; countries without a matching CountryCode get NaN
mosquito_obs = countries.merge(obs_counts, how='left',
                               left_on='iso3', right_on='CountryCode')

In [None]:
# Interactive world map of GLOBE mosquito observation counts.
# NOTE: the name `map` shadows Python's built-in map() for the rest of the session;
# it is kept so that any other cell referring to `map` keeps working.
map = folium.Map(location=[0, 0], zoom_start=3, tiles="CartoDB positron")

# Create the map with a color scale for the number of observations submitted to GLOBE
folium.Choropleth(
    geo_data=mosquito_obs.to_json(),
    name="Choropleth",
    data=mosquito_obs,
    columns=['name', 'GLOBE_Observations'],
    key_on="feature.properties.name",
    fill_color="YlGnBu",
    fill_opacity=0.7,
    bins=[1, 50, 100, 500, 1000, 5000, 10000, 20000],
    legend_name="Number of GLOBE Observations (2018-2024)",
).add_to(map)

# Add pop-up when you hover over the area.
# folium.GeoJson receives its features through `data`; `geo_data` and `key_on` are
# folium.Choropleth parameters, not GeoJson ones, so they are not passed here
# (passing them either raises a TypeError or embeds the GeoJSON string a second
# time as a layer option, depending on the folium version).
folium.GeoJson(
    data=mosquito_obs,
    tooltip=folium.features.GeoJsonTooltip(fields=['name', 'GLOBE_Observations'], aliases=['Country:', 'Observations:']),
    style_function=lambda feature: {'color': 'white', 'weight': 1}
).add_to(map)

display(map)

In [None]:
# Read the zipped GLOBE land cover dataset straight from its URL
land_cover_url = 'https://github.com/geo-di-lab/emerge-lessons/raw/refs/heads/main/docs/data/globe_land_cover.zip'
land_cover = gpd.read_file(land_cover_url)

# Preview the first rows
land_cover.head()

In [None]:
# Print a summary of the land cover table: column names, non-null counts and dtypes
land_cover.info()

In [None]:
# Number of rows in the land cover table
land_cover.shape[0]

In [None]:
# Number of distinct CountryCode values (a missing value, if present, counts as one)
land_cover['CountryCode'].nunique(dropna=False)

In [None]:
# Let's see how many different MUC codes there are
# (a missing value, if present, counts as one)
land_cover['MucCode'].nunique(dropna=False)

In [None]:
# What are the most common MUC codes by country?

def _top_description(descriptions):
    """Return the most frequent value, or None when there are no non-missing values."""
    counts = descriptions.value_counts()
    return None if counts.empty else counts.idxmax()


def _top_count(descriptions):
    """Return how many times the most frequent value occurs."""
    return descriptions.value_counts().max()


descriptions_by_country = land_cover.groupby('CountryCode')['MucDescription']

# One row per country with its most frequent MUC description
muc = descriptions_by_country.apply(_top_description).reset_index(name='MucDescription')

# Add a column for the number of the MUC code.
# The values are assigned by position, relying on every groupby over CountryCode
# producing its groups in the same order.
muc['Count'] = descriptions_by_country.apply(_top_count).to_numpy()

# Add a column for the total number of GLOBE observations
muc['GLOBE_Observations'] = land_cover.groupby('CountryCode').size().to_numpy()

muc

In [None]:
# We can check that this is correct by checking one of the countries
is_usa = land_cover['CountryCode'] == 'USA'
is_short_grass = land_cover['MucDescription'] == 'Herbaceous/Grassland, Short Grass'

print('Count of most common MUC:', (is_usa & is_short_grass).sum())
print('Total GLOBE Observations:', is_usa.sum())

In [None]:
# Attach the per-country MUC summary to the country polygons. The left merge keeps
# every row of `countries`; rows without a matching CountryCode get NaN.
# NOTE(review): `muc` is rebound to the merged result, so running this cell a second
# time would merge the already-merged frame. Re-run the cell that builds `muc` first.
muc = countries.merge(muc, left_on='iso3', right_on='CountryCode', how='left')

In [None]:
# Short labels to look for inside the full MUC descriptions
muc_list = ['Barren', 'Closed Forest', 'Cultivated', 'Herbaceous', 'Open Water', 'Trees', 'Urban', 'Wetlands', 'Woodland']

# Rows whose description contains a label receive it in MucDescriptionShort.
# If several labels match the same row, the one latest in the list wins;
# rows with a missing description are never matched (na=False).
for label in muc_list:
    has_label = muc['MucDescription'].str.contains(label, na=False)
    muc.loc[has_label, 'MucDescriptionShort'] = label

In [None]:
# Static map colored by each country's most common (short) MUC label
fig, ax = plt.subplots(figsize=(11, 5))

muc.plot(
    column='MucDescriptionShort',
    ax=ax,
    cmap='viridis',
    legend=True,
    legend_kwds={'loc': 'lower left', 'frameon': False},
    # countries without a label are drawn in grey and listed as "No Data"
    missing_kwds={'color': 'lightgrey', 'label': 'No Data'},
)
ax.set_title('GLOBE Land Cover: Most Common MUC Codes')
plt.show()

In [None]:
# Interactive world map of GLOBE land cover observation counts.
# NOTE: the name `map` shadows Python's built-in map() for the rest of the session;
# it is kept so that any other cell referring to `map` keeps working.
map = folium.Map(location=[0, 0], zoom_start=3, tiles="CartoDB positron")

# Create the map with a color scale for the number of observations submitted to GLOBE
folium.Choropleth(
    geo_data=muc.to_json(),
    name="Choropleth",
    data=muc,
    columns=['name', 'GLOBE_Observations'],
    key_on="feature.properties.name",
    fill_color="YlGnBu",
    fill_opacity=0.7,
    bins=[1, 50, 100, 500, 1000, 5000, 10000, 20000, 30200],
    legend_name="Number of GLOBE Observations (2018-2024)",
).add_to(map)

# Add pop-up when you hover over the area.
# folium.GeoJson receives its features through `data`; `geo_data` and `key_on` are
# folium.Choropleth parameters, not GeoJson ones, so they are not passed here
# (passing them either raises a TypeError or embeds the GeoJSON string a second
# time as a layer option, depending on the folium version).
folium.GeoJson(
    data=muc,
    tooltip=folium.features.GeoJsonTooltip(fields=['name', 'MucDescriptionShort', 'GLOBE_Observations'], aliases=['Country:', 'Most common MUC:', 'Observations:']),
    style_function=lambda feature: {'color': 'white', 'weight': 1}
).add_to(map)

display(map)