In [None]:
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.figure_factory as ff
import geopandas as gpd
from shapely.geometry import Polygon
import h3
import s2
from geojson import Feature, Point, FeatureCollection
import numpy as np
from no_smoothing_trees.ipynb import df
import pandas as pd

In [None]:
# NOTE(review): `df` is pulled in by `from no_smoothing_trees.ipynb import df`; Python
# resolves that as submodule `ipynb` of a package `no_smoothing_trees` — confirm that
# this import really works on a fresh kernel.
# Work on an independent copy that only keeps rows where both coordinates are present.
df_vis = df.dropna(subset=['lat', 'lon']).copy()
df_vis.head()

In [None]:
# Number of rows per municipality, most frequent first.
df_vis.loc[:, 'municipality'].value_counts()

In [None]:
def geo_to_h3(row, resolution=5):
  """Return the H3 cell id that contains the coordinates of one row.

  Parameters
  ----------
  row : mapping or pandas.Series
      Must provide the keys 'lat' and 'lon'.
  resolution : int, optional
      H3 resolution forwarded to ``h3.geo_to_h3``. Defaults to 5, the value that
      used to be hard-coded, so ``df.apply(geo_to_h3, axis=1)`` behaves as before;
      another resolution can be passed with
      ``df.apply(geo_to_h3, axis=1, resolution=7)``.

  Returns
  -------
  Whatever ``h3.geo_to_h3`` returns for that position (the cell identifier).
  """
  return h3.geo_to_h3(lat=row['lat'], lng=row['lon'], resolution=resolution)

# Tag every observation with the H3 cell it falls into.
df_vis['h3_cell'] = df_vis.apply(geo_to_h3, axis='columns')

# Turn the row index into a regular column (pandas names it 'index' when the index
# is unnamed) so the row ids can be collected per cell in the next step.
df_vis_ag = df_vis.reset_index()
df_vis_ag

In [None]:
# For every H3 cell, gather the values of the 'index' column (the former row index)
# into a list stored under "ids".
df_vis_agg = (
    df_vis_ag
    .groupby('h3_cell')['index']
    .agg(list)
    .to_frame("ids")
    .reset_index()
)

# Observation count per cell = number of collected ids.
df_vis_agg['count'] = df_vis_agg['ids'].map(len)
df_vis_agg

In [None]:
def add_geometry(row):
  """Build a shapely Polygon from the boundary of the row's H3 cell.

  The cell id is read from row['h3_cell']. The second argument `True` of
  `h3.h3_to_geo_boundary` is its `geo_json` flag; per the h3-py v3 docs this yields
  (lng, lat) pairs with the first vertex repeated at the end.
  """
  return Polygon(h3.h3_to_geo_boundary(row['h3_cell'], True))

# Store the hexagon outline of every cell in a 'geometry' column.
df_vis_agg['geometry'] = df_vis_agg.apply(add_geometry, axis='columns')
df_vis_agg

In [None]:
def hex_to_geojson(df_hex, hex_id_field, geometry_field, value_field):
  """Convert a frame of hexagons into a GeoJSON FeatureCollection.

  Every row of `df_hex` becomes one Feature:
    * geometry            <- row[geometry_field]
    * id                  <- row[hex_id_field]
    * properties['value'] <- row[value_field]

  Parameters
  ----------
  df_hex : pandas.DataFrame
      One row per hexagon.
  hex_id_field, geometry_field, value_field : str
      Column names to read the id, geometry and value from.

  Returns
  -------
  FeatureCollection
      Features in row order; an empty collection when `df_hex` has no rows.
  """
  list_features = [
      Feature(geometry=row[geometry_field],
              id=row[hex_id_field],
              properties={'value': row[value_field]})
      for _, row in df_hex.iterrows()
  ]
  # Build the collection once, after the loop: doing it per row repeated the work for
  # every hexagon and left the result undefined (UnboundLocalError) for an empty frame.
  return FeatureCollection(list_features)

# GeoJSON used by the choropleth: one feature per H3 cell, keyed by the cell id and
# carrying the observation count as its value.
geojson_obj = hex_to_geojson(
    df_vis_agg,
    hex_id_field='h3_cell',
    geometry_field='geometry',
    value_field='count',
)

In [None]:
def plot_observation_count(data: pd.DataFrame, category: str = 'all'):
  """Show a line chart of the number of observations per month.

  Parameters
  ----------
  data : pd.DataFrame
      Needs a 'date' column, plus 'scientificName' when `category` is not 'all'.
  category : str
      'all' keeps every row; any other value keeps only the rows whose
      'scientificName' equals it.

  The chart is displayed with `fig.show()`; nothing is returned.
  """
  def month_of(value):
    # The month key is the first 7 characters of a string date (presumably
    # 'YYYY-MM' — confirm the date format); non-string values are ignored.
    if not isinstance(value, str):
      return None
    return value[:7]

  selected = data if category == 'all' else data[data['scientificName'] == category]
  per_month = selected['date'].apply(month_of).value_counts().sort_index()

  fig = px.line(x=per_month.index, y=per_month.values)
  fig.update_layout(
      title=f'Observations count for {category}',
      xaxis_title='Month',
      yaxis_title='Observations',
  )
  fig.show()

# Monthly observation counts across every species.
plot_observation_count(df_vis, category='all')

In [None]:
def plot_observation_municipality(data: pd.DataFrame, category: str = 'all'):
  """Show a bar chart of the ten municipalities with the most observations.

  Parameters
  ----------
  data : pd.DataFrame
      Needs a 'municipality' column, plus 'scientificName' when `category` is
      not 'all'.
  category : str
      'all' keeps every row; any other value keeps only the rows whose
      'scientificName' equals it.

  The chart is displayed with `fig.show()`; nothing is returned.
  """
  subset = data if category == 'all' else data[data['scientificName'] == category]

  # value_counts() is ordered by descending frequency, so the first ten are the top ten.
  top_municipalities = subset['municipality'].value_counts().iloc[:10]

  fig = px.bar(x=top_municipalities.index, y=top_municipalities.values)
  fig.update_layout(
      title=f'Top 10 municipality for {category}',
      xaxis_title='Municipality',
      yaxis_title='Observations',
  )
  fig.show()

# Top municipalities across every species.
plot_observation_municipality(df_vis, category='all')

In [None]:
def plot_observation_comp(data: pd.DataFrame, min_count: int = 250):
  """Show a pie chart of how the observations split across species.

  Species with fewer than `min_count` observations are merged into a single
  'Others' slice, listed first; the remaining species follow in descending order
  of their count.

  Parameters
  ----------
  data : pd.DataFrame
      Needs a 'scientificName' column.
  min_count : int, optional
      Minimum number of observations for a species to get its own slice.
      Defaults to 250, the previously hard-coded threshold.

  The chart is displayed with `fig.show()`; nothing is returned.
  """
  counts = data['scientificName'].value_counts()
  # Build the two-column frame explicitly so the column names do not depend on how
  # the installed pandas version names the value_counts() result.
  obs_count = pd.DataFrame({'scientificName': counts.index,
                            'count': counts.to_numpy()})

  is_small = obs_count['count'] < min_count
  parts = []
  if is_small.any():
    # Sum only the numeric column: summing the whole frame concatenated every
    # species name into one string and turned 'count' into an object column.
    others_total = obs_count.loc[is_small, 'count'].sum()
    parts.append(pd.DataFrame({'scientificName': ['Others'],
                               'count': [others_total]}))
  # When nothing falls below the threshold no empty 'Others' slice is added.
  parts.append(obs_count[~is_small])
  obs_count_agg = pd.concat(parts, ignore_index=True)

  fig = px.pie(obs_count_agg, values='count', names='scientificName')
  fig.update_layout(title='Distributions of composing species')
  fig.show()

# Species composition of the cleaned observations.
plot_observation_comp(data=df_vis)

In [None]:
def discrete_colorscale(bvals, colors):
    """
    Build a step-wise (discrete, non-uniform) colorscale.

    bvals  - boundary values of the intervals; they are sorted before use
    colors - one rgb/hex color per interval, i.e. len(bvals) - 1 entries;
             colors[k] covers the k-th interval of the sorted boundaries

    Returns a list of [position, color] pairs in which every color appears twice,
    at the start and at the end of its interval; positions are the boundaries
    rescaled linearly so that the smallest maps to 0 and the largest to 1.

    Raises ValueError when the number of boundaries is not len(colors) + 1.
    """
    if len(colors) + 1 != len(bvals):
        raise ValueError('len(boundary values) should be equal to  len(colors)+1')

    bounds = sorted(bvals)
    lowest = bounds[0]
    span = bounds[-1] - lowest
    positions = [(bound - lowest) / span for bound in bounds]

    scale = []
    # Pair each color with the normalized start and end of its interval.
    for color, start, stop in zip(colors, positions, positions[1:]):
        scale.append([start, color])
        scale.append([stop, color])
    return scale

# Interval boundaries for the observation counts and one color per interval.
bvals = [1, 10, 100, 1000]
colors = ["#e62bf0", "#2026e3", "#26de26"]
discrete_nonuniform = discrete_colorscale(bvals, colors)

# Colorbar ticks: one per interval, placed at the interval's midpoint and labelled
# with the interval's lower boundary followed by '-'.
bvals = np.array(bvals)
tickvals = [np.mean(pair) for pair in zip(bvals[:-1], bvals[1:])]
ticktext = [f'{lower}-' for lower in bvals[:-1]]

# Choropleth of the observation count per H3 cell on an OpenStreetMap base layer.
fig = px.choropleth_mapbox(
    df_vis_agg,
    geojson=geojson_obj,
    locations='h3_cell',  # matched against the id of each GeoJSON feature
    color='count',
    color_continuous_scale=discrete_nonuniform,
    # The colorscale positions were normalised against bvals, so the color axis has
    # to span exactly that range; with the default auto-range (data min..max) the
    # color bands and the colorbar ticks would not line up with the boundaries.
    range_color=[int(min(bvals)), int(max(bvals))],
    center=dict(lat=65, lon=24),
    zoom=4.3,
    width=600,
    height=650,
    opacity=0.3,
    labels={'count': 'observations'},
    mapbox_style="open-street-map")

# No update_geos(projection_type=...) call here: the traces live on a mapbox subplot,
# so geo-projection settings do not apply to this figure.

fig.update_layout(
    autosize=False,
    margin=dict(l=0, r=0, b=0, t=0, pad=4, autoexpand=True),
    coloraxis=dict(colorbar_thickness=25,
                   colorbar_ticktext=ticktext,
                   colorbar_tickvals=tickvals),
)
fig.show()