# 1. Exploratory analysis of the Dutch Vegetation Database



In [25]:
import pandas as pd

# Location of the tab-separated occurrence table.
# NOTE(review): absolute, machine-specific path -- adjust `inputdir` when running elsewhere.
inputdir="/home/komazsofi/Work/Amsterdam/GitHub/ecolidar_WetLand/Data/"
filename="0009022-170826194755519"

in_vegdb_raw=pd.read_csv(inputdir+filename+".csv",sep='\t')

# Keep only records that have a complete coordinate pair (latitude AND longitude)
# and a species name. dropna returns a new frame, so the raw table stays available
# and no in-place mutation is needed.
in_vegdb=in_vegdb_raw.dropna(subset=["decimallatitude","decimallongitude","species"])

# get info about the data: column names and dtypes of the cleaned table
# (in_vegdb.head() or in_vegdb.species.unique() give a closer look if needed)
in_vegdb.dtypes

gbifid                             int64
datasetkey                        object
occurrenceid                      object
kingdom                           object
phylum                            object
class                             object
order                             object
family                            object
genus                             object
species                           object
infraspecificepithet              object
taxonrank                         object
scientificname                    object
countrycode                       object
locality                         float64
publishingorgkey                  object
decimallatitude                  float64
decimallongitude                 float64
coordinateuncertaintyinmeters    float64
coordinateprecision              float64
elevation                        float64
elevationaccuracy                float64
depth                            float64
depthaccuracy                    float64
eventdate       

## 1.1. Area of interest

In [34]:
# Report the overall extent of the records: (min, max) of longitude, then latitude
for coord_col in ('decimallongitude', 'decimallatitude'):
    print(in_vegdb[coord_col].min(), in_vegdb[coord_col].max())

# Rectangular area of interest, in the same units as the coordinate columns
long_min, long_max = 4.5, 4.58
lat_min, lat_max = 52.1, 52.15

# Records strictly inside the rectangle (points exactly on the border are excluded)
lon_inside = in_vegdb['decimallongitude'].gt(long_min) & in_vegdb['decimallongitude'].lt(long_max)
lat_inside = in_vegdb['decimallatitude'].gt(lat_min) & in_vegdb['decimallatitude'].lt(lat_max)
in_vegdb_sel = in_vegdb[lon_inside & lat_inside]

in_vegdb_sel.shape

4.3988 5.22974
52.13199 52.59753


(127, 44)

## 1.2. Exploratory analysis

In [41]:
from bokeh.charts import Bar, output_file, show
from bokeh.io import output_notebook
from bokeh.models.widgets import Panel, Tabs
from bokeh.plotting import figure

# Render bokeh output inline in the notebook
output_notebook()

# NOTE(review): `bokeh.charts` (source of `Bar`) is, as far as I know, absent from
# recent Bokeh releases -- confirm the installed version before re-running.

# One bar chart per taxonomic column, each wrapped in its own tab
taxon_ranks = ['order', 'family']
rank_charts = [Bar(in_vegdb_sel, rank) for rank in taxon_ranks]
rank_panels = [Panel(child=chart, title=rank) for chart, rank in zip(rank_charts, taxon_ranks)]

# Keep the individual handles available under their usual names
p1, p2 = rank_charts
tab1, tab2 = rank_panels

tabs = Tabs(tabs=rank_panels)

show(tabs)

## 1.3. Geospatial visualization 

In [17]:
import folium
from folium import plugins

# Base map plus a marker-cluster layer that collects all record markers
base_map = folium.Map(location=[52, 5], zoom_start=5)
marker = folium.plugins.MarkerCluster().add_to(base_map)

# One marker per selected record, with the species name as popup text
record_fields = zip(in_vegdb_sel["decimallatitude"],
                    in_vegdb_sel["decimallongitude"],
                    in_vegdb_sel["species"])
for lat, lon, species_name in record_fields:
    folium.Marker([lat, lon], popup=species_name).add_to(marker)

# Last expression of the cell: displays the map in the notebook
base_map