In [1]:
import numpy as np
import pandas as pd
from bokeh.plotting import figure 
from bokeh.io import output_notebook, show
from bokeh.transform import factor_cmap
from bokeh.palettes import Category20
from bokeh.models import HoverTool, ColumnDataSource
output_notebook()

## Loading Data

In [2]:
# Input CSV files, resolved relative to the notebook's working directory.
ALL_BIRDS_CSV = "AllBirdsv4.csv"
TEST_BIRDS_CSV = "Test Birds Location.csv"

# `df` holds the full bird table; `df_test` holds the separate test-bird table.
df = pd.read_csv(ALL_BIRDS_CSV)
df_test = pd.read_csv(TEST_BIRDS_CSV)

### Pre-processing

#### All Birds file

In [3]:
def _extract_year(date):
    """Return the year of a date string as a string, or None if it has none.

    Two layouts are recognised, in this order:
      * ``a/b/YEAR`` -- three "/"-separated parts, the year is the last one;
      * ``YEAR-b-c`` -- three "-"-separated parts, the year is the first one,
        unless that part is the placeholder ``"0000"``.

    Anything else -- including missing values (None / NaN) and other
    non-string cells -- yields None instead of raising.
    """
    if not isinstance(date, str):
        return None
    slash_parts = date.split("/")
    if len(slash_parts) == 3:
        return slash_parts[2]
    dash_parts = date.split("-")
    if len(dash_parts) == 3 and dash_parts[0] != "0000":
        return dash_parts[0]
    return None


# Mark cells that consist solely of "?" as missing.
# np.nan is used rather than None: on pandas < 1.4 `replace('?', None)` treats
# the explicit None as "no value given" and forward-fills from the previous
# row instead of blanking the cell.
df = df.replace("?", np.nan)

# Strip stray "?" characters embedded in the Y coordinate, then make it integer.
# `astype(str)` keeps the cell re-runnable once Y is already int64;
# `regex=False` is required because "?" on its own is not a valid regex.
# A missing Y still fails loudly in the int64 conversion.
df["Y"] = df["Y"].astype(str).str.replace("?", "", regex=False).astype("int64")

# Number each species by order of first appearance. `species_list` and
# `species_index` are read again by the plotting cell.
species_list = df["English_name"].unique()
# Dict lookup instead of list.index() per row: same indices, linear time.
species_lookup = {name: position for position, name in enumerate(species_list)}
species_index = [species_lookup[name] for name in df["English_name"]]
df["Birds index"] = species_index

# Derive the year from the date column. Years stay strings and unparseable
# dates become None. Building the Series on df.index avoids assuming that the
# row labels are 0..n-1.
df["Year"] = pd.Series(
    [_extract_year(date) for date in df["Date"]],
    index=df.index,
    dtype=object,
)

In [4]:
# Sanity check: list the distinct values found in the "Year" column.
distinct_years = df["Year"].unique()
print(distinct_years)

['2018' '2017' '2008' '2016' '2000' '2015' '2014' '2013' '1993' '2012'
 '2009' '2004' '2005' '1996' '2010' '2007' '2011' '1991' '2003' '1998'
 '1990' '1992' '1997' '2006' '2002' None '1999' '1988' '1986' '1994'
 '1989' '1983' '1995' '2001']


#### Test Birds file

In [5]:
# Give the test-bird table fixed column names so its coordinates can be read
# as "X" and "Y", the same names the plotting cell uses for the main table.
TEST_COLUMNS = ["ID", "X", "Y"]
df_test.columns = TEST_COLUMNS

## Map visualization with Bokeh

In [6]:
# Toolbar tools offered on the figure (the hover tool is added separately below).
TOOLS = "crosshair,pan,wheel_zoom,zoom_in,zoom_out,box_zoom,undo,redo,reset,tap,save,box_select,poly_select,lasso_select"

# The map image is drawn over [0, MAP_EXTENT] on both axes, and the figure
# ranges are set to the same square.
MAP_EXTENT = 200

# One colour per species. Category20 only defines palettes for 3..20 colours,
# so the palette size is clamped to that range and indices wrap around when
# there are more species than colours (colours then repeat).
n_species = len(species_list)
species_palette = np.array(Category20[min(max(n_species, 3), 20)])
species_colors = species_palette[
    np.asarray(species_index, dtype=int) % len(species_palette)
]

# Everything the bird glyphs and their tooltips need lives in one data source,
# so that "@name" in the hover tool resolves to the species of the hovered point.
source = ColumnDataSource(data=dict(
    x=df["X"],
    y=df["Y"],
    name=df["English_name"],
    color=species_colors,
))

p = figure(
    x_range=(0, MAP_EXTENT),
    y_range=(0, MAP_EXTENT),
    tools=TOOLS,
    title="Bird sightings by species (test birds in red)",
    x_axis_label="X",
    y_axis_label="Y",
)

# plot map (drawn first so the points are rendered on top of it)
p.image_url(url=["map.jpg"],
            x=0, y=0, w=MAP_EXTENT, h=MAP_EXTENT, anchor="bottom_left")

# plot all birds, coloured by species, reading columns from `source`
birds_renderer = p.scatter("x", "y", source=source,
                           size=7, alpha=0.5, color="color")

# plot the "test birds" as larger red markers
p.scatter(df_test["X"], df_test["Y"], size=15, color="red")

# put details on cursor -- restricted to the bird glyphs, since the map image
# and the test birds have no "name" column to display
hover = HoverTool(
    renderers=[birds_renderer],
    tooltips=[
        ("Bird type", "@name"),
        ("Position", "(@x, @y)"),
    ],
)
p.add_tools(hover)

# NOTE(review): this cell originally rebound `species_list` to the colour
# array. The alias is kept in case later cells read it that way -- TODO confirm
# and drop it, since the name suggests species names, not colours.
species_list = species_palette

show(p)