# General EDA

In [None]:
import pandas as pd
import numpy as np
import warnings
import matplotlib.pyplot as plt
import rasterio
import geopandas as geopd
import rasterio.rio
import seaborn as sns
import datetime as dt 

from rasterio.plot import show

import pyreadr

First, we import the final dataframes.

In [None]:
# Read the three shapefiles used in this notebook: the two modelling tables
# ("all" and "resamp") and the sample points.
df_all, df_resamp, sample_points = (
    geopd.read_file(shapefile)
    for shapefile in (
        "../data/final_shapefiles/foxes_modelling_all.shp",
        "../data/final_shapefiles/foxes_modelling_resamp.shp",
        "../data/cleaned_shapefiles/sample_points.shp",
    )
)

## Create dummy variables
### Bin aspect feature
First, we put the aspect feature into bins: one bin for the -1 values (where the slope is zero) and eight bins for the eight geographic directions.

In [None]:
# Bin edges (in degrees) for the aspect feature. Flat cells are coded as -1 and
# go into the "None" bin. The upper edge of that bin is -0.5 rather than 0:
# pd.cut intervals are closed on the right, so an edge at 0 would put an aspect
# of exactly 0 degrees (due north) into "None" instead of "N".
ASPECT_BIN_EDGES = [-1.1, -0.5, 22.5, 67.5, 112.5, 157.5, 202.5, 247.5, 292.5, 337.5, 360]
# North straddles 0/360 degrees, so the first and the last directional bin
# both carry the label "N".
ASPECT_BIN_LABELS = ["None", "N", "NE", "E", "SE", "S", "SW", "W", "NW", "N"]
# Category order of the binned feature. "None" comes first, so it is the level
# removed by get_dummies(..., drop_first=True).
ASPECT_CATEGORIES = ["None", "N", "NE", "E", "SE", "S", "SW", "W", "NW"]


def bin_aspect(aspect):
    """Bin aspect angles into "None" plus the eight compass directions.

    Parameters
    ----------
    aspect : pandas.Series
        Aspect in degrees; -1 marks cells without an aspect (slope is zero).

    Returns
    -------
    pandas.Series
        Unordered categorical Series with the categories ASPECT_CATEGORIES.
        Values outside (-1.1, 360] and missing values become NaN.
    """
    # ordered=False is what allows the duplicated "N" label; both north bins
    # are mapped to the same category directly, so no renaming step is needed.
    binned = pd.cut(
        aspect,
        bins=ASPECT_BIN_EDGES,
        labels=ASPECT_BIN_LABELS,
        ordered=False,
    )
    # With duplicated labels pandas infers the categories itself; pin the order.
    return binned.cat.set_categories(ASPECT_CATEGORIES)


df_all["aspect_bin"] = bin_aspect(df_all.aspect)
# repeat for resamp:
df_resamp["aspect_bin"] = bin_aspect(df_resamp.aspect)


### Create dummy variables for all categorical variables

In [None]:
# Categorical columns that get expanded into dummy variables.
cat_variables = [
    "soil",
    "veg",
    "aspect_bin",
]

In [None]:
# One-hot encode the categorical columns of both modelling tables; the first
# level of every variable is dropped (drop_first=True).
# NOTE(review): the dummies are derived per table, so the resulting columns
# only line up if both tables contain the same levels -- confirm.
categories_all, categories_resamp = (
    pd.get_dummies(table[cat_variables], drop_first=True)
    for table in (df_all, df_resamp)
)

In [None]:
# Attach the dummy columns to the modelling tables. Dummy columns left over
# from an earlier run of this cell are dropped first (errors="ignore" makes
# that a no-op on the first run), so re-running the cell does not append the
# same columns a second time.
df_all = pd.concat(
    [df_all.drop(columns=categories_all.columns, errors="ignore"), categories_all],
    axis=1,
)
df_resamp = pd.concat(
    [df_resamp.drop(columns=categories_resamp.columns, errors="ignore"), categories_resamp],
    axis=1,
)

## Start of EDA
### Look at "gaps" in homeranges and all the features in that area

Build indiv_fox_all, which contains all data for each individual fox

In [None]:
# Map every fox id occurring in df_all to the rows that belong to that fox.
indiv_fox_all = {
    fox_id: df_all[df_all.id == fox_id]
    for fox_id in df_all.id.unique()
}

In [None]:
# Give every fox a short, numbered handle (fox_0 ... fox_11); the order of the
# names on the left matches the order of the ids on the right.
(
    fox_0, fox_1, fox_2, fox_3,
    fox_4, fox_5, fox_6, fox_7,
    fox_8, fox_9, fox_10, fox_11,
) = (
    indiv_fox_all[fox_id]
    for fox_id in (
        '2018-FSBD608-001',
        '2018-FSBD615_v-gr/r-gr',
        '2018-FSBD619_r-gr/r-y',
        '2018-FSBD641_b-r/gr-b',
        '2019-FSAC008-011',
        '2019-FSAC008-012',
        '2019-FSBD197-005',
        '2019-FSBD609-002',
        '2019-FSBD615-001',
        '2019-FSBD622-006',
        '2019-FSBD624-004',
        '2019-FSBD641_y-gr/r-b',
    )
)


Get only coordinates around the homeranges for each fox.

In [None]:
# Bounding box around the home range of every fox, in the coordinates of the
# x / y columns of sample_points: (x_min, x_max, y_min, y_max).
# Entry k belongs to fox_k.
HOMERANGE_BOUNDS = [
    (545930, 551050, 7352170, 7357990),  # fox 0
    (543640, 550450, 7369490, 7372930),  # fox 1
    (549960, 556870, 7367910, 7374750),  # fox 2
    (544190, 551450, 7371810, 7378700),  # fox 3
    (566110, 574200, 7321800, 7328780),  # fox 4
    (567700, 575720, 7324150, 7328990),  # fox 5
    (532040, 547280, 7373060, 7381040),  # fox 6
    (534680, 548400, 7373000, 7380920),  # fox 7
    (527740, 547540, 7368840, 7379200),  # fox 8
    (518730, 526940, 7366450, 7375040),  # fox 9
    (520550, 531560, 7374400, 7383400),  # fox 10
    (543360, 551050, 7372690, 7379790),  # fox 11
]


def points_within_bounds(points, x_min, x_max, y_min, y_max):
    """Return the rows of ``points`` that lie strictly inside a bounding box.

    Parameters
    ----------
    points : pandas.DataFrame or geopandas.GeoDataFrame
        Table with numeric ``x`` and ``y`` columns.
    x_min, x_max, y_min, y_max : number
        Box limits; points exactly on a limit are excluded.

    Returns
    -------
    Same type as ``points``
        The rows with ``x_min < x < x_max`` and ``y_min < y < y_max``.
    """
    inside_x = (points["x"] > x_min) & (points["x"] < x_max)
    inside_y = (points["y"] > y_min) & (points["y"] < y_max)
    return points[inside_x & inside_y]


# Sample points around each home range; df_all_fox_k pairs with fox_k.
(
    df_all_fox_0, df_all_fox_1, df_all_fox_2, df_all_fox_3,
    df_all_fox_4, df_all_fox_5, df_all_fox_6, df_all_fox_7,
    df_all_fox_8, df_all_fox_9, df_all_fox_10, df_all_fox_11,
) = (
    points_within_bounds(sample_points, *bounds)
    for bounds in HOMERANGE_BOUNDS
)

In [None]:
# Quick look at the geometries of all df_all rows that belong to fox 11
# (default plot styling).
fox_11.plot()

In [None]:
# Slope around the home ranges of foxes 11 and 3, with the data points of
# fox 11 (black) and fox 3 (white) on top.
# Both background layers share one colour scale and one colour bar: without an
# explicit vmin/vmax every layer is normalised to its own slope range, so the
# same colour would stand for different slopes in the two layers.
slope_min = min(df_all_fox_11["slope"].min(), df_all_fox_3["slope"].min())
slope_max = max(df_all_fox_11["slope"].max(), df_all_fox_3["slope"].max())

fig, ax = plt.subplots()
df_all_fox_11.plot("slope", cmap="turbo", vmin=slope_min, vmax=slope_max, legend=True, ax=ax)
df_all_fox_3.plot("slope", cmap="turbo", vmin=slope_min, vmax=slope_max, ax=ax)
fox_11.plot(color="black", marker="x", ax=ax, alpha=0.8)
fox_3.plot(color="white", marker="x", ax=ax, alpha=0.8)
plt.show()

Foxes 3 and 11 live in the same area. They both avoid the small area with a very high slope.

In [None]:
# Vegetation classes around the home range of fox 5; the data points of the
# fox are drawn on top as black crosses.
fig, ax = plt.subplots()
df_all_fox_5.plot(
    column="veg",
    ax=ax,
    cmap="turbo",
    legend=True,
)
fox_5.plot(ax=ax, marker="x", color="black", alpha=0.8)
plt.show()

Fox 5 avoids the moist shrub.

In [None]:
# Slope around the home range of fox 6; the data points of the fox are drawn
# on top as black crosses.
fig, ax = plt.subplots()
df_all_fox_6.plot(
    column="slope",
    ax=ax,
    cmap="turbo",
    legend=True,
)
fox_6.plot(ax=ax, marker="x", color="black", alpha=0.8)
plt.show()

The homerange of fox 6 is limited to the north by a line of high slopes.

In [None]:
# Slope around the home ranges of foxes 6 and 7, with the data points of
# fox 7 (black) and fox 6 (white) on top.
# Both background layers share one colour scale and one colour bar: without an
# explicit vmin/vmax every layer is normalised to its own slope range, so the
# same colour would stand for different slopes in the two layers.
slope_min = min(df_all_fox_6["slope"].min(), df_all_fox_7["slope"].min())
slope_max = max(df_all_fox_6["slope"].max(), df_all_fox_7["slope"].max())

fig, ax = plt.subplots()
df_all_fox_6.plot("slope", cmap="turbo", vmin=slope_min, vmax=slope_max, legend=True, ax=ax)
df_all_fox_7.plot("slope", cmap="turbo", vmin=slope_min, vmax=slope_max, ax=ax)
fox_7.plot(color="black", marker="x", ax=ax, alpha=0.5)
fox_6.plot(color="white", marker="x", ax=ax, alpha=0.5)
plt.show()

Foxes 6 and 7 live in the same area. Again, the high slope forms the northern border.

In [None]:
# Slope around the home range of fox 9; the data points of the fox are drawn
# on top as black crosses.
fig, ax = plt.subplots()
df_all_fox_9.plot(
    column="slope",
    ax=ax,
    cmap="turbo",
    legend=True,
)
fox_9.plot(ax=ax, marker="x", color="black", alpha=0.8)
plt.show()

Fox 9 has a lot of datapoints in the southern area of the high slope, but none ON the high slope.

In [None]:
# Slope around the home range of fox 10; the data points of the fox are drawn
# on top as black crosses.
fig, ax = plt.subplots()
df_all_fox_10.plot(
    column="slope",
    ax=ax,
    cmap="turbo",
    legend=True,
)
fox_10.plot(ax=ax, marker="x", color="black", alpha=0.8)
plt.show()

Fox 10 avoids the valley in the north.

In [None]:
# Slope around the home range of fox 11; the data points of the fox are drawn
# on top as black crosses.
fig, ax = plt.subplots()
df_all_fox_11.plot(
    column="slope",
    ax=ax,
    cmap="turbo",
    legend=True,
)
fox_11.plot(ax=ax, marker="x", color="black", alpha=0.8)
plt.show()

Fox 11 avoids the region with a high slope, as well as the valley within this region.

In [None]:
# Vegetation classes around the home range of fox 11; the data points of the
# fox are drawn on top as black crosses.
fig, ax = plt.subplots()
df_all_fox_11.plot(
    column="veg",
    ax=ax,
    cmap="turbo",
    legend=True,
)
fox_11.plot(ax=ax, marker="x", color="black", alpha=0.8)
plt.show()

There is snow in that valley.

In [None]:
# Slope around the home ranges of foxes 11 and 3, with the data points of
# fox 11 (black) and fox 3 (white) on top.
# Both background layers share one colour scale and one colour bar: without an
# explicit vmin/vmax every layer is normalised to its own slope range, so the
# same colour would stand for different slopes in the two layers.
slope_min = min(df_all_fox_11["slope"].min(), df_all_fox_3["slope"].min())
slope_max = max(df_all_fox_11["slope"].max(), df_all_fox_3["slope"].max())

fig, ax = plt.subplots()
df_all_fox_11.plot("slope", cmap="turbo", vmin=slope_min, vmax=slope_max, legend=True, ax=ax)
df_all_fox_3.plot("slope", cmap="turbo", vmin=slope_min, vmax=slope_max, ax=ax)
fox_11.plot(color="black", marker="x", ax=ax, alpha=0.8)
fox_3.plot(color="white", marker="x", ax=ax, alpha=0.8)
plt.show()

Fox 3 avoids the same region as fox 11 does.