# EDA: time series
In this notebook, we analyze the day trips of the individual foxes.

In [2]:
import sys
sys.path.append("..")
sys.path.append("../modeling")

import home_ranges as hr
import features_for_observations as f4o

from keplergl import KeplerGl

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import geopandas as geopd
import seaborn as sns
import datetime as dt 

from rasterio.plot import show

from datetime import datetime, timedelta
from shapely.geometry import Polygon
import shapely

import geopandas as gpd

First, we import the data.

In [3]:
# Read the three cleaned shapefiles: fox observations, sample points and dens.
foxes_all, sample_points, dens_all = (
    geopd.read_file(shapefile_path)
    for shapefile_path in (
        "../data/cleaned_shapefiles/foxes_all.shp",
        "../data/cleaned_shapefiles/sample_points.shp",
        "../data/cleaned_shapefiles/dens_norrbotten.shp",
    )
)

KeyboardInterrupt: 

In the next step, we want to visualize the day trips of the foxes with Kepler GL. Since Kepler uses the EPSG:4326 coordinate system (latitude/longitude) and our coordinates are in EPSG:3006, we add two columns with the transformed coordinates.

In [None]:
# Re-project the observation geometries from EPSG:3006 to EPSG:4326, the
# coordinate system used for the Kepler map.
gdf = gpd.GeoDataFrame(foxes_all.geometry, crs=3006).to_crs(epsg=4326)

# Store latitude (y) and longitude (x) of every re-projected point as plain
# columns; the values are assigned positionally, row by row.
kepler_points = gdf.geometry
foxes_all["geo_kepler_lat"] = [point.y for point in kepler_points]
foxes_all["geo_kepler_lon"] = [point.x for point in kepler_points]

Since arctic foxes are nocturnal, we define a 24h "fox_day", starting at noon.
In addition, we add columns for the month and the year of the timestamp.

In [None]:
def _to_fox_day(timestamp):
    """Return the "fox day" (``YYYY-MM-DD``) a raw timestamp string belongs to.

    The timestamp is parsed with the format ``%Y-%m-%d-%H:%M:%S`` and shifted
    forward by 12 hours before the date is taken, so an observation made at or
    after noon is labelled with the following calendar date.
    """
    shifted = datetime.strptime(timestamp, '%Y-%m-%d-%H:%M:%S') + timedelta(hours=12)
    return shifted.date().isoformat()


foxes_all["fox_day"] = [_to_fox_day(raw_timestamp) for raw_timestamp in foxes_all.t_]

# Month ("MM") and year ("YYYY") are sliced out of the fox-day string, so they
# refer to the shifted date and stay strings.
foxes_all["month"] = foxes_all["fox_day"].str.slice(5, 7)
foxes_all["year"] = foxes_all["fox_day"].str.slice(0, 4)

In the next step, we calculate the temporal and spatial differences for two subsequent data points of the same fox.

In [None]:
# Add two per-observation columns computed by the project helpers in
# `features_for_observations` (imported as f4o).
# NOTE(review): according to the accompanying text these are the spatial and
# temporal differences between two subsequent points of the same fox; the
# helpers are not visible here, so how the first point of each fox is handled
# (NaN / 0) and the unit of `time_diff` should be confirmed in f4o.
# The second call receives the frame that already contains `travel_distance`.
foxes_all["travel_distance"] = f4o.get_distance(foxes_all)
foxes_all["time_diff"] = f4o.get_time_diffs(foxes_all)

Since we have a lot of days with very few data points, we look at how much data we have for different days.

We include two more columns that, for each "fox day", hold the number of data points and the maximum time delta between two data points on this day (the smaller this value, the more precise the information for that day).

In [None]:
# Per fox and fox day: number of observations and largest gap between them.
day_keys = ["id", "fox_day"]
time_diff_by_day = foxes_all[["id", "time_diff", "fox_day"]].groupby(day_keys, as_index=False)

# count() counts the non-null `time_diff` values of each (id, fox_day) group.
# NOTE(review): rows whose `time_diff` is missing are therefore not counted —
# confirm this is intended for `points_this_day`.
points_per_day = time_diff_by_day.count().rename(columns={"time_diff": "points_this_day"})

# Largest `time_diff` within each (id, fox_day) group.
max_window_per_day = time_diff_by_day.max().rename(columns={"time_diff": "max_window"})

# Attach both statistics to every observation (inner joins on id and fox_day).
foxes_all_temp = pd.merge(foxes_all, points_per_day, on=day_keys)
foxes_all_2 = pd.merge(max_window_per_day, foxes_all_temp, on=day_keys)


Now, we include the home ranges and the area of the home range for every fox.

In [None]:
# One row per distinct (id, sex) combination; groupby().count() is used only
# to obtain these combinations, the counts themselves are discarded.
foxes_homeranges = foxes_all.groupby(["id", "sex"], as_index=False).count()[["id", "sex"]]

# Compute the home-range polygon of every fox exactly once and reuse it for
# both derived columns (it was previously computed twice per fox).
home_range_polygons = [
    hr.hr_area(foxes_all.query("id == @fox_id")) for fox_id in foxes_homeranges.id
]

# GeoJSON representation of the home range, as produced by the project helper.
foxes_homeranges["geometry"] = [
    f4o.polygon_to_geojson(polygon) for polygon in home_range_polygons
]
# Area of the home-range polygon, in the squared units of the observations'
# coordinate system (presumably m² for EPSG:3006 — TODO confirm).
foxes_homeranges["hr_area"] = [polygon.area for polygon in home_range_polygons]

We can compare the size of the biggest and the smallest home range.

In [None]:
# Smallest home-range area divided by the largest one.
hr_areas = foxes_homeranges["hr_area"]
hr_areas.min() / hr_areas.max()

We also calculate the mean of the area.

In [None]:
# Mean home-range area per sex. `hr_area` is selected explicitly because the
# frame also holds the non-numeric `id` and `geometry` columns, and averaging
# those raises a TypeError on pandas >= 2.0.
foxes_homeranges.groupby("sex", as_index=False)["hr_area"].mean()

Next, we want to represent the home ranges on a map. For this, we create shapely objects.

In [None]:
# Circles around the dens, meant for comparing them with the home ranges.
# Not used yet because no sensible radius has been chosen; future work might
# define the radius as the distance from the den to the farthest polygon point.
circle_all = Polygon()

# Union of all home ranges, accumulated into a single geometry.
hr_all = Polygon()

# Area that belongs to more than one home range.
intersect_all = Polygon()

for fox_id in foxes_homeranges["id"].unique():
    fox_hr_poly = hr.hr_area(foxes_all.query("id == @fox_id"))
    # Overlap of this home range with all ranges merged so far; it has to be
    # taken before the current range is added to `hr_all`.
    shared_area = hr_all.intersection(fox_hr_poly)
    hr_all = hr_all.union(fox_hr_poly)
    intersect_all = intersect_all.union(shared_area)

We can show the home ranges on a map.

In [None]:
# Interactive Kepler map with two datasets per fox: its trips and its home range.
map1 = KeplerGl(height=500)

cols_df = ["id", "geo_kepler_lat", "geo_kepler_lon"]  # defined but not used in this cell
cols_geo = ["fox_day"]  # columns handed to the trip GeoJSON helper

for fox_id in foxes_all["id"].unique():
    fox_hr_poly = hr.hr_area(foxes_all.query("id == @fox_id"))
    geojson = f4o.df_to_geojson_trip(foxes_all.query("id == @fox_id "), cols_geo)

    # The dataset names embed the fox id, so ids are expected to be strings.
    trip_layer_name = f"Where does fox  {fox_id} trot?"
    home_range_layer_name = f"homerange{fox_id}"

    map1.add_data(data=geojson, name=trip_layer_name)
    map1.add_data(data=f4o.polygon_to_geojson(fox_hr_poly), name=home_range_layer_name)

# Last expression of the cell: render the map.
map1

We also want to analyze how far a fox travels per day. Here, we only look at days with many data points. Note, however, that the thresholds used in this query are chosen rather arbitrarily.

In [None]:
# Keep only well-sampled fox days: largest gap between observations below 1000
# (in the unit of `time_diff`) and more than 80 counted points on that day.
is_well_sampled_day = (foxes_all_2["max_window"] < 1000) & (foxes_all_2["points_this_day"] > 80)
foxes_relevant_days = foxes_all_2[is_well_sampled_day]

Based on this reduced data set, it was possible to get min, max, mean and median of the travelled distance (in meters)

In [None]:
# Total travelled distance per fox and fox day, then summary statistics
# (count, mean, std, min, quartiles, max) over those daily totals.
daily_distance = foxes_relevant_days.groupby(["id", "fox_day"])[["travel_distance"]].sum()
daily_distance.describe()

Next, we create a table to compare these distances by month, to see if there were particularly "active" months.

In our initial data, it seemed that in July foxes travel only half the distance they travel in September.
But our data was too sparse, with only 13 such "day trips" in July, so this is not necessarily representative.

In [None]:
# Total travelled distance per fox and fox day (one row per "day trip"),
# keeping the month of the fox day as an additional key.
c = (
    foxes_relevant_days[["id", "fox_day", "month", "travel_distance"]]
    .groupby(["id", "fox_day", "month"], as_index=False)
    .sum()
)

# Per-month statistics of the daily totals.
# - Aggregations are given by name, so the column labels do not depend on the
#   NumPy/pandas version (np.min used to be labelled "amin", for example).
# - "count" gives the number of day trips per month; np.count_nonzero would
#   silently leave out day trips whose total distance is 0.
# - `as_index` is an option of groupby(), not of agg(), where it would only be
#   forwarded to the aggregation functions; it is therefore not passed here.
d = c[["month", "travel_distance"]].groupby(["month"]).agg(
    ["min", "max", "mean", "median", "count"]
)

# Displayed with a more descriptive label for the count column (`d` itself
# keeps the label "count").
d.rename(columns={"count": "no_of_observations"})