### Solutions for https://github.com/Automating-GIS-processes-2022/Exercise-2/blob/main/Exercise-2-problem-3.ipynb

In [1]:
import os
os.environ['USE_PYGEOS'] = '0'

from pathlib import Path
import geopandas as gpd
from shapely import LineString

In [2]:
DATA_DIRECTORY = Path("data")

### Problem 3: How far did people travel? (8 points)

#### a) Read the input file and re-project it

In [3]:
kruger_points = gpd.read_file(DATA_DIRECTORY / "kruger_points.shp")

In [4]:
kruger_points = kruger_points.to_crs(epsg=32735)

In [5]:
# NON-EDITABLE CODE CELL FOR TESTING YOUR SOLUTION

# Check the data
kruger_points.head()

Unnamed: 0,lat,lon,timestamp,userid,geometry
0,-24.980792,31.484633,2015-07-07 03:02,66487960,POINT (952912.890 7229683.258)
1,-25.499225,31.508906,2015-07-07 03:18,65281761,POINT (953433.223 7172080.632)
2,-24.342578,30.930866,2015-03-07 03:38,90916112,POINT (898955.144 7302197.408)
3,-24.854614,31.519718,2015-10-07 05:04,37959089,POINT (956927.218 7243564.942)
4,-24.921069,31.520836,2015-10-07 05:19,27793716,POINT (956794.955 7236187.926)


In [6]:
# NON-EDITABLE CODE CELL FOR TESTING YOUR SOLUTION

# Check that the crs is correct after re-projecting (should be epsg:32735)
import pyproj
assert kruger_points.crs == pyproj.CRS("EPSG:32735")

#### b) Group the data by user id

In [7]:
grouped_by_users = kruger_points.groupby('userid')

In [8]:
# NON-EDITABLE CODE CELL FOR TESTING YOUR SOLUTION

# Check the number of groups:
assert len(grouped_by_users.groups) == kruger_points["userid"].nunique(), "Number of groups should match number of unique users!"

#### c) Create `shapely.geometry.LineString` objects for each user connecting the points from oldest to most recent

In [9]:
movements = gpd.GeoDataFrame(
    kruger_points
        .sort_values("timestamp")
        .groupby('userid', sort=False)['geometry']
        .apply(list)
        .loc[lambda series: series.str.len() > 1]
        .apply(LineString),
    crs=32735
)

In [10]:
# NON-EDITABLE CODE CELL FOR TESTING YOUR SOLUTION

# Check the result
print(type(movements))
print(movements.crs)

movements

<class 'geopandas.geodataframe.GeoDataFrame'>
epsg:32735


Unnamed: 0_level_0,geometry
userid,Unnamed: 1_level_1
78183633,"LINESTRING (917548.841 7176756.235, 916219.784..."
20420100,"LINESTRING (963788.403 7228015.063, 963788.403..."
88360442,"LINESTRING (951568.263 7233650.679, 955715.541..."
48538532,"LINESTRING (919803.743 7175996.901, 899001.009..."
91153427,"LINESTRING (917530.151 7210909.389, 917530.151..."
...,...
46347466,"LINESTRING (993265.687 7203488.841, 908253.019..."
39778980,"LINESTRING (937414.214 7170464.721, 937414.214..."
19119058,"LINESTRING (955095.247 7256517.771, 956847.806..."
81326644,"LINESTRING (900262.761 7373880.440, 905933.275..."


#### d) Calculate the distance between all posts of a user

In [11]:
movements["distance"] = movements.length
movements

Unnamed: 0_level_0,geometry,distance
userid,Unnamed: 1_level_1,Unnamed: 2_level_1
78183633,"LINESTRING (917548.841 7176756.235, 916219.784...",371386.424418
20420100,"LINESTRING (963788.403 7228015.063, 963788.403...",150721.814651
88360442,"LINESTRING (951568.263 7233650.679, 955715.541...",13312.130911
48538532,"LINESTRING (919803.743 7175996.901, 899001.009...",42632.852606
91153427,"LINESTRING (917530.151 7210909.389, 917530.151...",36403.553545
...,...,...
46347466,"LINESTRING (993265.687 7203488.841, 908253.019...",85520.367048
39778980,"LINESTRING (937414.214 7170464.721, 937414.214...",0.000000
19119058,"LINESTRING (955095.247 7256517.771, 956847.806...",1966.603155
81326644,"LINESTRING (900262.761 7373880.440, 905933.275...",12996.419820


In [12]:
# NON-EDITABLE CODE CELL FOR TESTING YOUR SOLUTION

#Check the output
movements.head()

Unnamed: 0_level_0,geometry,distance
userid,Unnamed: 1_level_1,Unnamed: 2_level_1
78183633,"LINESTRING (917548.841 7176756.235, 916219.784...",371386.424418
20420100,"LINESTRING (963788.403 7228015.063, 963788.403...",150721.814651
88360442,"LINESTRING (951568.263 7233650.679, 955715.541...",13312.130911
48538532,"LINESTRING (919803.743 7175996.901, 899001.009...",42632.852606
91153427,"LINESTRING (917530.151 7210909.389, 917530.151...",36403.553545


#### e) Answer the original questions

In [13]:
shortest_distance, mean_distance, longest_distance = movements.describe().loc[["min", "mean", "max"]].values.tolist()
shortest_distance, mean_distance, longest_distance

([0.0], [107132.06782599601], [6970668.816343962])

#### f) Save the movements in a file

In [14]:
movements.to_file(DATA_DIRECTORY / "movements.shp")

In [15]:
# NON-EDITABLE CODE CELL FOR TESTING YOUR SOLUTION

assert (DATA_DIRECTORY / "movements.shp").exists()