In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np
from shapely.geometry import Point, LineString, shape
from PIL import Image
from PIL.ExifTags import TAGS
import os
from shapely.ops import nearest_points

In [2]:
def sum_up_distances(image_index, signal_index, image_geotags):
    if(not (pd.isna(image_index) or pd.isna(signal_index) or np.isnan(image_index) or np.isnan(signal_index))):
        image_index = int(image_index)
        signal_index = int(signal_index)
        distances = image_geotags.distance(image_geotags.shift(-4))
        track_dist = distances[image_index:signal_index]
        track_images = image_geotags[image_index:signal_index]
        lat_diff = track_images.lat.map(float).diff()
        lon_diff = track_images.lon.map(float).diff()
        clean_distances = track_dist[(lon_diff != 0) & (lat_diff != 0)]
        return clean_distances.sum()
    else:
        return None
    
def get_lat_lon(base_path):
    def f_inner(image_name):
        image = Image.open(base_path + image_name)
        exifdata = image.getexif()
        lat, lon = [exifdata.get(34853)[i] for i in [2,4]]
        return pd.Series({"fn": image_name, "lat": lat, "lon": lon})
    return f_inner

In [3]:
project_dir = "/Users/Georg/Dropbox/Work/projects/2020_09_05_hackathon_zurich"
directory_name = f"{project_dir}/data/Dataset_complete/Trackpictures/nice_weather/nice_weather_filisur_thusis_20200824_pixelated/"

In [5]:
trackdata_raw = pd.read_csv(f"{project_dir}/data/Dataset_complete/Trackdata/TrackSiteData_2020_clean.csv")
trackdata = trackdata_raw[[col for col in trackdata_raw.columns if not "Unnamed" in col]]
signals = (trackdata[trackdata["Element Type"].isin(['Distant signal', 'Main & distant signal', 'Main signal'])]
          .dropna(subset=["Latitude", "Longitude"]))
geometry = [Point(xy) for xy in zip(signals.Longitude, signals.Latitude)]
signals_gdf = gpd.GeoDataFrame(signals, geometry=geometry)
signals_gdf.crs = "EPSG:4326"
signals_gdf = signals_gdf.to_crs("EPSG:4326")
(signals_gdf
 .to_csv(f"{project_dir}/data/Dataset_complete/Trackdata/TrackSiteData_2020_clean_geo.csv", index = False))

In [7]:
image_names = pd.Series([fn for fn in os.listdir(directory_name) if "image" in fn])
image_geotags = image_names.apply(get_lat_lon(directory_name))

In [8]:
image_geotags_gdf = gpd.GeoDataFrame(
    image_geotags, geometry=gpd.points_from_xy(image_geotags.lon,image_geotags.lat))
image_geotags_gdf.crs = "EPSG:4326"
image_geotags_gdf = image_geotags_gdf.to_crs("EPSG:4326")
image_geotags_gdf = image_geotags_gdf.sort_values("fn").reset_index(drop=True)
image_geotags_gdf.to_csv(f"{project_dir}/data/geotags/nice_weather_filisur_thusis_20200824_geotags.csv")

In [12]:
image_geotags_gdf["signal_idx"] = None
for i in signals_gdf.index:
    nearest_geom = nearest_points(signals_gdf.loc[i]["geometry"], image_geotags_gdf.geometry.unary_union)[1]
    nearest = image_geotags_gdf["geometry"] == nearest_geom
    signal_index = image_geotags_gdf[nearest].index[0]
    image_geotags_gdf.loc[signal_index, "signal_idx"] = i
    
# image_geotags_gdf["signal_idx"] = image_geotags_gdf.signal_idx.fillna(method="bfill")

In [14]:
pd.options.display.max_rows = None

In [17]:
image_geotags_gdf.loc[116:0]

Unnamed: 0,fn,lat,lon,geometry,signal_idx


In [18]:
image_geotags_gdf.loc[0:116].index

RangeIndex(start=0, stop=117, step=1)

In [22]:
df_index = image_geotags_gdf.loc[0:116].iloc[::-1].index

In [24]:
i = 0

In [30]:
image_geotags_gdf.head()

Unnamed: 0,fn,lat,lon,geometry,signal_idx
0,image_01100.jpg,46.675418,9.683572,POINT (9.68357 46.67542),274.0
1,image_01101.jpg,46.675418,9.683572,POINT (9.68357 46.67542),
2,image_01102.jpg,46.675418,9.683572,POINT (9.68357 46.67542),
3,image_01103.jpg,46.675418,9.683572,POINT (9.68357 46.67542),
4,image_01104.jpg,46.675393,9.683433,POINT (9.68343 46.67539),


In [49]:
image_geotags_gdf["distance"] = get_dist_to_next_signal(image_geotags_gdf)

In [47]:
temp

0          0.000000
1        246.333438
2        246.333438
3        246.333438
4        235.340512
5        235.340512
6        235.340512
7        235.340512
8        225.073205
9        225.073205
10       225.073205
11       225.073205
12       215.439713
13       215.439713
14       215.439713
15       215.439713
16       205.314601
17       205.314601
18       205.314601
19       195.561911
20       195.561911
21       195.561911
22       195.561911
23       186.313852
24       186.313852
25       186.313852
26       186.313852
27       177.489515
28       177.489515
29       177.489515
30       177.489515
31       168.811898
32       168.811898
33       168.811898
34       168.811898
35       160.081285
36       160.081285
37       160.081285
38       160.081285
39       152.007177
40       152.007177
41       152.007177
42       152.007177
43       143.587376
44       143.587376
45       143.587376
46       143.587376
47       135.884170
48       135.884170
49       135.884170


In [37]:
image_geotags_gdf.groupby("signal_idx").

Unnamed: 0,fn,lat,lon,geometry,signal_idx
0,image_01100.jpg,46.675418,9.683572,POINT (9.68357 46.67542),274.0
1,image_01101.jpg,46.675418,9.683572,POINT (9.68357 46.67542),262.0
2,image_01102.jpg,46.675418,9.683572,POINT (9.68357 46.67542),262.0
3,image_01103.jpg,46.675418,9.683572,POINT (9.68357 46.67542),262.0
4,image_01104.jpg,46.675393,9.683433,POINT (9.68343 46.67539),262.0


In [None]:
image_geotags_gdf

In [34]:
distances = image_geotags_gdf.iloc[::-1].distance(image_geotags_gdf.iloc[::-1].shift(-1))


  distances = image_geotags_gdf.distance(image_geotags_gdf.shift(-1))


In [35]:
distances.cumsum()

0       0.000000
1       0.000000
2       0.000000
3       0.000141
4       0.000141
5       0.000141
6       0.000141
7       0.000274
8       0.000274
9       0.000274
10      0.000274
11      0.000399
12      0.000399
13      0.000399
14      0.000399
15      0.000530
16      0.000530
17      0.000530
18      0.000655
19      0.000655
20      0.000655
21      0.000655
22      0.000774
23      0.000774
24      0.000774
25      0.000774
26      0.000889
27      0.000889
28      0.000889
29      0.000889
30      0.001001
31      0.001001
32      0.001001
33      0.001001
34      0.001114
35      0.001114
36      0.001114
37      0.001114
38      0.001214
39      0.001214
40      0.001214
41      0.001214
42      0.001324
43      0.001324
44      0.001324
45      0.001324
46      0.001424
47      0.001424
48      0.001424
49      0.001424
50      0.001518
51      0.001518
52      0.001518
53      0.001518
54      0.001608
55      0.001608
56      0.001608
57      0.001608
58      0.0016

In [31]:
df_i = image_geotags_gdf.loc[df_index[i]]
df_ip1 = image_geotags_gdf.loc[df_index[i + 1]]
# if all(coords_i == coords_ip1)

In [32]:
df_i.distance(df_ip1)

AttributeError: 'Series' object has no attribute 'distance'

In [28]:
coords_i

lat    46.675032
lon     9.680412
Name: 116, dtype: object

In [29]:
coords_ip1

lat    46.675055
lon     9.680548
Name: 115, dtype: object

In [27]:
all(coords_i == coords_ip1)

lat    False
lon    False
dtype: bool

In [None]:
df_index = image_geotags_gdf.loc[0:116]
for i in range(len(df_index)):
    if image_geotags_gdf.loc[df_index[i], ["lat", "lon"]]
    

In [20]:
image_geotags_gdf.loc[0:116].iloc[::-1].index

RangeIndex(start=116, stop=-1, step=-1)

In [15]:
image_geotags_gdf.head(200)

Unnamed: 0,fn,lat,lon,geometry,signal_idx
0,image_01100.jpg,46.675418,9.683572,POINT (9.68357 46.67542),274.0
1,image_01101.jpg,46.675418,9.683572,POINT (9.68357 46.67542),
2,image_01102.jpg,46.675418,9.683572,POINT (9.68357 46.67542),
3,image_01103.jpg,46.675418,9.683572,POINT (9.68357 46.67542),
4,image_01104.jpg,46.675393,9.683433,POINT (9.68343 46.67539),
5,image_01105.jpg,46.675393,9.683433,POINT (9.68343 46.67539),
6,image_01106.jpg,46.675393,9.683433,POINT (9.68343 46.67539),
7,image_01107.jpg,46.675393,9.683433,POINT (9.68343 46.67539),
8,image_01108.jpg,46.675373,9.683302,POINT (9.68330 46.67537),
9,image_01109.jpg,46.675373,9.683302,POINT (9.68330 46.67537),
