forked from weecology/EvergladesTools
-
Notifications
You must be signed in to change notification settings - Fork 0
/
extract.py
152 lines (116 loc) · 4.97 KB
/
extract.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
# Download images that match annotations from Zooniverse
import json
import os
import geopandas as gp
import numpy as np
import pandas as pd
import rasterio
import requests
from PIL import Image
from skimage import io
def download_from_zooniverse(name, url):
    """Download `url` to the local path `name`, skipping files that already exist.

    The request is made and checked BEFORE the file is opened: the original
    code opened the file first, so a failed request left an empty (or
    error-body) file behind, and the existence check then skipped that
    subject on every later run.
    """
    # Existing file means a previous run already fetched this image.
    if os.path.exists(name):
        return
    response = requests.get(url, stream=True)
    if not response.ok:
        # Best-effort: report the failure and write nothing to disk.
        print(response)
        return
    with open(name, 'wb') as handle:
        # Stream in 1 KB chunks; iter_content stops on its own at EOF.
        for block in response.iter_content(1024):
            handle.write(block)
def extract_empty(parsed_data, image_data, save_dir="."):
    """Download frames that have no annotations and list them in a CSV.

    Args:
        parsed_data: path to the parsed annotations csv (see aggregate.py).
        image_data: everglades-watch-subjects.csv export with subject urls.
        save_dir: directory to write the images and empty_frames.csv into.
    """
    df = pd.read_csv(parsed_data)
    # A subject is "empty" when none of its rows carries a species label.
    is_empty = df.groupby(["subject_ids"]).apply(lambda x: sum(~x.species.isna()) == 0)
    df = df[df.subject_ids.isin(is_empty[is_empty].index)]
    df["subject_id"] = df.subject_ids.astype(int)

    # Read in image location data and join it to the empty subjects.
    image_df = pd.read_csv(image_data)
    image_df = image_df[["subject_id", "locations"]].drop_duplicates()
    joined_df = df.merge(image_df, on="subject_id")

    # The locations column holds a JSON dict; key '0' is the image url.
    joined_df["url"] = joined_df.locations.apply(lambda x: json.loads(x)['0'])

    empty_paths = []
    # One image (and one download) per distinct url.
    for url, group in joined_df.groupby("url"):
        basename = "{}".format(group.subject_id.unique()[0])
        name = "{}.png".format(os.path.join(os.path.abspath(save_dir), basename))
        download_from_zooniverse(name=name, url=url)
        # Confirm the file can be opened; drop the alpha channel if present.
        try:
            img = io.imread(name)
            if img.shape[2] == 4:
                # BUG FIX: numpy arrays have no .save() — the original
                # `img[:, :, :3].save(name)` raised AttributeError and
                # silently dropped every RGBA frame. Go through PIL, as
                # run() already does.
                Image.fromarray(img[:, :, :3]).save(name)
        except Exception as e:
            print("{} failed with {}".format(name, e))
            continue
        empty_paths.append(name)

    # Write the image list in retinanet format.
    empty_frame_df = pd.DataFrame({"image_path": empty_paths})
    csv_name = "{}.csv".format(os.path.join(save_dir, "empty_frames"))
    empty_frame_df.to_csv(csv_name)
def run(classification_shp, image_data, savedir="."):
    """Download annotated Zooniverse images and write one shapefile per image.

    Args:
        classification_shp: path to a processed shapefile (read with
            geopandas), see aggregate.py.
        image_data: everglades-watch-subjects.csv export with subject urls.
        savedir: directory to write the images and shapefiles into.

    Raises:
        ValueError: if any annotation fails to match exactly one subject.
    """
    # Read in species annotation data.
    df = gp.read_file(classification_shp)
    df = df[["subject_id", "x", "y", "species", "behavior", "geometry", "selected_i"]]
    df.subject_id = df.subject_id.astype(int)

    # Read in image location data; drop duplicate subject rows before joining.
    image_df = pd.read_csv(image_data)
    image_df = image_df[["subject_id", "locations"]].drop_duplicates()
    joined_df = df.merge(image_df, on="subject_id")

    # Each annotation must match exactly one subject. Raise instead of
    # `assert` so the check survives running under `python -O`.
    if joined_df.shape[0] != df.shape[0]:
        raise ValueError("Expected {} rows after merging subjects, got {}".format(
            df.shape[0], joined_df.shape[0]))

    # The locations column holds a JSON dict; key '0' is the image url.
    joined_df["url"] = joined_df.locations.apply(lambda x: json.loads(x)['0'])

    # One image (and one download) per distinct url.
    for url, group in joined_df.groupby("url"):
        basename = "{}".format(group.subject_id.unique()[0])
        name = "{}.png".format(os.path.join(savedir, basename))
        download_from_zooniverse(name=name, url=url)
        # Confirm the file can be opened; drop the alpha channel if present.
        try:
            numpy_image = rasterio.open(name).read()
            if numpy_image.shape[0] == 4:
                # rasterio reads (bands, h, w); PIL wants (h, w, bands).
                numpy_image = np.moveaxis(numpy_image, 0, 2)
                numpy_image = numpy_image[:, :, :3].astype("uint8")
                Image.fromarray(numpy_image).save(name)
        except Exception as e:
            print("{} failed with {}".format(name, e))
            continue
        # Write the annotations for this image as a shapefile.
        shpname = "{}.shp".format(os.path.join(savedir, basename))
        group.to_file(shpname)
if __name__ == "__main__":
    # Inputs shared by both extraction passes.
    subjects_csv = "../App/Zooniverse/data/everglades-watch-subjects.csv"
    output_dir = "/orange/ewhite/everglades/Zooniverse/parsed_images/"

    # Download annotated images and their per-image shapefiles.
    run(
        classification_shp="../App/Zooniverse/data/everglades-watch-classifications_unprojected.shp",
        image_data=subjects_csv,
        savedir=output_dir
    )

    # Optionally download and format empty frames.
    extract_empty(
        parsed_data="../App/Zooniverse/data/parsed_annotations.csv",
        image_data=subjects_csv,
        save_dir=output_dir
    )