forked from weecology/EvergladesTools
/
extract.py
150 lines (117 loc) · 5.04 KB
/
extract.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
#Download images that match annotations from Zooniverse
import pandas as pd
import paramiko
import os
import geopandas as gp
import rasterio
import requests
import json
from skimage import io
from shapely.geometry import box
from PIL import Image
import numpy as np
def download_from_zooniverse(name, url):
    """Download a Zooniverse-hosted image to a local path.

    name: destination file path; if it already exists the download is skipped
    url: image URL taken from the subject's ``locations`` field
    """
    # Skip files that already exist so re-runs are cheap
    if os.path.exists(name):
        return

    response = requests.get(url, stream=True)
    # BUG FIX: previously the file was opened for writing before the response
    # was checked, so a failed request left an empty file on disk — and the
    # exists-check above then permanently blocked any retry. Check first,
    # and only create the file for a successful response.
    if not response.ok:
        print(response)
        return

    with open(name, 'wb') as handle:
        for block in response.iter_content(1024):
            if not block:
                break
            handle.write(block)
def extract_empty(parsed_data, image_data, save_dir="."):
    """Download Zooniverse images for frames annotated as empty (no species).

    parsed_data: path to a parsed annotations .csv (see aggregate.py)
    image_data: everglades-watch-subjects.csv exported from Zooniverse
    save_dir: directory that receives the downloaded .png images

    Writes <save_dir>/empty_frames.csv listing the image paths
    (retinanet-style empty-frame format).
    """
    df = pd.read_csv(parsed_data)

    # Empty frames are rows with no species label
    df = df[df.species.isna()]
    df["subject_id"] = df.subject_ids.astype(int)

    # Read in image location data; subjects file may repeat rows
    image_df = pd.read_csv(image_data)
    image_df = image_df[["subject_id", "locations"]]
    image_df = image_df.drop_duplicates()
    joined_df = df.merge(image_df, on="subject_id")

    # locations is a JSON dict; key '0' holds the image URL
    joined_df["url"] = joined_df.locations.apply(lambda x: json.loads(x)['0'])
    grouped_df = joined_df.groupby("url")

    # Split into per-image groups and download each image once
    group_data = [grouped_df.get_group(x) for x in grouped_df.groups]
    empty_paths = []
    for group in group_data:
        # Format for download
        download_url = group.url.unique()[0]
        basename = "{}".format(group.subject_id.unique()[0])
        name = "{}.png".format(os.path.join(os.path.abspath(save_dir), basename))
        download_from_zooniverse(name=name, url=download_url)

        # Confirm the file can be opened; strip the alpha channel from RGBA
        try:
            img = io.imread(name)
            if img.shape[2] == 4:
                # BUG FIX: a numpy ndarray has no .save() — the old code
                # (img[:,:,:3].save(name)) raised AttributeError and silently
                # dropped every RGBA frame via the except below. Convert to a
                # PIL Image before saving.
                Image.fromarray(img[:, :, :3]).save(name)
        except Exception as e:
            print("{} failed with {}".format(name, e))
            continue
        empty_paths.append(name)

    # Write dict in retinanet format
    empty_frame_df = pd.DataFrame({"image_path": empty_paths})
    csv_name = "{}.csv".format(os.path.join(save_dir, "empty_frames"))
    empty_frame_df.to_csv(csv_name)
def run(classification_shp, image_data, savedir="."):
    """Download Zooniverse images that match species annotations and write
    one shapefile of annotations per downloaded image.

    classification_shp: path to a processed shapefile, see aggregate.py
    image_data: everglades-watch-subjects.csv exported from Zooniverse
    savedir: directory that receives the .png images and .shp files
    """
    # Read in species annotation data
    df = gp.read_file(classification_shp)
    df = df[["subject_id", "x", "y", "species", "behavior", "geometry", "selected_i"]]
    # Cast once up front (the original cast twice); merge key must match the csv's int dtype
    df.subject_id = df.subject_id.astype(int)

    # Read in image location data; subjects file may repeat rows
    image_df = pd.read_csv(image_data)
    image_df = image_df[["subject_id", "locations"]]
    image_df = image_df.drop_duplicates()

    joined_df = df.merge(image_df, on="subject_id")
    # Every annotation should match exactly one subject location
    assert joined_df.shape[0] == df.shape[0]

    # locations is a JSON dict; key '0' holds the image URL
    joined_df["url"] = joined_df.locations.apply(lambda x: json.loads(x)['0'])
    grouped_df = joined_df.groupby("url")

    # Split into per-image groups, download each image, write a shapefile
    group_data = [grouped_df.get_group(x) for x in grouped_df.groups]
    for group in group_data:
        # Format for download
        download_url = group.url.unique()[0]
        basename = "{}".format(group.subject_id.unique()[0])
        name = "{}.png".format(os.path.join(savedir, basename))
        download_from_zooniverse(name=name, url=download_url)

        # Confirm the file can be opened; rewrite 4-band RGBA as 3-band RGB.
        # rasterio reads bands-first (bands, rows, cols), hence the moveaxis.
        try:
            numpy_image = rasterio.open(name).read()
            if numpy_image.shape[0] == 4:
                numpy_image = np.moveaxis(numpy_image, 0, 2)
                numpy_image = numpy_image[:, :, :3].astype("uint8")
                image = Image.fromarray(numpy_image)
                image.save(name)
        except Exception as e:
            print("{} failed with {}".format(name, e))
            continue

        # Create a shapefile of this image's annotations next to the png
        shpname = "{}.shp".format(os.path.join(savedir, basename))
        group.to_file(shpname)
if __name__ == "__main__":
    # Shared inputs/outputs for both pipeline steps
    subjects_csv = "../App/Zooniverse/data/everglades-watch-subjects.csv"
    output_dir = "/orange/ewhite/everglades/Zooniverse/parsed_images/"

    # Download annotated images and write per-image shapefiles
    run(
        classification_shp="../App/Zooniverse/data/everglades-watch-classifications_unprojected.shp",
        image_data=subjects_csv,
        savedir=output_dir
    )

    # Optionally download and format empty frames
    extract_empty(
        parsed_data="../App/Zooniverse/data/parsed_annotations.csv",
        image_data=subjects_csv,
        save_dir=output_dir
    )