Tus for project dataset (#4485)
klakhov committed Mar 22, 2022
1 parent dd60b2d commit bceae22
Showing 7 changed files with 115 additions and 65 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added
- Task annotations importing via chunk uploads (<https://github.com/openvinotoolkit/cvat/pull/4327>)
- Advanced filtration and sorting for a list of tasks/projects/cloudstorages (<https://github.com/openvinotoolkit/cvat/pull/4403>)
- Project dataset importing via chunk uploads (<https://github.com/openvinotoolkit/cvat/pull/4485>)

### Changed
- Added missing geos dependency into Dockerfile (<https://github.com/openvinotoolkit/cvat/pull/4451>)
4 changes: 2 additions & 2 deletions cvat-core/package-lock.json

Some generated files are not rendered by default.

2 changes: 1 addition & 1 deletion cvat-core/package.json
@@ -1,6 +1,6 @@
{
"name": "cvat-core",
"version": "5.0.0",
"version": "5.0.1",
"description": "Part of Computer Vision Tool which presents an interface for client-side integration",
"main": "babel.config.js",
"scripts": {
83 changes: 52 additions & 31 deletions cvat-core/src/server-proxy.js
@@ -75,7 +75,7 @@
onProgress(bytesUploaded) {
if (onUpdate && Number.isInteger(totalSentSize) && Number.isInteger(totalSize)) {
const currentUploadedSize = totalSentSize + bytesUploaded;
const percentage = currentUploadedSize / totalSize;
const percentage = Math.round(currentUploadedSize / totalSize);
onUpdate(percentage);
}
},
@@ -612,41 +612,63 @@
}

async function importDataset(id, format, file, onUpdate) {
const { backendAPI } = config;
const { backendAPI, origin } = config;
const params = {
...enableOrganization(),
format,
filename: file.name,
};
const uploadConfig = {
chunkSize: config.uploadChunkSize * 1024 * 1024,
endpoint: `${origin}${backendAPI}/projects/${id}/dataset/`,
totalSentSize: 0,
totalSize: file.size,
onUpdate: (percentage) => {
onUpdate('The dataset is being uploaded to the server', percentage);
},
};
const url = `${backendAPI}/projects/${id}/dataset`;

const formData = new FormData();
formData.append('dataset_file', file);

return new Promise((resolve, reject) => {
async function requestStatus() {
try {
const response = await Axios.get(`${url}?action=import_status`, {
proxy: config.proxy,
});
if (response.status === 202) {
if (onUpdate && response.data.message !== '') {
onUpdate(response.data.message, response.data.progress || 0);
try {
await Axios.post(url,
new FormData(), {
params,
proxy: config.proxy,
headers: { 'Upload-Start': true },
});
await chunkUpload(file, uploadConfig);
await Axios.post(url,
new FormData(), {
params,
proxy: config.proxy,
headers: { 'Upload-Finish': true },
});
return new Promise((resolve, reject) => {
async function requestStatus() {
try {
const response = await Axios.get(url, {
params: { ...params, action: 'import_status' },
proxy: config.proxy,
});
if (response.status === 202) {
if (onUpdate && response.data.message) {
onUpdate(response.data.message, response.data.progress || 0);
}
setTimeout(requestStatus, 3000);
} else if (response.status === 201) {
resolve();
} else {
reject(generateError(response));
}
setTimeout(requestStatus, 3000);
} else if (response.status === 201) {
resolve();
} else {
reject(generateError(response));
} catch (error) {
reject(generateError(error));
}
} catch (error) {
reject(generateError(error));
}
}

Axios.post(`${url}?format=${format}`, formData, {
proxy: config.proxy,
}).then(() => {
setTimeout(requestStatus, 2000);
}).catch((error) => {
reject(generateError(error));
});
});
} catch (errorData) {
throw generateError(errorData);
}
}
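The rewritten importDataset above replaces the old single multipart POST with the same TUS-style handshake introduced for task annotation uploads (#4327): a POST with an Upload-Start header announces the import, chunkUpload transfers the file, a POST with Upload-Finish tells the server all data has arrived, and the client then polls ?action=import_status. The sketch below walks the same sequence directly over HTTP; it is illustrative only and not part of this commit — session (an authenticated requests.Session), host, project_id, dataset_path and the upload_chunks helper are assumed names.

import os
import time

import requests


def upload_project_dataset(session: requests.Session, host: str, project_id: int,
                           dataset_path: str, format_name: str) -> None:
    # Endpoint reworked by this commit; the trailing slash is optional
    # (the view's url_path is r'dataset/?$').
    url = f'{host}/api/projects/{project_id}/dataset/'
    params = {'format': format_name, 'filename': os.path.basename(dataset_path)}

    # Phase 1: announce the upload (mirrors the 'Upload-Start' POST above).
    session.post(url, params=params, headers={'Upload-Start': 'true'}).raise_for_status()

    # Phase 2: transfer the file in chunks. The real client delegates to the
    # chunkUpload() helper above; one possible Python loop over the new
    # HEAD/PATCH endpoints is sketched after the view changes below.
    upload_chunks(session, url, dataset_path)  # hypothetical helper

    # Phase 3: signal that all chunks have arrived; the server enqueues the import job.
    session.post(url, params=params, headers={'Upload-Finish': 'true'}).raise_for_status()

    # Poll until the RQ job finishes: 202 = still importing, 201 = done.
    while True:
        status = session.get(url, params={**params, 'action': 'import_status'})
        if status.status_code == 201:
            return
        if status.status_code == 202:
            time.sleep(3)
            continue
        status.raise_for_status()
        raise RuntimeError(f'unexpected status {status.status_code}')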

async function exportTask(id) {
@@ -1279,8 +1301,7 @@
setTimeout(requestStatus);
});
} catch (errorData) {
generateError(errorData);
return null;
throw generateError(errorData);
}
}

3 changes: 3 additions & 0 deletions cvat/apps/engine/models.py
@@ -248,6 +248,9 @@ def get_project_dirname(self):
def get_project_logs_dirname(self):
return os.path.join(self.get_project_dirname(), 'logs')

def get_tmp_dirname(self):
return os.path.join(self.get_project_dirname(), "tmp")

def get_client_log_path(self):
return os.path.join(self.get_project_logs_dirname(), "client.log")
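The new get_tmp_dirname() above gives each project a scratch directory in which the TUS mixin can assemble uploaded chunks; upload_finished() in the views.py changes below looks the completed file up there under its client-supplied name. A minimal, illustrative lookup (project, filename and start_import are assumed names; start_import stands in for the real _import_project_dataset call):

# Illustrative only -- mirrors upload_finished() in cvat/apps/engine/views.py below.
tmp_dir = project.get_tmp_dirname()               # <project data dir>/tmp
uploaded_file = os.path.join(tmp_dir, filename)   # `filename` is taken from the request's query params
if os.path.isfile(uploaded_file):
    start_import(uploaded_file)                   # hypothetical stand-in for _import_project_dataset(...)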

85 changes: 54 additions & 31 deletions cvat/apps/engine/views.py
@@ -250,7 +250,7 @@ def plugins(request):
'200': ProjectSerializer,
})
)
class ProjectViewSet(viewsets.ModelViewSet):
class ProjectViewSet(viewsets.ModelViewSet, UploadMixin):
queryset = models.Project.objects.prefetch_related(Prefetch('label_set',
queryset=models.Label.objects.order_by('id')
))
@@ -330,21 +330,13 @@ def tasks(self, request, pk):
'400': OpenApiResponse(description='Failed to import dataset'),
'405': OpenApiResponse(description='Format is not available'),
})
@action(detail=True, methods=['GET', 'POST'], serializer_class=None,
url_path='dataset')
@action(detail=True, methods=['GET', 'POST', 'OPTIONS'], serializer_class=None,
url_path=r'dataset/?$')
def dataset(self, request, pk):
db_project = self.get_object() # force to call check_object_permissions

if request.method == 'POST':
format_name = request.query_params.get("format", "")
self._object = self.get_object() # force to call check_object_permissions

return _import_project_dataset(
request=request,
rq_id=f"/api/project/{pk}/dataset_import",
rq_func=dm.project.import_dataset_as_project,
pk=pk,
format_name=format_name,
)
if request.method == 'POST' or request.method == 'OPTIONS':
return self.upload_data(request)
else:
action = request.query_params.get("action", "").lower()
if action in ("import_status",):
@@ -353,12 +345,12 @@ def dataset(self, request, pk):
if rq_job is None:
return Response(status=status.HTTP_404_NOT_FOUND)
elif rq_job.is_finished:
os.close(rq_job.meta['tmp_file_descriptor'])
if rq_job.meta['tmp_file_descriptor']: os.close(rq_job.meta['tmp_file_descriptor'])
os.remove(rq_job.meta['tmp_file'])
rq_job.delete()
return Response(status=status.HTTP_201_CREATED)
elif rq_job.is_failed:
os.close(rq_job.meta['tmp_file_descriptor'])
if rq_job.meta['tmp_file_descriptor']: os.close(rq_job.meta['tmp_file_descriptor'])
os.remove(rq_job.meta['tmp_file'])
rq_job.delete()
return Response(
@@ -373,7 +365,7 @@ def dataset(self, request, pk):
else:
format_name = request.query_params.get("format", "")
return _export_annotations(
db_instance=db_project,
db_instance=self._object,
rq_id="/api/project/{}/dataset/{}".format(pk, format_name),
request=request,
action=action,
@@ -382,6 +374,35 @@
filename=request.query_params.get("filename", "").lower(),
)

def get_upload_dir(self):
if 'dataset' in self.action:
return self._object.get_tmp_dirname()
return ""

def upload_finished(self, request):
if self.action == 'dataset':
format_name = request.query_params.get("format", "")
filename = request.query_params.get("filename", "")
tmp_dir = self._object.get_tmp_dirname()
uploaded_file = None
if os.path.isfile(os.path.join(tmp_dir, filename)):
uploaded_file = os.path.join(tmp_dir, filename)
return _import_project_dataset(
request=request,
filename=uploaded_file,
rq_id=f"/api/project/{self._object.pk}/dataset_import",
rq_func=dm.project.import_dataset_as_project,
pk=self._object.pk,
format_name=format_name,
)
return Response(data='Unknown upload was finished',
status=status.HTTP_400_BAD_REQUEST)

@action(detail=True, methods=['HEAD', 'PATCH'], url_path='dataset/'+UploadMixin.file_id_regex)
def append_dataset_chunk(self, request, pk, file_id):
self._object = self.get_object()
return self.append_tus_chunk(request, file_id)
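Because ProjectViewSet now mixes in UploadMixin, the dataset endpoint speaks the TUS core protocol: OPTIONS/POST on .../dataset/ open an upload, and HEAD/PATCH on .../dataset/<file_id> (the new append_dataset_chunk action) report the current offset and append data. A bare-bones chunk loop is sketched below; it is illustrative only — the creation handshake that yields the per-file upload URL lives in UploadMixin, which this diff does not show, and session, upload_url, dataset_path and chunk_size are assumed names (upload_url is assumed to come from the creation response's Location header, as in the TUS creation extension). Header names follow TUS 1.0.0.

import os

import requests

TUS_HEADERS = {'Tus-Resumable': '1.0.0'}


def upload_chunks(session: requests.Session, upload_url: str, dataset_path: str,
                  chunk_size: int = 10 * 1024 * 1024) -> None:
    total = os.path.getsize(dataset_path)
    # HEAD reports how many bytes the server already holds (0 for a fresh upload).
    head = session.head(upload_url, headers=TUS_HEADERS)
    offset = int(head.headers.get('Upload-Offset', 0))
    with open(dataset_path, 'rb') as f:
        while offset < total:
            f.seek(offset)
            chunk = f.read(chunk_size)
            response = session.patch(
                upload_url,
                data=chunk,
                headers={
                    **TUS_HEADERS,
                    'Upload-Offset': str(offset),
                    'Content-Type': 'application/offset+octet-stream',
                },
            )
            response.raise_for_status()
            # The PATCH response carries the new offset after the chunk is appended.
            offset = int(response.headers['Upload-Offset'])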

@extend_schema(summary='Method allows to download project annotations',
parameters=[
OpenApiParameter('format', description='Desired output format name\n'
@@ -1768,7 +1789,7 @@ def _export_annotations(db_instance, rq_id, request, format_name, action, callba
result_ttl=ttl, failure_ttl=ttl)
return Response(status=status.HTTP_202_ACCEPTED)

def _import_project_dataset(request, rq_id, rq_func, pk, format_name):
def _import_project_dataset(request, rq_id, rq_func, pk, format_name, filename=None):
format_desc = {f.DISPLAY_NAME: f
for f in dm.views.get_import_formats()}.get(format_name)
if format_desc is None:
@@ -1781,19 +1802,21 @@ def _import_project_dataset(request, rq_id, rq_func, pk, format_name):
rq_job = queue.fetch_job(rq_id)

if not rq_job:
serializer = DatasetFileSerializer(data=request.data)
if serializer.is_valid(raise_exception=True):
dataset_file = serializer.validated_data['dataset_file']
fd, filename = mkstemp(prefix='cvat_{}'.format(pk))
with open(filename, 'wb+') as f:
for chunk in dataset_file.chunks():
f.write(chunk)

rq_job = queue.enqueue_call(
func=rq_func,
args=(pk, filename, format_name),
job_id=rq_id,
meta={
fd = None
if not filename:
serializer = DatasetFileSerializer(data=request.data)
if serializer.is_valid(raise_exception=True):
dataset_file = serializer.validated_data['dataset_file']
fd, filename = mkstemp(prefix='cvat_{}'.format(pk))
with open(filename, 'wb+') as f:
for chunk in dataset_file.chunks():
f.write(chunk)

rq_job = queue.enqueue_call(
func=rq_func,
args=(pk, filename, format_name),
job_id=rq_id,
meta={
'tmp_file': filename,
'tmp_file_descriptor': fd,
},
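_import_project_dataset now accepts an optional filename: when the TUS flow has already written the file into the project's tmp directory, the serializer/mkstemp path is skipped and fd stays None, which is why the dataset() handler above only closes tmp_file_descriptor when it is set. A short, illustrative comparison of the two entry paths (the project id, format name and tmp path below are made up for the example):

# TUS flow added in this PR: the file already sits in the project's tmp dir,
# so no temporary descriptor is opened and meta['tmp_file_descriptor'] is None.
_import_project_dataset(
    request=request,
    filename='/data/projects/42/tmp/dataset.zip',   # hypothetical path under get_tmp_dirname()
    rq_id='/api/project/42/dataset_import',
    rq_func=dm.project.import_dataset_as_project,
    pk=42,
    format_name='CVAT 1.1',
)

# Legacy multipart flow: filename is omitted, DatasetFileSerializer validates
# request.data['dataset_file'], and the payload is spooled through mkstemp(),
# whose descriptor is kept in meta and closed once the job finishes or fails.
_import_project_dataset(
    request=request,
    rq_id='/api/project/42/dataset_import',
    rq_func=dm.project.import_dataset_as_project,
    pk=42,
    format_name='CVAT 1.1',
)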
2 changes: 2 additions & 0 deletions cvat/apps/iam/permissions.py
@@ -492,6 +492,8 @@ def get_scopes(request, view, obj):
('retrieve', 'GET'): 'view',
('tasks', 'GET'): 'view',
('dataset', 'POST'): 'import:dataset',
('append_dataset_chunk', 'HEAD'): 'import:dataset',
('append_dataset_chunk', 'PATCH'): 'import:dataset',
('annotations', 'GET'): 'export:annotations',
('dataset', 'GET'): 'export:dataset',
('export_backup', 'GET'): 'export:backup',
