Skip to content

Commit

Permalink
BITMAKER-2794 product-dev: Development Backend Module global jobs sta…
Browse files Browse the repository at this point in the history
…ts (#173)

* All jobs storage in a single collection

* Add: initial urls and viewsets for stats

* Update viewsets and add mixins

Updated the global stats and spider jobs stats viewsets, updated the documentation, and added a mixin for the functions common to both stats viewsets.

* Update: refactoring mixin code for stats and viewSet classes

---------

Co-authored-by: Raymond Negron <raymond1242@Raymonds-MacBook-Air.local>
  • Loading branch information
webtaken and Raymond Negron committed Apr 26, 2023
1 parent 82c9b36 commit fcd6e1e
Show file tree
Hide file tree
Showing 17 changed files with 1,412 additions and 2 deletions.
6 changes: 6 additions & 0 deletions database_adapters/db_adapters.py
Expand Up @@ -120,6 +120,12 @@ def get_job_stats(self, database_name, collection_name):
)
return list(result)

def get_jobs_set_stats(self, database_name, jobs_ids):
    """Return the stats documents whose ``_id`` is one of ``jobs_ids``.

    The lookup is done against the ``job_stats`` collection of
    ``database_name`` with a single ``$in`` query.

    Args:
        database_name: Name of the database to query.
        jobs_ids: Iterable of document ids to look up.

    Returns:
        A list with every document yielded by the query (empty when
        nothing matches).
    """
    # ``$in`` must receive an array; materialising the ids lets callers pass
    # any iterable (set, generator, ...) instead of only a list.
    wanted_ids = list(jobs_ids)
    cursor = self.client[database_name]["job_stats"].find(
        {"_id": {"$in": wanted_ids}}
    )
    return list(cursor)

def get_paginated_collection_data(
self, database_name, collection_name, page, page_size
):
Expand Down
54 changes: 54 additions & 0 deletions estela-api/api/serializers/stats.py
@@ -0,0 +1,54 @@
from rest_framework import serializers


class LogsStatsSerializer(serializers.Serializer):
    # Log counters: an overall total plus one counter per log level.
    # Every field is an integer that falls back to 0 when not provided.
    total_logs = serializers.IntegerField(default=0)

    # Per-level counters.
    debug_logs = serializers.IntegerField(default=0)
    info_logs = serializers.IntegerField(default=0)
    warning_logs = serializers.IntegerField(default=0)
    error_logs = serializers.IntegerField(default=0)
    critical_logs = serializers.IntegerField(default=0)


class JobsStatsSerializer(serializers.Serializer):
    # Job counters: an overall total plus one counter per job state.
    # Every field is an integer that falls back to 0 when not provided.
    total_jobs = serializers.IntegerField(default=0)

    # Per-state counters.
    running_jobs = serializers.IntegerField(default=0)
    error_jobs = serializers.IntegerField(default=0)
    unknown_jobs = serializers.IntegerField(default=0)
    finished_jobs = serializers.IntegerField(default=0)


class PagesStatsSerializer(serializers.Serializer):
    # Page counters (total / scraped / missed); each is an integer that
    # falls back to 0 when not provided.
    total_pages = serializers.IntegerField(default=0)
    scraped_pages = serializers.IntegerField(default=0)
    missed_pages = serializers.IntegerField(default=0)


class StatusCodesStatsSerializer(serializers.Serializer):
    # One integer counter per tracked status code, each defaulting to 0.
    # NOTE(review): presumably HTTP response status codes -- confirm with
    # the code that fills these values.

    # 2xx / 3xx
    status_200 = serializers.IntegerField(default=0)
    status_301 = serializers.IntegerField(default=0)
    status_302 = serializers.IntegerField(default=0)

    # 4xx
    status_401 = serializers.IntegerField(default=0)
    status_403 = serializers.IntegerField(default=0)
    status_404 = serializers.IntegerField(default=0)
    status_429 = serializers.IntegerField(default=0)

    # 5xx
    status_500 = serializers.IntegerField(default=0)


class StatsSerializer(serializers.Serializer):
    # Aggregated stats payload: nested jobs / pages / status-code / logs
    # groups plus three scalar metrics. Field order is kept as declared.
    jobs = JobsStatsSerializer()
    pages = PagesStatsSerializer()
    # Scalar metrics default to zero when not provided.
    items_count = serializers.IntegerField(default=0)
    # NOTE(review): the unit of ``runtime`` is not defined here -- confirm.
    runtime = serializers.FloatField(default=0.0)
    status_codes = StatusCodesStatsSerializer()
    success_rate = serializers.FloatField(default=0.0)
    logs = LogsStatsSerializer()


class GlobalStatsSerializer(serializers.Serializer):
    # One entry of the stats listing: a date paired with its stats payload.
    # The date field is configured with the "%Y-%m-%d" format.
    date = serializers.DateField(format="%Y-%m-%d")
    stats = StatsSerializer()


class SpidersJobsStatsSerializer(GlobalStatsSerializer):
    # Same fields as GlobalStatsSerializer; declared as a distinct class
    # without adding or overriding anything.
    pass
11 changes: 11 additions & 0 deletions estela-api/api/urls.py
Expand Up @@ -8,6 +8,7 @@
auth as auth_views,
cronjob as cronjob_views,
job_data as job_data_views,
stats as stats_views,
)

router = routers.DefaultRouter(trailing_slash=False)
Expand Down Expand Up @@ -41,6 +42,16 @@
viewset=cronjob_views.SpiderCronJobViewSet,
basename="cronjob",
)
# Stats routes nested under a project; ``pid`` accepts lowercase
# alphanumerics and dashes.
router.register(
    prefix=r"projects/(?P<pid>[0-9a-z-]+)/stats",
    viewset=stats_views.GlobalStatsViewSet,
    basename="stats",
)
# Stats routes for one spider of a project; ``sid`` is numeric.
router.register(
    prefix=r"projects/(?P<pid>[0-9a-z-]+)/stats/spiders/(?P<sid>\d+)",
    viewset=stats_views.SpidersJobsStatsViewSet,
    basename="stats-spider",
)
router.register(prefix=r"auth", viewset=auth_views.AuthAPIViewSet, basename="auth")
router.register(
prefix=r"auth/profile", viewset=auth_views.UserProfileViewSet, basename="profile"
Expand Down
1 change: 0 additions & 1 deletion estela-api/api/views/job_data.py
Expand Up @@ -161,7 +161,6 @@ def list(self, request, *args, **kwargs):
if next_chunk:
response["next_chunk"] = next_chunk
return Response(response)

if data_type == "stats":
result = spiderdata_db_client.get_job_stats(
kwargs["pid"], job_collection_name
Expand Down

0 comments on commit fcd6e1e

Please sign in to comment.