From 961ec34022a772eea246ecb0df149d59a42ab4d1 Mon Sep 17 00:00:00 2001 From: Jan Buchar Date: Wed, 6 Nov 2024 17:12:30 +0100 Subject: [PATCH 1/3] Fix total_size usage in memory size monitoring --- src/crawlee/_autoscaling/snapshotter.py | 1 - src/crawlee/_autoscaling/types.py | 3 --- src/crawlee/_utils/system.py | 18 ++++++++++++------ src/crawlee/events/_types.py | 4 ++-- tests/unit/_autoscaling/test_snapshotter.py | 1 - tests/unit/_autoscaling/test_system_status.py | 4 ---- 6 files changed, 14 insertions(+), 17 deletions(-) diff --git a/src/crawlee/_autoscaling/snapshotter.py b/src/crawlee/_autoscaling/snapshotter.py index 384318f9c5..e2f32634e4 100644 --- a/src/crawlee/_autoscaling/snapshotter.py +++ b/src/crawlee/_autoscaling/snapshotter.py @@ -223,7 +223,6 @@ def _snapshot_memory(self, event_data: EventSystemInfoData) -> None: event_data: System info data from which memory usage is read. """ snapshot = MemorySnapshot( - total_size=event_data.memory_info.total_size, current_size=event_data.memory_info.current_size, max_memory_size=self._max_memory_size, max_used_memory_ratio=self._max_used_memory_ratio, diff --git a/src/crawlee/_autoscaling/types.py b/src/crawlee/_autoscaling/types.py index 5ad2f07929..dbf1e828ad 100644 --- a/src/crawlee/_autoscaling/types.py +++ b/src/crawlee/_autoscaling/types.py @@ -88,9 +88,6 @@ def is_overloaded(self) -> bool: class MemorySnapshot: """A snapshot of memory usage.""" - total_size: ByteSize - """Total memory available in the system.""" - current_size: ByteSize """Memory usage of the current Python process and its children.""" diff --git a/src/crawlee/_utils/system.py b/src/crawlee/_utils/system.py index 7d52219b2c..bb5cbe927a 100644 --- a/src/crawlee/_utils/system.py +++ b/src/crawlee/_utils/system.py @@ -31,16 +31,11 @@ class CpuInfo(BaseModel): """The time at which the measurement was taken.""" -class MemoryInfo(BaseModel): +class MemoryUsageInfo(BaseModel): """Information about the memory usage.""" model_config = ConfigDict(populate_by_name=True) - total_size: Annotated[ - ByteSize, PlainValidator(ByteSize.validate), PlainSerializer(lambda size: size.bytes), Field(alias='totalSize') - ] - """Total memory available in the system.""" - current_size: Annotated[ ByteSize, PlainValidator(ByteSize.validate), @@ -56,6 +51,17 @@ class MemoryInfo(BaseModel): """The time at which the measurement was taken.""" +class MemoryInfo(MemoryUsageInfo): + """Information about system memory.""" + + model_config = ConfigDict(populate_by_name=True) + + total_size: Annotated[ + ByteSize, PlainValidator(ByteSize.validate), PlainSerializer(lambda size: size.bytes), Field(alias='totalSize') + ] + """Total memory available in the system.""" + + def get_cpu_info() -> CpuInfo: """Retrieves the current CPU usage. diff --git a/src/crawlee/events/_types.py b/src/crawlee/events/_types.py index ef1843deb9..7118f2ba76 100644 --- a/src/crawlee/events/_types.py +++ b/src/crawlee/events/_types.py @@ -7,7 +7,7 @@ from pydantic import BaseModel, ConfigDict, Field -from crawlee._utils.system import CpuInfo, MemoryInfo +from crawlee._utils.system import CpuInfo, MemoryInfo, MemoryUsageInfo class Event(str, Enum): @@ -45,7 +45,7 @@ class EventSystemInfoData(BaseModel): model_config = ConfigDict(populate_by_name=True) cpu_info: Annotated[CpuInfo, Field(alias='cpuInfo')] - memory_info: Annotated[MemoryInfo, Field(alias='memoryInfo')] + memory_info: Annotated[MemoryUsageInfo | MemoryInfo, Field(alias='memoryInfo')] class EventMigratingData(BaseModel): diff --git a/tests/unit/_autoscaling/test_snapshotter.py b/tests/unit/_autoscaling/test_snapshotter.py index cbbc10d333..c1addba7b2 100644 --- a/tests/unit/_autoscaling/test_snapshotter.py +++ b/tests/unit/_autoscaling/test_snapshotter.py @@ -47,7 +47,6 @@ def test_snapshot_memory(snapshotter: Snapshotter, event_system_data_info: Event snapshotter._snapshot_memory(event_system_data_info) assert len(snapshotter._memory_snapshots) == 1 assert snapshotter._memory_snapshots[0].current_size == event_system_data_info.memory_info.current_size - assert snapshotter._memory_snapshots[0].total_size == event_system_data_info.memory_info.total_size def test_snapshot_event_loop(snapshotter: Snapshotter) -> None: diff --git a/tests/unit/_autoscaling/test_system_status.py b/tests/unit/_autoscaling/test_system_status.py index ff7ffd637b..7da6cb6004 100644 --- a/tests/unit/_autoscaling/test_system_status.py +++ b/tests/unit/_autoscaling/test_system_status.py @@ -89,28 +89,24 @@ def test_get_system_info(snapshotter: Snapshotter, now: datetime) -> None: # Add memory snapshots system_status._snapshotter._memory_snapshots = [ MemorySnapshot( - total_size=ByteSize.from_gb(16), current_size=ByteSize.from_gb(4), max_memory_size=ByteSize.from_gb(12), max_used_memory_ratio=0.8, created_at=now - timedelta(minutes=3), ), MemorySnapshot( - total_size=ByteSize.from_gb(8), current_size=ByteSize.from_gb(7), max_memory_size=ByteSize.from_gb(8), max_used_memory_ratio=0.8, created_at=now - timedelta(minutes=2), ), MemorySnapshot( - total_size=ByteSize.from_gb(32), current_size=ByteSize.from_gb(28), max_memory_size=ByteSize.from_gb(30), max_used_memory_ratio=0.8, created_at=now - timedelta(minutes=1), ), MemorySnapshot( - total_size=ByteSize.from_gb(64), current_size=ByteSize.from_gb(48), max_memory_size=ByteSize.from_gb(60), max_used_memory_ratio=0.8, From 6f57d89d5c0f869e8f98a15b5969f856bead9f42 Mon Sep 17 00:00:00 2001 From: Jan Buchar Date: Wed, 6 Nov 2024 17:15:05 +0100 Subject: [PATCH 2/3] Add comment --- src/crawlee/events/_types.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/crawlee/events/_types.py b/src/crawlee/events/_types.py index 7118f2ba76..9ec95240f9 100644 --- a/src/crawlee/events/_types.py +++ b/src/crawlee/events/_types.py @@ -45,7 +45,10 @@ class EventSystemInfoData(BaseModel): model_config = ConfigDict(populate_by_name=True) cpu_info: Annotated[CpuInfo, Field(alias='cpuInfo')] - memory_info: Annotated[MemoryUsageInfo | MemoryInfo, Field(alias='memoryInfo')] + memory_info: Annotated[ + MemoryUsageInfo | MemoryInfo, # MemoryInfo is left here for BC + Field(alias='memoryInfo'), + ] class EventMigratingData(BaseModel): From 83506d4b2102125f13e8f3ea16846d087ddbd596 Mon Sep 17 00:00:00 2001 From: Jan Buchar Date: Wed, 6 Nov 2024 17:28:08 +0100 Subject: [PATCH 3/3] Actually, looks like the Union is unnecessary --- src/crawlee/events/_types.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/crawlee/events/_types.py b/src/crawlee/events/_types.py index 9ec95240f9..56c5505487 100644 --- a/src/crawlee/events/_types.py +++ b/src/crawlee/events/_types.py @@ -7,7 +7,7 @@ from pydantic import BaseModel, ConfigDict, Field -from crawlee._utils.system import CpuInfo, MemoryInfo, MemoryUsageInfo +from crawlee._utils.system import CpuInfo, MemoryUsageInfo class Event(str, Enum): @@ -46,7 +46,7 @@ class EventSystemInfoData(BaseModel): cpu_info: Annotated[CpuInfo, Field(alias='cpuInfo')] memory_info: Annotated[ - MemoryUsageInfo | MemoryInfo, # MemoryInfo is left here for BC + MemoryUsageInfo, Field(alias='memoryInfo'), ]