Skip to content

Commit 0e64168

Browse files
committed
pack: updated to use its cursor properly, which is required if huge packs are to be handled. This reduces performance, as each access requires the windows to be checked/adjusted, but that cannot be avoided with this approach. The cost can be circumvented by using other backends, such as the gitcmd or libgit2 ones.
Default is now the sliding memory map manager
1 parent aea587d commit 0e64168

File tree

2 files changed

+21
-14
lines changed

2 files changed

+21
-14
lines changed

gitdb/pack.py

Lines changed: 19 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@
7373

7474
#{ Utilities
7575

76-
def pack_object_at(data, offset, as_stream):
76+
def pack_object_at(cursor, offset, as_stream):
7777
"""
7878
:return: Tuple(abs_data_offset, PackInfo|PackStream)
7979
an object of the correct type according to the type_id of the object.
@@ -83,7 +83,7 @@ def pack_object_at(data, offset, as_stream):
8383
:param offset: offset into the data at which the object information is located
8484
:param as_stream: if True, a stream object will be returned that can read
8585
the data, otherwise you receive an info object only"""
86-
data = buffer(data, offset)
86+
data = cursor.use_region(offset).buffer()
8787
type_id, uncomp_size, data_rela_offset = pack_object_header_info(data)
8888
total_rela_offset = None # set later, actual offset until data stream begins
8989
delta_info = None
@@ -269,6 +269,10 @@ def _set_cache_(self, attr):
269269
# that we can actually write to the location - it could be a read-only
270270
# alternate for instance
271271
self._cursor = mman.make_cursor(self._indexpath).use_region()
272+
# We will assume that the index will always fully fit into memory !
273+
if mman.window_size() > 0 and self._cursor.file_size() > mman.window_size():
274+
raise AssertionError("The index file at %s is too large to fit into a mapped window (%i > %i). This is a limitation of the implementation" % (self._indexpath, self._cursor.file_size(), mman.window_size()))
275+
#END assert window size
272276
else:
273277
# now it's time to initialize everything - if we are here, someone wants
274278
# to access the fanout table or related properties
@@ -528,13 +532,13 @@ def _set_cache_(self, attr):
528532

529533
def _iter_objects(self, start_offset, as_stream=True):
530534
"""Handle the actual iteration of objects within this pack"""
531-
data = self._cursor.map()
532-
content_size = len(data) - self.footer_size
535+
c = self._cursor
536+
content_size = c.file_size() - self.footer_size
533537
cur_offset = start_offset or self.first_object_offset
534538

535539
null = NullStream()
536540
while cur_offset < content_size:
537-
data_offset, ostream = pack_object_at(data, cur_offset, True)
541+
data_offset, ostream = pack_object_at(c, cur_offset, True)
538542
# scrub the stream to the end - this decompresses the object, but yields
539543
# the amount of compressed bytes we need to get to the next offset
540544

@@ -563,12 +567,14 @@ def version(self):
563567
def data(self):
564568
"""
565569
:return: read-only data of this pack. It provides random access and usually
566-
is a memory map"""
567-
return self._cursor.map()
570+
is a memory map.
571+
:note: This method is unsafe as it returns a window into a file which might be larger than the actual window size"""
572+
# can use map as we are starting at offset 0. Otherwise we would have to use buffer()
573+
return self._cursor.use_region().map()
568574

569575
def checksum(self):
570576
""":return: 20 byte sha1 hash on all object sha's contained in this file"""
571-
return self._cursor.map()[-20:]
577+
return self._cursor.use_region(self._cursor.file_size()-20).buffer()[:]
572578

573579
def path(self):
574580
""":return: path to the packfile"""
@@ -587,9 +593,9 @@ def collect_streams(self, offset):
587593
If the object at offset is no delta, the size of the list is 1.
588594
:param offset: specifies the first byte of the object within this pack"""
589595
out = list()
590-
data = self._cursor.map()
596+
c = self._cursor
591597
while True:
592-
ostream = pack_object_at(data, offset, True)[1]
598+
ostream = pack_object_at(c, offset, True)[1]
593599
out.append(ostream)
594600
if ostream.type_id == OFS_DELTA:
595601
offset = ostream.pack_offset - ostream.delta_info
@@ -611,14 +617,14 @@ def info(self, offset):
611617
612618
:param offset: byte offset
613619
:return: OPackInfo instance, the actual type differs depending on the type_id attribute"""
614-
return pack_object_at(self._cursor.map(), offset or self.first_object_offset, False)[1]
620+
return pack_object_at(self._cursor, offset or self.first_object_offset, False)[1]
615621

616622
def stream(self, offset):
617623
"""Retrieve an object at the given file-relative offset as stream along with its information
618624
619625
:param offset: byte offset
620626
:return: OPackStream instance, the actual type differs depending on the type_id attribute"""
621-
return pack_object_at(self._cursor.map(), offset or self.first_object_offset, True)[1]
627+
return pack_object_at(self._cursor, offset or self.first_object_offset, True)[1]
622628

623629
def stream_iter(self, start_offset=0):
624630
"""
@@ -702,7 +708,7 @@ def _object(self, sha, as_stream, index=-1):
702708
sha = self._index.sha(index)
703709
# END assure sha is present ( in output )
704710
offset = self._index.offset(index)
705-
type_id, uncomp_size, data_rela_offset = pack_object_header_info(buffer(self._pack._cursor.map(), offset))
711+
type_id, uncomp_size, data_rela_offset = pack_object_header_info(self._pack._cursor.use_region(offset).buffer())
706712
if as_stream:
707713
if type_id not in delta_types:
708714
packstream = self._pack.stream(offset)

gitdb/util.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,13 @@
2525
from async import ThreadPool
2626
from smmap import (
2727
StaticWindowMapManager,
28+
SlidingWindowMapManager,
2829
SlidingWindowMapBuffer
2930
)
3031

3132
# initialize our global memory manager instance
3233
# Use it to free cached (and unused) resources.
33-
mman = StaticWindowMapManager()
34+
mman = SlidingWindowMapManager()
3435

3536
try:
3637
import hashlib

0 commit comments

Comments
 (0)