7373
7474#{ Utilities
7575
76- def pack_object_at (data , offset , as_stream ):
76+ def pack_object_at (cursor , offset , as_stream ):
7777 """
7878 :return: Tuple(abs_data_offset, PackInfo|PackStream)
7979 an object of the correct type according to the type_id of the object.
@@ -83,7 +83,7 @@ def pack_object_at(data, offset, as_stream):
8383 :param offset: offset into the data at which the object information is located
8484 :param as_stream: if True, a stream object will be returned that can read
8585 the data, otherwise you receive an info object only"""
86- data = buffer ( data , offset )
86+ data = cursor . use_region ( offset ). buffer ( )
8787 type_id , uncomp_size , data_rela_offset = pack_object_header_info (data )
8888 total_rela_offset = None # set later, actual offset until data stream begins
8989 delta_info = None
@@ -269,6 +269,10 @@ def _set_cache_(self, attr):
269269 # that we can actually write to the location - it could be a read-only
270270 # alternate for instance
271271 self ._cursor = mman .make_cursor (self ._indexpath ).use_region ()
272+ # We will assume that the index will always fully fit into memory !
273+ if mman .window_size () > 0 and self ._cursor .file_size () > mman .window_size ():
274+ raise AssertionError ("The index file at %s is too large to fit into a mapped window (%i > %i). This is a limitation of the implementation" % (self ._indexpath , self ._cursor .file_size (), mman .window_size ()))
275+ #END assert window size
272276 else :
273277 # now it's time to initialize everything - if we are here, someone wants
274278 # to access the fanout table or related properties
@@ -528,13 +532,13 @@ def _set_cache_(self, attr):
528532
529533 def _iter_objects (self , start_offset , as_stream = True ):
530534 """Handle the actual iteration of objects within this pack"""
531- data = self ._cursor . map ()
532- content_size = len ( data ) - self .footer_size
535+ c = self ._cursor
536+ content_size = c . file_size ( ) - self .footer_size
533537 cur_offset = start_offset or self .first_object_offset
534538
535539 null = NullStream ()
536540 while cur_offset < content_size :
537- data_offset , ostream = pack_object_at (data , cur_offset , True )
541+ data_offset , ostream = pack_object_at (c , cur_offset , True )
538542 # scrub the stream to the end - this decompresses the object, but yields
539543 # the amount of compressed bytes we need to get to the next offset
540544
@@ -563,12 +567,14 @@ def version(self):
563567 def data (self ):
564568 """
565569 :return: read-only data of this pack. It provides random access and usually
566- is a memory map"""
567- return self ._cursor .map ()
570+ is a memory map.
571+ :note: This method is unsafe as it returns a window into a file which might be larger than the actual window size"""
572+ # can use map as we are starting at offset 0. Otherwise we would have to use buffer()
573+ return self ._cursor .use_region ().map ()
568574
569575 def checksum (self ):
570576 """:return: 20 byte sha1 hash on all object sha's contained in this file"""
571- return self ._cursor .map ()[ - 20 :]
577+ return self ._cursor .use_region ( self . _cursor . file_size () - 20 ). buffer ()[ :]
572578
573579 def path (self ):
574580 """:return: path to the packfile"""
@@ -587,9 +593,9 @@ def collect_streams(self, offset):
587593 If the object at offset is no delta, the size of the list is 1.
588594 :param offset: specifies the first byte of the object within this pack"""
589595 out = list ()
590- data = self ._cursor . map ()
596+ c = self ._cursor
591597 while True :
592- ostream = pack_object_at (data , offset , True )[1 ]
598+ ostream = pack_object_at (c , offset , True )[1 ]
593599 out .append (ostream )
594600 if ostream .type_id == OFS_DELTA :
595601 offset = ostream .pack_offset - ostream .delta_info
@@ -611,14 +617,14 @@ def info(self, offset):
611617
612618 :param offset: byte offset
613619 :return: OPackInfo instance, the actual type differs depending on the type_id attribute"""
614- return pack_object_at (self ._cursor . map () , offset or self .first_object_offset , False )[1 ]
620+ return pack_object_at (self ._cursor , offset or self .first_object_offset , False )[1 ]
615621
616622 def stream (self , offset ):
617623 """Retrieve an object at the given file-relative offset as stream along with its information
618624
619625 :param offset: byte offset
620626 :return: OPackStream instance, the actual type differs depending on the type_id attribute"""
621- return pack_object_at (self ._cursor . map () , offset or self .first_object_offset , True )[1 ]
627+ return pack_object_at (self ._cursor , offset or self .first_object_offset , True )[1 ]
622628
623629 def stream_iter (self , start_offset = 0 ):
624630 """
@@ -702,7 +708,7 @@ def _object(self, sha, as_stream, index=-1):
702708 sha = self ._index .sha (index )
703709 # END assure sha is present ( in output )
704710 offset = self ._index .offset (index )
705- type_id , uncomp_size , data_rela_offset = pack_object_header_info (buffer ( self ._pack ._cursor .map (), offset ))
711+ type_id , uncomp_size , data_rela_offset = pack_object_header_info (self ._pack ._cursor .use_region ( offset ). buffer ( ))
706712 if as_stream :
707713 if type_id not in delta_types :
708714 packstream = self ._pack .stream (offset )
0 commit comments