1010 )
1111from util import (
1212 zlib ,
13+ mman ,
1314 LazyMixin ,
1415 unpack_from ,
1516 bin_to_hex ,
16- file_contents_ro_filepath ,
1717 )
1818
1919from fun import (
@@ -247,7 +247,7 @@ class PackIndexFile(LazyMixin):
247247
248248 # Don't use slots as we dynamically bind functions for each version, need a dict for this
249249 # The slots you see here are just to keep track of our instance variables
250- # __slots__ = ('_indexpath', '_fanout_table', '_data ', '_version',
250+ # __slots__ = ('_indexpath', '_fanout_table', '_cursor ', '_version',
251251 # '_sha_list_offset', '_crc_list_offset', '_pack_offset', '_pack_64_offset')
252252
253253 # used in v2 indices
@@ -261,22 +261,23 @@ def __init__(self, indexpath):
261261
262262 def _set_cache_ (self , attr ):
263263 if attr == "_packfile_checksum" :
264- self ._packfile_checksum = self ._data [- 40 :- 20 ]
264+ self ._packfile_checksum = self ._cursor . map () [- 40 :- 20 ]
265265 elif attr == "_packfile_checksum" :
266- self ._packfile_checksum = self ._data [- 20 :]
267- elif attr == "_data " :
266+ self ._packfile_checksum = self ._cursor . map () [- 20 :]
267+ elif attr == "_cursor " :
268268 # Note: We don't lock the file when reading as we cannot be sure
269269 # that we can actually write to the location - it could be a read-only
270270 # alternate for instance
271- self ._data = file_contents_ro_filepath (self ._indexpath )
271+ self ._cursor = mman . make_cursor (self ._indexpath ). use_region ( )
272272 else :
273273 # now its time to initialize everything - if we are here, someone wants
274274 # to access the fanout table or related properties
275275
276276 # CHECK VERSION
277- self ._version = (self ._data [:4 ] == self .index_v2_signature and 2 ) or 1
277+ mmap = self ._cursor .map ()
278+ self ._version = (mmap [:4 ] == self .index_v2_signature and 2 ) or 1
278279 if self ._version == 2 :
279- version_id = unpack_from (">L" , self . _data , 4 )[0 ]
280+ version_id = unpack_from (">L" , mmap , 4 )[0 ]
280281 assert version_id == self ._version , "Unsupported index version: %i" % version_id
281282 # END assert version
282283
@@ -297,16 +298,16 @@ def _set_cache_(self, attr):
297298
def _entry_v1(self, i):
    """Read the complete version 1 index record at position ``i``.

    :param i: zero-based position of the record
    :return: tuple(offset, binsha, 0)"""
    # each record is 24 bytes (4 byte big-endian offset followed by the 20 byte
    # sha); the records begin 1024 bytes into the mapped data
    record_start = 1024 + i * 24
    offset, binsha = unpack_from(">L20s", self._cursor.map(), record_start)
    return (offset, binsha, 0)
301302
def _offset_v1(self, i):
    """see ``_offset_v2``

    :param i: zero-based position of the record
    :return: the unsigned 32 bit big-endian integer that starts record ``i``"""
    # records are 24 bytes wide and begin 1024 bytes into the mapped data
    (offset,) = unpack_from(">L", self._cursor.map(), 1024 + i * 24)
    return offset
305306
def _sha_v1(self, i):
    """see ``_sha_v2``

    :param i: zero-based position of the record
    :return: the 20 bytes following the 4 byte offset field of record ``i``"""
    # skip the 1024 leading bytes, then i records of 24 bytes, then the
    # 4 byte offset field of the record itself
    sha_start = 1024 + i * 24 + 4
    return self._cursor.map()[sha_start:sha_start + 20]
310311
311312 def _crc_v1 (self , i ):
312313 """unsupported"""
@@ -322,25 +323,25 @@ def _entry_v2(self, i):
def _offset_v2(self, i):
    """:return: 32 or 64 bit offset into pack files. 64 bit offsets will only
        be returned if the pack is larger than 4 GiB, or 2^32
    :param i: zero-based position of the entry"""
    # fetch the mapped data once, both lookups read from the same buffer
    data = self._cursor.map()
    offset = unpack_from(">L", data, self._pack_offset + i * 4)[0]

    # if the high-bit is set, this indicates that we have to lookup the offset
    # in the 64 bit region of the file. The current offset ( lower 31 bits )
    # are the index into it
    if offset & 0x80000000:
        index_64 = offset & ~0x80000000
        offset = unpack_from(">Q", data, self._pack_64_offset + index_64 * 8)[0]
    # END handle 64 bit offset

    return offset
335336
def _sha_v2(self, i):
    """:return: sha at the given index of this file index instance
    :param i: zero-based position of the entry"""
    # shas are stored back to back, 20 bytes each, from _sha_list_offset on
    sha_start = self._sha_list_offset + i * 20
    return self._cursor.map()[sha_start:sha_start + 20]
340341
def _crc_v2(self, i):
    """:return: 4 bytes crc for the object at index i
    :param i: zero-based position of the entry"""
    # crc values are 4 byte big-endian integers starting at _crc_list_offset
    (crc,) = unpack_from(">L", self._cursor.map(), self._crc_list_offset + i * 4)
    return crc
344345
345346 #} END access V2
346347
@@ -358,7 +359,7 @@ def _initialize(self):
358359
359360 def _read_fanout (self , byte_offset ):
360361 """Generate a fanout table from our data"""
361- d = self ._data
362+ d = self ._cursor . map ()
362363 out = list ()
363364 append = out .append
364365 for i in range (256 ):
@@ -382,19 +383,19 @@ def path(self):
382383
def packfile_checksum(self):
    """:return: 20 byte sha representing the sha1 hash of the pack file"""
    # the value sits in the 20 bytes directly before the final 20 bytes
    data = self._cursor.map()
    return data[-40:-20]
386387
def indexfile_checksum(self):
    """:return: 20 byte sha representing the sha1 hash of this index file"""
    # the value is stored in the final 20 bytes of the mapped data
    data = self._cursor.map()
    return data[-20:]
390391
391392 def offsets (self ):
392393 """:return: sequence of all offsets in the order in which they were written
393394 :note: return value can be random accessed, but may be immmutable"""
394395 if self ._version == 2 :
395396 # read stream to array, convert to tuple
396397 a = array .array ('I' ) # 4 byte unsigned int, long are 8 byte on 64 bit it appears
397- a .fromstring (buffer (self ._data , self ._pack_offset , self ._pack_64_offset - self ._pack_offset ))
398+ a .fromstring (buffer (self ._cursor . map () , self ._pack_offset , self ._pack_64_offset - self ._pack_offset ))
398399
399400 # networkbyteorder to something array likes more
400401 if sys .byteorder == 'little' :
@@ -501,7 +502,7 @@ class PackFile(LazyMixin):
501502 for some reason - one clearly doesn't want to read 10GB at once in that
502503 case"""
503504
504- __slots__ = ('_packpath' , '_data ' , '_size' , '_version' )
505+ __slots__ = ('_packpath' , '_cursor ' , '_size' , '_version' )
505506 pack_signature = 0x5041434b # 'PACK'
506507 pack_version_default = 2
507508
@@ -513,26 +514,20 @@ def __init__(self, packpath):
513514 self ._packpath = packpath
514515
def _set_cache_(self, attr):
    """Map the pack file and cache its header fields.

    We fill the whole cache, whichever attribute gets queried first.

    :param attr: name of the attribute that triggered the call, it is not inspected
    :raise ParseError: if the first header field is not ``pack_signature``"""
    cursor = mman.make_cursor(self._packpath).use_region()

    # read the header information: three big-endian 32 bit integers at the
    # very start of the mapped data
    type_id, version, size = unpack_from(">LLL", cursor.map(), 0)

    # Validate before storing anything on self, so a rejected file does not
    # leave header values cached on the instance.
    # NOTE(review): assumes the lazy attribute machinery only calls this method
    # while the requested attribute is still missing - confirm against LazyMixin
    if type_id != self.pack_signature:
        raise ParseError("Invalid pack signature: %i" % type_id)
    # END assert type id

    self._cursor = cursor
    self._version = version
    self._size = size

    # TODO: figure out whether we should better keep the lock, or maybe
    # add a .keep file instead ?
532527
533528 def _iter_objects (self , start_offset , as_stream = True ):
534529 """Handle the actual iteration of objects within this pack"""
535- data = self ._data
530+ data = self ._cursor . map ()
536531 content_size = len (data ) - self .footer_size
537532 cur_offset = start_offset or self .first_object_offset
538533
@@ -568,11 +563,11 @@ def data(self):
568563 """
569564 :return: read-only data of this pack. It provides random access and usually
570565 is a memory map"""
571- return self ._data
566+ return self ._cursor . map ()
572567
def checksum(self):
    """:return: 20 byte sha1 hash on all object sha's contained in this file"""
    # the value is stored in the final 20 bytes of the mapped data
    data = self._cursor.map()
    return data[-20:]
570+ return self ._cursor . map () [- 20 :]
576571
577572 def path (self ):
578573 """:return: path to the packfile"""
@@ -591,8 +586,9 @@ def collect_streams(self, offset):
591586 If the object at offset is no delta, the size of the list is 1.
592587 :param offset: specifies the first byte of the object within this pack"""
593588 out = list ()
589+ data = self ._cursor .map ()
594590 while True :
595- ostream = pack_object_at (self . _data , offset , True )[1 ]
591+ ostream = pack_object_at (data , offset , True )[1 ]
596592 out .append (ostream )
597593 if ostream .type_id == OFS_DELTA :
598594 offset = ostream .pack_offset - ostream .delta_info
@@ -614,14 +610,14 @@ def info(self, offset):
614610
615611 :param offset: byte offset
616612 :return: OPackInfo instance, the actual type differs depending on the type_id attribute"""
617- return pack_object_at (self ._data , offset or self .first_object_offset , False )[1 ]
613+ return pack_object_at (self ._cursor . map () , offset or self .first_object_offset , False )[1 ]
618614
def stream(self, offset):
    """Retrieve an object at the given file-relative offset as stream along with its information

    :param offset: byte offset; a false value such as 0 selects ``first_object_offset`` instead
    :return: OPackStream instance, the actual type differs depending on the type_id attribute"""
    start = offset or self.first_object_offset
    # pack_object_at returns a sequence, the stream is its second item
    return pack_object_at(self._cursor.map(), start, True)[1]
620+ return pack_object_at (self ._cursor . map () , offset or self .first_object_offset , True )[1 ]
625621
626622 def stream_iter (self , start_offset = 0 ):
627623 """
@@ -704,7 +700,7 @@ def _object(self, sha, as_stream, index=-1):
704700 sha = self ._index .sha (index )
705701 # END assure sha is present ( in output )
706702 offset = self ._index .offset (index )
707- type_id , uncomp_size , data_rela_offset = pack_object_header_info (buffer (self ._pack ._data , offset ))
703+ type_id , uncomp_size , data_rela_offset = pack_object_header_info (buffer (self ._pack ._cursor . map () , offset ))
708704 if as_stream :
709705 if type_id not in delta_types :
710706 packstream = self ._pack .stream (offset )
0 commit comments