55"""Contains PackIndexFile and PackFile implementations""" 
66from  gitdb .exc  import  (
77						BadObject ,
8- 						UnsupportedOperation 
8+ 						UnsupportedOperation ,
9+ 						ParseError 
910						)
1011from  util  import  (
1112					zlib ,
1516					)
1617
1718from  fun  import  (
19+ 					create_pack_object_header ,
1820					pack_object_header_info ,
1921					is_equal_canonical_sha ,
2022					type_id_to_type_map ,
4749						DeltaApplyReader ,
4850						Sha1Writer ,
4951						NullStream ,
52+ 						FlexibleSha1Writer 
5053					)
5154
5255from  struct  import  (
5356						pack ,
5457						unpack ,
5558					)
5659
60+ from  binascii  import  crc32 
61+ 
5762from  itertools  import  izip 
5863import  array 
5964import  os 
@@ -119,10 +124,113 @@ def pack_object_at(data, offset, as_stream):
119124			return  abs_data_offset , ODeltaPackInfo (offset , type_id , uncomp_size , delta_info )
120125		# END handle info 
121126	# END handle stream 
122- 		
127+ 
def write_stream_to_pack(read, write, zstream, want_crc=False):
	"""Compress everything obtainable from ``read`` and hand it to ``write``.
	
	Data is pulled in pieces of ``chunk_size`` bytes; the first piece that is 
	shorter than ``chunk_size`` (including an empty one) ends the copy. Afterwards
	the compressor is flushed and the remaining bytes are written as well.
	
	:param read: callable taking a byte count and returning that many bytes or less
	:param write: callable receiving each compressed piece; its return value is ignored
	:param zstream: compressor object providing ``compress(data)`` and ``flush()``
	:param want_crc: if True, a running crc32 over the *compressed* bytes is computed
	:return: tuple(bytes read, bytes written, crc32) - the crc stays 0 if want_crc 
		was false"""
	bytes_in = 0			# uncompressed bytes consumed
	bytes_out = 0			# compressed bytes emitted
	checksum = 0
	exhausted = False
	
	while not exhausted:
		raw = read(chunk_size)
		# a short read signals the end of the input stream
		exhausted = len(raw) != chunk_size
		bytes_in += len(raw)
		
		deflated = zstream.compress(raw)
		bytes_out += len(deflated)
		write(deflated)
		if want_crc:
			checksum = crc32(deflated, checksum)
		#END update crc
	#END while input remains
	
	# whatever the compressor still buffers belongs to the stream too
	tail = zstream.flush()
	bytes_out += len(tail)
	write(tail)
	if want_crc:
		checksum = crc32(tail, checksum)
	#END update crc
	
	return (bytes_in, bytes_out, checksum)
161+ 
162+ 
123163#} END utilities 
124164
125165
class IndexWriter(object):
	"""Utility to cache index information, allowing to write all information later
	in one go to the given stream
	
	Entries are collected with :meth:`append` and serialized with :meth:`write`.
	:note: currently only writes v2 indices"""
	__slots__ = '_objs'
	
	def __init__(self):
		# list of (binsha, crc, offset) tuples, one per object
		self._objs = list()
		
	def append(self, binsha, crc, offset):
		"""Append one piece of object information
		:param binsha: binary sha of the object
		:param crc: crc32 of the object's pack entry
		:param offset: offset of the object's entry within the pack"""
		self._objs.append((binsha, crc, offset))
		
	def _fanout(self):
		""":return: list of 256 cumulative counts. Entry ``i`` is the number of 
			cached objects whose sha starts with a byte value <= ``i``, so the 
			last entry equals the total number of objects"""
		fanout = [0] * 256
		for entry in self._objs:
			# slicing keeps this working for str as well as bytes shas
			fanout[ord(entry[0][:1])] += 1
		#END count objects per first byte
		
		# turn per-bucket counts into running totals
		for i in range(1, 256):
			fanout[i] += fanout[i - 1]
		#END accumulate
		return fanout
		
	def write(self, pack_binsha, write):
		"""Write the index file using the given write method
		
		The layout emitted is: signature, version, 256 fanout entries, the sorted 
		shas, one crc32 per object, one 32 bit offset per object, the 64 bit offsets
		of all objects that did not fit into 31 bits, the pack sha, and finally the 
		sha over everything written before.
		
		:param pack_binsha: 20 byte binary sha over the whole pack that we index
		:param write: function receiving the strings to write into the index stream
		:note: sorts the cached entries in place"""
		# validate before emitting anything, to not leave a truncated index behind
		assert(len(pack_binsha) == 20)
		
		# sort for sha1 hash
		self._objs.sort(key=lambda o: o[0])
		
		sha_writer = FlexibleSha1Writer(write)
		sha_write = sha_writer.write
		sha_write(PackIndexFile.index_v2_signature)
		sha_write(pack(">L", PackIndexFile.index_version_default))
		
		# fanout - cumulative counts, written in one go
		sha_write(pack('>256L', *self._fanout()))
		
		# sha1 ordered
		# save calls, that is push them into c
		sha_write(''.join(t[0] for t in self._objs))
		
		# crc32 - masked as crc32 may yield negative values
		for t in self._objs:
			sha_write(pack('>L', t[1] & 0xffffffff))
		#END for each crc
		
		large_offsets = list()
		# offset 32
		for t in self._objs:
			ofs = t[2]
			if ofs > 0x7fffffff:
				# too large for 31 bits: store the index into the 64 bit table 
				# instead and flag the entry through its most significant bit
				large_offsets.append(ofs)
				ofs = 0x80000000 + len(large_offsets) - 1
			#END handle 64 bit offsets
			sha_write(pack('>L', ofs & 0xffffffff))
		#END for each offset
		
		# offset 64
		for ofs in large_offsets:
			sha_write(pack(">Q", ofs))
		#END for each offset
		
		# trailer - the index sha is written directly as it must not hash itself
		sha_write(pack_binsha)
		write(sha_writer.sha(as_hex=False))
233+ 	
126234
127235class  PackIndexFile (LazyMixin ):
128236	"""A pack index provides offsets into the corresponding pack, allowing to find 
@@ -135,6 +243,8 @@ class PackIndexFile(LazyMixin):
135243
136244	# used in v2 indices 
137245	_sha_list_offset  =  8  +  1024 
246+ 	index_v2_signature  =  '\377 tOc' 
247+ 	index_version_default  =  2 
138248
139249	def  __init__ (self , indexpath ):
140250		super (PackIndexFile , self ).__init__ ()
@@ -155,7 +265,7 @@ def _set_cache_(self, attr):
155265			# to access the fanout table or related properties 
156266
157267			# CHECK VERSION 
158- 			self ._version  =  (self ._data [:4 ] ==  ' \377 tOc'   and  2 ) or  1 
268+ 			self ._version  =  (self ._data [:4 ] ==  self . index_v2_signature  and  2 ) or  1 
159269			if  self ._version  ==  2 :
160270				version_id  =  unpack_from (">L" , self ._data , 4 )[0 ] 
161271				assert  version_id  ==  self ._version , "Unsupported index version: %i"  %  version_id 
@@ -383,6 +493,8 @@ class PackFile(LazyMixin):
383493		case""" 
384494
385495	__slots__  =  ('_packpath' , '_data' , '_size' , '_version' )
496+ 	pack_signature  =  0x5041434b 		# 'PACK' 
497+ 	pack_version_default  =  2 
386498
387499	# offset into our data at which the first object starts 
388500	first_object_offset  =  3 * 4 		# header bytes 
@@ -396,15 +508,19 @@ def _set_cache_(self, attr):
396508			self ._data  =  file_contents_ro_filepath (self ._packpath )
397509
398510			# read the header information 
399- 			type_id , self ._version , self ._size  =  unpack_from (">4sLL " , self ._data , 0 )
511+ 			type_id , self ._version , self ._size  =  unpack_from (">LLL " , self ._data , 0 )
400512
401513			# TODO: figure out whether we should better keep the lock, or maybe 
402514			# add a .keep file instead ? 
403515		else : # must be '_size' or '_version' 
404516			# read header info - we do that just with a file stream 
405- 			type_id , self ._version , self ._size  =  unpack (">4sLL " , open (self ._packpath ).read (12 ))
517+ 			type_id , self ._version , self ._size  =  unpack (">LLL " , open (self ._packpath ).read (12 ))
406518		# END handle header 
407519
520+ 		if  type_id  !=  self .pack_signature :
521+ 			raise  ParseError ("Invalid pack signature: %i"  %  type_id )
522+ 		#END assert type id 
523+ 		
408524	def  _iter_objects (self , start_offset , as_stream = True ):
409525		"""Handle the actual iteration of objects within this pack""" 
410526		data  =  self ._data 
@@ -759,7 +875,8 @@ def collect_streams(self, sha):
759875
760876
761877	@classmethod  
762- 	def  create (cls , object_iter , pack_write , index_write = None ):
878+ 	def  write_pack (cls , object_iter , pack_write , index_write = None , 
879+ 					object_count  =  None , zlib_compression  =  zlib .Z_BEST_SPEED ):
763880		""" 
764881		Create a new pack by putting all objects obtained by the object_iterator 
765882		into a pack which is written using the pack_write method. 
@@ -769,9 +886,74 @@ def create(cls, object_iter, pack_write, index_write=None):
769886		:param pack_write: function to receive strings to write into the pack stream 
770887		:param indx_write: if not None, the function writes the index file corresponding 
771888			to the pack. 
889+ 		:param object_count: if you can provide the amount of objects in your iteration,  
890+ 			this would be the place to put it. Otherwise we have to pre-iterate and store  
891+ 			all items into a list to get the number, which uses more memory than necessary. 
892+ 		:param zlib_compression: the zlib compression level to use 
893+ 		:return: binary sha over all the contents of the pack 
772894		:note: The destination of the write functions is up to the user. It could 
773- 			be a socket, or a file for instance""" 
895+ 			be a socket, or a file for instance 
896+ 		:note: writes only undeltified objects""" 
897+ 		objs  =  object_iter 
898+ 		if  not  object_count :
899+ 			if  not  isinstance (object_iter , (tuple , list )):
900+ 				objs  =  list (object_iter )
901+ 			#END handle list type 
902+ 			object_count  =  len (objs )
903+ 		#END handle object 
904+ 		
905+ 		pack_writer  =  FlexibleSha1Writer (pack_write )
906+ 		pwrite  =  pack_writer .write 
907+ 		ofs  =  0 											# current offset into the pack file 
908+ 		index  =  None 
909+ 		wants_index  =  index_write  is  not   None 
910+ 		
911+ 		# write header 
912+ 		pwrite (pack ('>LLL' , PackFile .pack_signature , PackFile .pack_version_default , object_count ))
913+ 		ofs  +=  12 
914+ 		
915+ 		if  wants_index :
916+ 			index  =  IndexWriter ()
917+ 		#END handle index header 
918+ 		
919+ 		actual_count  =  0 
920+ 		for  obj  in  objs :
921+ 			actual_count  +=  1 
922+ 			
923+ 			# object header 
924+ 			hdr  =  create_pack_object_header (obj .type_id , obj .size )
925+ 			pwrite (hdr )
926+ 			
927+ 			# data stream 
928+ 			zstream  =  zlib .compressobj (zlib_compression )
929+ 			ostream  =  obj .stream 
930+ 			br , bw , crc  =  write_stream_to_pack (ostream .read , pwrite , zstream , want_crc  =  index_write )
931+ 			assert (br  ==  obj .size )
932+ 			if  wants_index :
933+ 				index .append (obj .binsha , crc , ofs )
934+ 			#END handle index 
935+ 			
936+ 			ofs  +=  len (hdr ) +  bw 
937+ 			if  actual_count  ==  object_count :
938+ 				break 
939+ 			#END abort once we are done 
940+ 		#END for each object 
941+ 		
942+ 		if  actual_count  !=  object_count :
943+ 			raise  ValueError ("Expected to write %i objects into pack, but received only %i from iterators"  %  (object_count , actual_count ))
944+ 		#END count assertion 
945+ 		
946+ 		# write footer 
947+ 		binsha  =  pack_writer .sha (as_hex  =  False )
948+ 		assert  len (binsha ) ==  20 
949+ 		pack_write (binsha )
950+ 		ofs  +=  len (binsha )							# just for completeness ;) 
951+ 		
952+ 		if  wants_index :
953+ 			index .write (binsha , index_write )
954+ 		#END handle index 
774955
956+ 		return  binsha 
775957
776958
777959	#} END interface 
0 commit comments