Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Tree: b5845e0abb
Fetching contributors…

Cannot retrieve contributors at this time

935 lines (802 sloc) 32.685 kB
{-# OPTIONS_GHC -fno-warn-unused-imports #-}
-----------------------------------------------------------------------------
-- |
-- Module : Distribution.Client.Tar
-- Copyright : (c) 2007 Bjorn Bringert,
-- 2008 Andrea Vezzosi,
-- 2008-2009 Duncan Coutts
-- License : BSD3
--
-- Maintainer : duncan@community.haskell.org
-- Portability : portable
--
-- Reading, writing and manipulating \"@.tar@\" archive files.
--
-----------------------------------------------------------------------------
module Distribution.Client.Tar (
-- * High level \"all in one\" operations
createTarGzFile,
extractTarGzFile,
-- * Converting between internal and external representation
read,
write,
writeEntries,
-- * Packing and unpacking files to\/from internal representation
pack,
unpack,
-- * Tar entry and associated types
Entry(..),
entryPath,
EntryContent(..),
Ownership(..),
FileSize,
Permissions,
EpochTime,
DevMajor,
DevMinor,
TypeCode,
Format(..),
buildTreeRefTypeCode,
entrySizeInBlocks,
entrySizeInBytes,
-- * Constructing simple entry values
simpleEntry,
fileEntry,
directoryEntry,
-- * TarPath type
TarPath,
toTarPath,
fromTarPath,
-- ** Sequences of tar entries
Entries(..),
foldrEntries,
foldlEntries,
unfoldrEntries,
mapEntries,
filterEntries,
entriesIndex,
) where
import Data.Char (ord)
import Data.Int (Int64)
import Data.Bits (Bits, shiftL, testBit)
import Data.List (foldl')
import Numeric (readOct, showOct)
import Control.Monad (MonadPlus(mplus), when)
import qualified Data.Map as Map
import qualified Data.ByteString.Lazy as BS
import qualified Data.ByteString.Lazy.Char8 as BS.Char8
import Data.ByteString.Lazy (ByteString)
import qualified Codec.Compression.GZip as GZip
import qualified Distribution.Client.GZipUtils as GZipUtils
import System.FilePath
( (</>) )
import qualified System.FilePath as FilePath.Native
import qualified System.FilePath.Windows as FilePath.Windows
import qualified System.FilePath.Posix as FilePath.Posix
import System.Directory
( getDirectoryContents, doesDirectoryExist, getModificationTime
, getPermissions, createDirectoryIfMissing, copyFile )
import qualified System.Directory as Permissions
( Permissions(executable) )
import Distribution.Compat.FilePerms
( setFileExecutable )
import System.Posix.Types
( FileMode )
import System.Time
( ClockTime(..) )
import System.IO
( IOMode(ReadMode), openBinaryFile, hFileSize )
import System.IO.Unsafe (unsafeInterleaveIO)
import Prelude hiding (read)
--
-- * High level operations
--
createTarGzFile :: FilePath -- ^ Full Tarball path
-> FilePath -- ^ Base directory
-> FilePath -- ^ Directory to archive, relative to base dir
-> IO ()
createTarGzFile tar base dir =
BS.writeFile tar . GZip.compress . write =<< pack base [dir]
extractTarGzFile :: FilePath -- ^ Destination directory
-> FilePath -- ^ Expected subdir (to check for tarbombs)
-> FilePath -- ^ Tarball
-> IO ()
extractTarGzFile dir expected tar = do
unpack dir . checkTarbomb expected . read . GZipUtils.maybeDecompress =<< BS.readFile tar
--
-- * Entry type
--
type FileSize = Int64
-- | The number of seconds since the UNIX epoch
type EpochTime = Int64
type DevMajor = Int
type DevMinor = Int
type TypeCode = Char
type Permissions = FileMode
-- | Tar archive entry.
--
data Entry = Entry {
-- | The path of the file or directory within the archive. This is in a
-- tar-specific form. Use 'entryPath' to get a native 'FilePath'.
entryTarPath :: !TarPath,
-- | The real content of the entry. For 'NormalFile' this includes the
-- file data. An entry usually contains a 'NormalFile' or a 'Directory'.
entryContent :: !EntryContent,
-- | File permissions (Unix style file mode).
entryPermissions :: !Permissions,
-- | The user and group to which this file belongs.
entryOwnership :: !Ownership,
-- | The time the file was last modified.
entryTime :: !EpochTime,
-- | The tar format the archive is using.
entryFormat :: !Format
}
-- | Type code for the local build tree reference entry type. We don't use the
-- symbolic link entry type because it allows only 100 ASCII characters for the
-- path.
buildTreeRefTypeCode :: TypeCode
buildTreeRefTypeCode = 'C'
-- | Native 'FilePath' of the file or directory within the archive.
--
entryPath :: Entry -> FilePath
entryPath = fromTarPath . entryTarPath
-- | Return the size of an entry in bytes.
entrySizeInBytes :: Entry -> FileSize
entrySizeInBytes = (*512) . fromIntegral . entrySizeInBlocks
-- | Return the number of blocks in an entry.
entrySizeInBlocks :: Entry -> Int
entrySizeInBlocks entry = 1 + case entryContent entry of
NormalFile _ size -> bytesToBlocks size
OtherEntryType _ _ size -> bytesToBlocks size
_ -> 0
where
bytesToBlocks s = 1 + ((fromIntegral s - 1) `div` 512)
-- | The content of a tar archive entry, which depends on the type of entry.
--
-- Portable archives should contain only 'NormalFile' and 'Directory'.
--
data EntryContent = NormalFile ByteString !FileSize
| Directory
| SymbolicLink !LinkTarget
| HardLink !LinkTarget
| CharacterDevice !DevMajor !DevMinor
| BlockDevice !DevMajor !DevMinor
| NamedPipe
| OtherEntryType !TypeCode ByteString !FileSize
data Ownership = Ownership {
-- | The owner user name. Should be set to @\"\"@ if unknown.
ownerName :: String,
-- | The owner group name. Should be set to @\"\"@ if unknown.
groupName :: String,
-- | Numeric owner user id. Should be set to @0@ if unknown.
ownerId :: !Int,
-- | Numeric owner group id. Should be set to @0@ if unknown.
groupId :: !Int
}
-- | There have been a number of extensions to the tar file format over the
-- years. They all share the basic entry fields and put more meta-data in
-- different extended headers.
--
data Format =
-- | This is the classic Unix V7 tar format. It does not support owner and
-- group names, just numeric Ids. It also does not support device numbers.
V7Format
-- | The \"USTAR\" format is an extension of the classic V7 format. It was
-- later standardised by POSIX. It has some restructions but is the most
-- portable format.
--
| UstarFormat
-- | The GNU tar implementation also extends the classic V7 format, though
-- in a slightly different way from the USTAR format. In general for new
-- archives the standard USTAR/POSIX should be used.
--
| GnuFormat
deriving Eq
-- | @rw-r--r--@ for normal files
ordinaryFilePermissions :: Permissions
ordinaryFilePermissions = 0o0644
-- | @rwxr-xr-x@ for executable files
executableFilePermissions :: Permissions
executableFilePermissions = 0o0755
-- | @rwxr-xr-x@ for directories
directoryPermissions :: Permissions
directoryPermissions = 0o0755
isExecutable :: Permissions -> Bool
isExecutable p = testBit p 0 || testBit p 6 -- user or other exectuable
-- | An 'Entry' with all default values except for the file name and type. It
-- uses the portable USTAR/POSIX format (see 'UstarHeader').
--
-- You can use this as a basis and override specific fields, eg:
--
-- > (emptyEntry name HardLink) { linkTarget = target }
--
simpleEntry :: TarPath -> EntryContent -> Entry
simpleEntry tarpath content = Entry {
entryTarPath = tarpath,
entryContent = content,
entryPermissions = case content of
Directory -> directoryPermissions
_ -> ordinaryFilePermissions,
entryOwnership = Ownership "" "" 0 0,
entryTime = 0,
entryFormat = UstarFormat
}
-- | A tar 'Entry' for a file.
--
-- Entry fields such as file permissions and ownership have default values.
--
-- You can use this as a basis and override specific fields. For example if you
-- need an executable file you could use:
--
-- > (fileEntry name content) { fileMode = executableFileMode }
--
fileEntry :: TarPath -> ByteString -> Entry
fileEntry name fileContent =
simpleEntry name (NormalFile fileContent (BS.length fileContent))
-- | A tar 'Entry' for a directory.
--
-- Entry fields such as file permissions and ownership have default values.
--
directoryEntry :: TarPath -> Entry
directoryEntry name = simpleEntry name Directory
--
-- * Tar paths
--
-- | The classic tar format allowed just 100 characters for the file name. The
-- USTAR format extended this with an extra 155 characters, however it uses a
-- complex method of splitting the name between the two sections.
--
-- Instead of just putting any overflow into the extended area, it uses the
-- extended area as a prefix. The aggravating insane bit however is that the
-- prefix (if any) must only contain a directory prefix. That is the split
-- between the two areas must be on a directory separator boundary. So there is
-- no simple calculation to work out if a file name is too long. Instead we
-- have to try to find a valid split that makes the name fit in the two areas.
--
-- The rationale presumably was to make it a bit more compatible with old tar
-- programs that only understand the classic format. A classic tar would be
-- able to extract the file name and possibly some dir prefix, but not the
-- full dir prefix. So the files would end up in the wrong place, but that's
-- probably better than ending up with the wrong names too.
--
-- So it's understandable but rather annoying.
--
-- * Tar paths use posix format (ie @\'/\'@ directory separators), irrespective
-- of the local path conventions.
--
-- * The directory separator between the prefix and name is /not/ stored.
--
data TarPath = TarPath FilePath -- path name, 100 characters max.
FilePath -- path prefix, 155 characters max.
deriving (Eq, Ord)
-- | Convert a 'TarPath' to a native 'FilePath'.
--
-- The native 'FilePath' will use the native directory separator but it is not
-- otherwise checked for validity or sanity. In particular:
--
-- * The tar path may be invalid as a native path, eg the filename @\"nul\"@ is
-- not valid on Windows.
--
-- * The tar path may be an absolute path or may contain @\"..\"@ components.
-- For security reasons this should not usually be allowed, but it is your
-- responsibility to check for these conditions (eg using 'checkSecurity').
--
fromTarPath :: TarPath -> FilePath
fromTarPath (TarPath name prefix) = adjustDirectory $
FilePath.Native.joinPath $ FilePath.Posix.splitDirectories prefix
++ FilePath.Posix.splitDirectories name
where
adjustDirectory | FilePath.Posix.hasTrailingPathSeparator name
= FilePath.Native.addTrailingPathSeparator
| otherwise = id
-- | Convert a native 'FilePath' to a 'TarPath'.
--
-- The conversion may fail if the 'FilePath' is too long. See 'TarPath' for a
-- description of the problem with splitting long 'FilePath's.
--
toTarPath :: Bool -- ^ Is the path for a directory? This is needed because for
-- directories a 'TarPath' must always use a trailing @\/@.
-> FilePath -> Either String TarPath
toTarPath isDir = splitLongPath
. addTrailingSep
. FilePath.Posix.joinPath
. FilePath.Native.splitDirectories
where
addTrailingSep | isDir = FilePath.Posix.addTrailingPathSeparator
| otherwise = id
-- | Take a sanitized path, split on directory separators and try to pack it
-- into the 155 + 100 tar file name format.
--
-- The stragey is this: take the name-directory components in reverse order
-- and try to fit as many components into the 100 long name area as possible.
-- If all the remaining components fit in the 155 name area then we win.
--
splitLongPath :: FilePath -> Either String TarPath
splitLongPath path =
case packName nameMax (reverse (FilePath.Posix.splitPath path)) of
Left err -> Left err
Right (name, []) -> Right (TarPath name "")
Right (name, first:rest) -> case packName prefixMax remainder of
Left err -> Left err
Right (_ , (_:_)) -> Left "File name too long (cannot split)"
Right (prefix, []) -> Right (TarPath name prefix)
where
-- drop the '/' between the name and prefix:
remainder = init first : rest
where
nameMax, prefixMax :: Int
nameMax = 100
prefixMax = 155
packName _ [] = Left "File name empty"
packName maxLen (c:cs)
| n > maxLen = Left "File name too long"
| otherwise = Right (packName' maxLen n [c] cs)
where n = length c
packName' maxLen n ok (c:cs)
| n' <= maxLen = packName' maxLen n' (c:ok) cs
where n' = n + length c
packName' _ _ ok cs = (FilePath.Posix.joinPath ok, cs)
-- | The tar format allows just 100 ASCII characters for the 'SymbolicLink' and
-- 'HardLink' entry types.
--
newtype LinkTarget = LinkTarget FilePath
deriving (Eq, Ord)
-- | Convert a tar 'LinkTarget' to a native 'FilePath'.
--
fromLinkTarget :: LinkTarget -> FilePath
fromLinkTarget (LinkTarget path) = adjustDirectory $
FilePath.Native.joinPath $ FilePath.Posix.splitDirectories path
where
adjustDirectory | FilePath.Posix.hasTrailingPathSeparator path
= FilePath.Native.addTrailingPathSeparator
| otherwise = id
--
-- * Entries type
--
-- | A tar archive is a sequence of entries.
data Entries = Next Entry Entries
| Done
| Fail String
unfoldrEntries :: (a -> Either String (Maybe (Entry, a))) -> a -> Entries
unfoldrEntries f = unfold
where
unfold x = case f x of
Left err -> Fail err
Right Nothing -> Done
Right (Just (e, x')) -> Next e (unfold x')
foldrEntries :: (Entry -> a -> a) -> a -> (String -> a) -> Entries -> a
foldrEntries next done fail' = fold
where
fold (Next e es) = next e (fold es)
fold Done = done
fold (Fail err) = fail' err
foldlEntries :: (a -> Entry -> a) -> a -> Entries -> Either String a
foldlEntries f = fold
where
fold a (Next e es) = (fold $! f a e) es
fold a Done = Right a
fold _ (Fail err) = Left err
mapEntries :: (Entry -> Entry) -> Entries -> Entries
mapEntries f = foldrEntries (Next . f) Done Fail
filterEntries :: (Entry -> Bool) -> Entries -> Entries
filterEntries p =
foldrEntries
(\entry rest -> if p entry
then Next entry rest
else rest)
Done Fail
checkEntries :: (Entry -> Maybe String) -> Entries -> Entries
checkEntries checkEntry =
foldrEntries
(\entry rest -> case checkEntry entry of
Nothing -> Next entry rest
Just err -> Fail err)
Done Fail
entriesIndex :: Entries -> Either String (Map.Map TarPath Entry)
entriesIndex = foldlEntries (\m e -> Map.insert (entryTarPath e) e m) Map.empty
--
-- * Checking
--
-- | This function checks a sequence of tar entries for file name security
-- problems. It checks that:
--
-- * file paths are not absolute
--
-- * file paths do not contain any path components that are \"@..@\"
--
-- * file names are valid
--
-- These checks are from the perspective of the current OS. That means we check
-- for \"@C:\blah@\" files on Windows and \"\/blah\" files on unix. For archive
-- entry types 'HardLink' and 'SymbolicLink' the same checks are done for the
-- link target. A failure in any entry terminates the sequence of entries with
-- an error.
--
checkSecurity :: Entries -> Entries
checkSecurity = checkEntries checkEntrySecurity
checkTarbomb :: FilePath -> Entries -> Entries
checkTarbomb expectedTopDir = checkEntries (checkEntryTarbomb expectedTopDir)
checkEntrySecurity :: Entry -> Maybe String
checkEntrySecurity entry = case entryContent entry of
HardLink link -> check (entryPath entry)
`mplus` check (fromLinkTarget link)
SymbolicLink link -> check (entryPath entry)
`mplus` check (fromLinkTarget link)
_ -> check (entryPath entry)
where
check name
| not (FilePath.Native.isRelative name)
= Just $ "Absolute file name in tar archive: " ++ show name
| not (FilePath.Native.isValid name)
= Just $ "Invalid file name in tar archive: " ++ show name
| any (=="..") (FilePath.Native.splitDirectories name)
= Just $ "Invalid file name in tar archive: " ++ show name
| otherwise = Nothing
checkEntryTarbomb :: FilePath -> Entry -> Maybe String
checkEntryTarbomb _ entry | nonFilesystemEntry = Nothing
where
-- Ignore some special entries we will not unpack anyway
nonFilesystemEntry =
case entryContent entry of
OtherEntryType 'g' _ _ -> True --PAX global header
OtherEntryType 'x' _ _ -> True --PAX individual header
_ -> False
checkEntryTarbomb expectedTopDir entry =
case FilePath.Native.splitDirectories (entryPath entry) of
(topDir:_) | topDir == expectedTopDir -> Nothing
_ -> Just $ "File in tar archive is not in the expected directory "
++ show expectedTopDir
--
-- * Reading
--
read :: ByteString -> Entries
read = unfoldrEntries getEntry
getEntry :: ByteString -> Either String (Maybe (Entry, ByteString))
getEntry bs
| BS.length header < 512 = Left "truncated tar archive"
-- Tar files end with at least two blocks of all '0'. Checking this serves
-- two purposes. It checks the format but also forces the tail of the data
-- which is necessary to close the file if it came from a lazily read file.
| BS.head bs == 0 = case BS.splitAt 1024 bs of
(end, trailing)
| BS.length end /= 1024 -> Left "short tar trailer"
| not (BS.all (== 0) end) -> Left "bad tar trailer"
| not (BS.all (== 0) trailing) -> Left "tar file has trailing junk"
| otherwise -> Right Nothing
| otherwise = partial $ do
case (chksum_, format_) of
(Ok chksum, _ ) | correctChecksum header chksum -> return ()
(Ok _, Ok _) -> fail "tar checksum error"
_ -> fail "data is not in tar format"
-- These fields are partial, have to check them
format <- format_; mode <- mode_;
uid <- uid_; gid <- gid_;
size <- size_; mtime <- mtime_;
devmajor <- devmajor_; devminor <- devminor_;
let content = BS.take size (BS.drop 512 bs)
padding = (512 - size) `mod` 512
bs' = BS.drop (512 + size + padding) bs
entry = Entry {
entryTarPath = TarPath name prefix,
entryContent = case typecode of
'\0' -> NormalFile content size
'0' -> NormalFile content size
'1' -> HardLink (LinkTarget linkname)
'2' -> SymbolicLink (LinkTarget linkname)
'3' -> CharacterDevice devmajor devminor
'4' -> BlockDevice devmajor devminor
'5' -> Directory
'6' -> NamedPipe
'7' -> NormalFile content size
_ -> OtherEntryType typecode content size,
entryPermissions = mode,
entryOwnership = Ownership uname gname uid gid,
entryTime = mtime,
entryFormat = format
}
return (Just (entry, bs'))
where
header = BS.take 512 bs
name = getString 0 100 header
mode_ = getOct 100 8 header
uid_ = getOct 108 8 header
gid_ = getOct 116 8 header
size_ = getOct 124 12 header
mtime_ = getOct 136 12 header
chksum_ = getOct 148 8 header
typecode = getByte 156 header
linkname = getString 157 100 header
magic = getChars 257 8 header
uname = getString 265 32 header
gname = getString 297 32 header
devmajor_ = getOct 329 8 header
devminor_ = getOct 337 8 header
prefix = getString 345 155 header
-- trailing = getBytes 500 12 header
format_ = case magic of
"\0\0\0\0\0\0\0\0" -> return V7Format
"ustar\NUL00" -> return UstarFormat
"ustar \NUL" -> return GnuFormat
_ -> fail "tar entry not in a recognised format"
correctChecksum :: ByteString -> Int -> Bool
correctChecksum header checksum = checksum == checksum'
where
-- sum of all 512 bytes in the header block,
-- treating each byte as an 8-bit unsigned value
checksum' = BS.Char8.foldl' (\x y -> x + ord y) 0 header'
-- treating the 8 bytes of chksum as blank characters.
header' = BS.concat [BS.take 148 header,
BS.Char8.replicate 8 ' ',
BS.drop 156 header]
-- * TAR format primitive input
getOct :: (Integral a, Bits a) => Int64 -> Int64 -> ByteString -> Partial a
getOct off len header
| BS.head bytes == 128 = parseBinInt (BS.unpack (BS.tail bytes))
| null octstr = return 0
| otherwise = case readOct octstr of
[(x,[])] -> return x
_ -> fail "tar header is malformed (bad numeric encoding)"
where
bytes = getBytes off len header
octstr = BS.Char8.unpack
. BS.Char8.takeWhile (\c -> c /= '\NUL' && c /= ' ')
. BS.Char8.dropWhile (== ' ')
$ bytes
-- Some tar programs switch into a binary format when they try to represent
-- field values that will not fit in the required width when using the text
-- octal format. In particular, the UID/GID fields can only hold up to 2^21
-- while in the binary format can hold up to 2^32. The binary format uses
-- '\128' as the header which leaves 7 bytes. Only the last 4 are used.
parseBinInt [0, 0, 0, byte3, byte2, byte1, byte0] =
return $! shiftL (fromIntegral byte3) 24
+ shiftL (fromIntegral byte2) 16
+ shiftL (fromIntegral byte1) 8
+ shiftL (fromIntegral byte0) 0
parseBinInt _ = fail "tar header uses non-standard number encoding"
getBytes :: Int64 -> Int64 -> ByteString -> ByteString
getBytes off len = BS.take len . BS.drop off
getByte :: Int64 -> ByteString -> Char
getByte off bs = BS.Char8.index bs off
getChars :: Int64 -> Int64 -> ByteString -> String
getChars off len = BS.Char8.unpack . getBytes off len
getString :: Int64 -> Int64 -> ByteString -> String
getString off len = BS.Char8.unpack . BS.Char8.takeWhile (/='\0') . getBytes off len
data Partial a = Error String | Ok a
partial :: Partial a -> Either String a
partial (Error msg) = Left msg
partial (Ok x) = Right x
instance Monad Partial where
return = Ok
Error m >>= _ = Error m
Ok x >>= k = k x
fail = Error
--
-- * Writing
--
-- | Create the external representation of a tar archive by serialising a list
-- of tar entries.
--
-- * The conversion is done lazily.
--
write :: [Entry] -> ByteString
write es = BS.concat $ map putEntry es ++ [BS.replicate (512*2) 0]
-- | Same as 'write', but for 'Entries'.
writeEntries :: Entries -> ByteString
writeEntries entries = BS.concat $ foldrEntries (\e res -> (putEntry e):res)
[BS.replicate (512*2) 0] error entries
putEntry :: Entry -> ByteString
putEntry entry = case entryContent entry of
NormalFile content size -> BS.concat [ header, content, padding size ]
OtherEntryType _ content size -> BS.concat [ header, content, padding size ]
_ -> header
where
header = putHeader entry
padding size = BS.replicate paddingSize 0
where paddingSize = fromIntegral (negate size `mod` 512)
putHeader :: Entry -> ByteString
putHeader entry =
BS.concat $ [ BS.take 148 block
, BS.Char8.pack $ putOct 7 checksum
, BS.Char8.singleton ' '
, BS.drop 156 block ]
where
-- putHeaderNoChkSum returns a String, so we convert it to the final
-- representation before calculating the checksum.
block = BS.Char8.pack . putHeaderNoChkSum $ entry
checksum = BS.Char8.foldl' (\x y -> x + ord y) 0 block
putHeaderNoChkSum :: Entry -> String
putHeaderNoChkSum Entry {
entryTarPath = TarPath name prefix,
entryContent = content,
entryPermissions = permissions,
entryOwnership = ownership,
entryTime = modTime,
entryFormat = format
} =
concat
[ putString 100 $ name
, putOct 8 $ permissions
, putOct 8 $ ownerId ownership
, putOct 8 $ groupId ownership
, putOct 12 $ contentSize
, putOct 12 $ modTime
, fill 8 $ ' ' -- dummy checksum
, putChar8 $ typeCode
, putString 100 $ linkTarget
] ++
case format of
V7Format ->
fill 255 '\NUL'
UstarFormat -> concat
[ putString 8 $ "ustar\NUL00"
, putString 32 $ ownerName ownership
, putString 32 $ groupName ownership
, putOct 8 $ deviceMajor
, putOct 8 $ deviceMinor
, putString 155 $ prefix
, fill 12 $ '\NUL'
]
GnuFormat -> concat
[ putString 8 $ "ustar \NUL"
, putString 32 $ ownerName ownership
, putString 32 $ groupName ownership
, putGnuDev 8 $ deviceMajor
, putGnuDev 8 $ deviceMinor
, putString 155 $ prefix
, fill 12 $ '\NUL'
]
where
(typeCode, contentSize, linkTarget,
deviceMajor, deviceMinor) = case content of
NormalFile _ size -> ('0' , size, [], 0, 0)
Directory -> ('5' , 0, [], 0, 0)
SymbolicLink (LinkTarget link) -> ('2' , 0, link, 0, 0)
HardLink (LinkTarget link) -> ('1' , 0, link, 0, 0)
CharacterDevice major minor -> ('3' , 0, [], major, minor)
BlockDevice major minor -> ('4' , 0, [], major, minor)
NamedPipe -> ('6' , 0, [], 0, 0)
OtherEntryType code _ size -> (code, size, [], 0, 0)
putGnuDev w n = case content of
CharacterDevice _ _ -> putOct w n
BlockDevice _ _ -> putOct w n
_ -> replicate w '\NUL'
-- * TAR format primitive output
type FieldWidth = Int
putString :: FieldWidth -> String -> String
putString n s = take n s ++ fill (n - length s) '\NUL'
--TODO: check integer widths, eg for large file sizes
putOct :: (Show a, Integral a) => FieldWidth -> a -> String
putOct n x =
let octStr = take (n-1) $ showOct x ""
in fill (n - length octStr - 1) '0'
++ octStr
++ putChar8 '\NUL'
putChar8 :: Char -> String
putChar8 c = [c]
fill :: FieldWidth -> Char -> String
fill n c = replicate n c
--
-- * Unpacking
--
unpack :: FilePath -> Entries -> IO ()
unpack baseDir entries = unpackEntries [] (checkSecurity entries)
>>= emulateLinks
where
-- We're relying here on 'checkSecurity' to make sure we're not scribbling
-- files all over the place.
unpackEntries _ (Fail err) = fail err
unpackEntries links Done = return links
unpackEntries links (Next entry es) = case entryContent entry of
NormalFile file _ -> extractFile entry path file
>> unpackEntries links es
Directory -> extractDir path
>> unpackEntries links es
HardLink link -> (unpackEntries $! saveLink path link links) es
SymbolicLink link -> (unpackEntries $! saveLink path link links) es
_ -> unpackEntries links es --ignore other file types
where
path = entryPath entry
extractFile entry path content = do
-- Note that tar archives do not make sure each directory is created
-- before files they contain, indeed we may have to create several
-- levels of directory.
createDirectoryIfMissing True absDir
BS.writeFile absPath content
when (isExecutable (entryPermissions entry))
(setFileExecutable absPath)
where
absDir = baseDir </> FilePath.Native.takeDirectory path
absPath = baseDir </> path
extractDir path = createDirectoryIfMissing True (baseDir </> path)
saveLink path link links = seq (length path)
$ seq (length link')
$ (path, link'):links
where link' = fromLinkTarget link
emulateLinks = mapM_ $ \(relPath, relLinkTarget) ->
let absPath = baseDir </> relPath
absTarget = FilePath.Native.takeDirectory absPath </> relLinkTarget
in copyFile absTarget absPath
--
-- * Packing
--
pack :: FilePath -- ^ Base directory
-> [FilePath] -- ^ Files and directories to pack, relative to the base dir
-> IO [Entry]
pack baseDir paths0 = preparePaths baseDir paths0 >>= packPaths baseDir
preparePaths :: FilePath -> [FilePath] -> IO [FilePath]
preparePaths baseDir paths =
fmap concat $ interleave
[ do isDir <- doesDirectoryExist (baseDir </> path)
if isDir
then do entries <- getDirectoryContentsRecursive (baseDir </> path)
return (FilePath.Native.addTrailingPathSeparator path
: map (path </>) entries)
else return [path]
| path <- paths ]
packPaths :: FilePath -> [FilePath] -> IO [Entry]
packPaths baseDir paths =
interleave
[ do tarpath <- either fail return (toTarPath isDir relpath)
if isDir then packDirectoryEntry filepath tarpath
else packFileEntry filepath tarpath
| relpath <- paths
, let isDir = FilePath.Native.hasTrailingPathSeparator filepath
filepath = baseDir </> relpath ]
interleave :: [IO a] -> IO [a]
interleave = unsafeInterleaveIO . go
where
go [] = return []
go (x:xs) = do
x' <- x
xs' <- interleave xs
return (x':xs')
packFileEntry :: FilePath -- ^ Full path to find the file on the local disk
-> TarPath -- ^ Path to use for the tar Entry in the archive
-> IO Entry
packFileEntry filepath tarpath = do
mtime <- getModTime filepath
perms <- getPermissions filepath
file <- openBinaryFile filepath ReadMode
size <- hFileSize file
content <- BS.hGetContents file
return (simpleEntry tarpath (NormalFile content (fromIntegral size))) {
entryPermissions = if Permissions.executable perms
then executableFilePermissions
else ordinaryFilePermissions,
entryTime = mtime
}
packDirectoryEntry :: FilePath -- ^ Full path to find the file on the local disk
-> TarPath -- ^ Path to use for the tar Entry in the archive
-> IO Entry
packDirectoryEntry filepath tarpath = do
mtime <- getModTime filepath
return (directoryEntry tarpath) {
entryTime = mtime
}
getDirectoryContentsRecursive :: FilePath -> IO [FilePath]
getDirectoryContentsRecursive dir0 =
fmap tail (recurseDirectories dir0 [""])
recurseDirectories :: FilePath -> [FilePath] -> IO [FilePath]
recurseDirectories _ [] = return []
recurseDirectories base (dir:dirs) = unsafeInterleaveIO $ do
(files, dirs') <- collect [] [] =<< getDirectoryContents (base </> dir)
files' <- recurseDirectories base (dirs' ++ dirs)
return (dir : files ++ files')
where
collect files dirs' [] = return (reverse files, reverse dirs')
collect files dirs' (entry:entries) | ignore entry
= collect files dirs' entries
collect files dirs' (entry:entries) = do
let dirEntry = dir </> entry
dirEntry' = FilePath.Native.addTrailingPathSeparator dirEntry
isDirectory <- doesDirectoryExist (base </> dirEntry)
if isDirectory
then collect files (dirEntry':dirs') entries
else collect (dirEntry:files) dirs' entries
ignore ['.'] = True
ignore ['.', '.'] = True
ignore _ = False
getModTime :: FilePath -> IO EpochTime
getModTime path = do
(TOD s _) <- getModificationTime path
return $! fromIntegral s
Jump to Line
Something went wrong with that request. Please try again.