-
Notifications
You must be signed in to change notification settings - Fork 22
/
Archive.hs
531 lines (497 loc) · 20.5 KB
/
Archive.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE ScopedTypeVariables #-}
-- | Logic for loading up trees from HTTPS archives.
module Pantry.Archive
( getArchivePackage
, getArchive
, getArchiveKey
, fetchArchivesRaw
, fetchArchives
) where
import RIO
import qualified Pantry.SHA256 as SHA256
import Pantry.Storage hiding (Tree, TreeEntry)
import Pantry.Tree
import Pantry.Types
import RIO.Process
import Pantry.Internal (normalizeParents, makeTarRelative)
import qualified RIO.Text as T
import qualified RIO.Text.Partial as T
import qualified RIO.List as List
import qualified RIO.ByteString.Lazy as BL
import qualified RIO.Map as Map
import qualified RIO.Set as Set
import qualified Hpack.Config as Hpack
import Pantry.HPack (hpackVersion)
import Data.Bits ((.&.), shiftR)
import Path (toFilePath)
import qualified Codec.Archive.Zip as Zip
import qualified Data.Digest.CRC32 as CRC32
import Distribution.PackageDescription (packageDescription, package)
import Conduit
import Data.Conduit.Zlib (ungzip)
import qualified Data.Conduit.Tar as Tar
import Pantry.HTTP
fetchArchivesRaw
:: (HasPantryConfig env, HasLogFunc env, HasProcessContext env)
=> [(RawArchive, RawPackageMetadata)]
-> RIO env ()
fetchArchivesRaw pairs =
for_ pairs $ \(ra, rpm) ->
getArchive (RPLIArchive ra rpm) ra rpm
fetchArchives
:: (HasPantryConfig env, HasLogFunc env, HasProcessContext env)
=> [(Archive, PackageMetadata)]
-> RIO env ()
fetchArchives pairs =
-- TODO be more efficient, group together shared archives
fetchArchivesRaw [
let PackageIdentifier nm ver = pmIdent pm
rpm = RawPackageMetadata (Just nm) (Just ver) (Just $ pmTreeKey pm) (Just $ pmCabal pm)
ra = RawArchive (archiveLocation a) (Just $ archiveHash a) (Just $ archiveSize a) (archiveSubdir a)
in (ra, rpm)
| (a, pm) <- pairs]
getArchiveKey
:: forall env. (HasPantryConfig env, HasLogFunc env, HasProcessContext env)
=> RawPackageLocationImmutable -- ^ for exceptions
-> RawArchive
-> RawPackageMetadata
-> RIO env TreeKey
getArchiveKey rpli archive rpm =
packageTreeKey <$> getArchivePackage rpli archive rpm -- potential optimization
thd3 :: (a, b, c) -> c
thd3 (_, _, z) = z
getArchivePackage
:: forall env. (HasPantryConfig env, HasLogFunc env, HasProcessContext env, HasCallStack)
=> RawPackageLocationImmutable -- ^ for exceptions
-> RawArchive
-> RawPackageMetadata
-> RIO env Package
getArchivePackage rpli archive rpm = thd3 <$> getArchive rpli archive rpm
getArchive
:: forall env. (HasPantryConfig env, HasLogFunc env, HasProcessContext env, HasCallStack)
=> RawPackageLocationImmutable -- ^ for exceptions
-> RawArchive
-> RawPackageMetadata
-> RIO env (SHA256, FileSize, Package)
getArchive rpli archive rpm = do
-- Check if the value is in the archive, and use it if possible
mcached <- loadCache rpli archive
cached@(_, _, pa) <-
case mcached of
Just stored -> pure stored
-- Not in the archive. Load the archive. Completely ignore the
-- PackageMetadata for now, we'll check that the Package
-- info matches next.
Nothing -> withArchiveLoc archive $ \fp sha size -> do
pa <- parseArchive rpli archive fp
-- Storing in the cache exclusively uses information we have
-- about the archive itself, not metadata from the user.
storeCache archive sha size pa
pure (sha, size, pa)
either throwIO (\_ -> pure cached) $ checkPackageMetadata rpli rpm pa
storeCache
:: forall env. (HasPantryConfig env, HasLogFunc env)
=> RawArchive
-> SHA256
-> FileSize
-> Package
-> RIO env ()
storeCache archive sha size pa =
case raLocation archive of
ALUrl url -> withStorage $ storeArchiveCache url (raSubdir archive) sha size (packageTreeKey pa)
ALFilePath _ -> pure () -- TODO cache local as well
loadCache
:: forall env. (HasPantryConfig env, HasLogFunc env, HasProcessContext env)
=> RawPackageLocationImmutable
-> RawArchive
-> RIO env (Maybe (SHA256, FileSize, Package))
loadCache rpli archive =
case loc of
ALFilePath _ -> pure Nothing -- TODO can we do something intelligent here?
ALUrl url -> withStorage (loadArchiveCache url (raSubdir archive)) >>= loop
where
loc = raLocation archive
msha = raHash archive
msize = raSize archive
loadFromCache :: TreeId -> RIO env (Maybe Package)
loadFromCache tid = fmap Just $ withStorage $ loadPackageById rpli tid
loop [] = pure Nothing
loop ((sha, size, tid):rest) =
case msha of
Nothing -> do
case msize of
Just size' | size /= size' -> loop rest
_ -> do
case loc of
ALUrl url -> do
logWarn $ "Using archive from " <> display url <> " without a specified cryptographic hash"
logWarn $ "Cached hash is " <> display sha <> ", file size " <> display size
logWarn "For security and reproducibility, please add a hash and file size to your configuration"
ALFilePath _ -> pure ()
fmap (sha, size,) <$> loadFromCache tid
Just sha'
| sha == sha' ->
case msize of
Nothing -> do
case loc of
ALUrl url -> do
logWarn $ "Archive from " <> display url <> " does not specify a size"
logWarn $ "To avoid an overflow attack, please add the file size to your configuration: " <> display size
ALFilePath _ -> pure ()
fmap (sha, size,) <$> loadFromCache tid
Just size'
| size == size' -> fmap (sha, size,) <$> loadFromCache tid
| otherwise -> do
logWarn $ "Archive from " <> display loc <> " has a matching hash but mismatched size"
logWarn "Please verify that your configuration provides the correct size"
loop rest
| otherwise -> loop rest
-- ensure name, version, etc are correct
checkPackageMetadata
:: RawPackageLocationImmutable
-> RawPackageMetadata
-> Package
-> Either PantryException Package
checkPackageMetadata pl pm pa = do
let
pkgCabal = case packageCabalEntry pa of
PCCabalFile tentry -> tentry
PCHpack phpack -> phGenerated phpack
err = MismatchedPackageMetadata
pl
pm
(Just (packageTreeKey pa))
(teBlob pkgCabal)
(packageIdent pa)
test (Just x) y = x == y
test Nothing _ = True
tests =
[ test (rpmTreeKey pm) (packageTreeKey pa)
, test (rpmName pm) (pkgName $ packageIdent pa)
, test (rpmVersion pm) (pkgVersion $ packageIdent pa)
, test (rpmCabal pm) (teBlob pkgCabal)
]
in if and tests then Right pa else Left err
-- | Provide a local file with the contents of the archive, regardless
-- of where it comes from. Perform SHA256 and file size validation if
-- downloading.
withArchiveLoc
:: HasLogFunc env
=> RawArchive
-> (FilePath -> SHA256 -> FileSize -> RIO env a)
-> RIO env a
withArchiveLoc (RawArchive (ALFilePath resolved) msha msize _subdir) f = do
let abs' = resolvedAbsolute resolved
fp = toFilePath abs'
(sha, size) <- withBinaryFile fp ReadMode $ \h -> do
size <- FileSize . fromIntegral <$> hFileSize h
for_ msize $ \size' -> when (size /= size') $ throwIO $ LocalInvalidSize abs' Mismatch
{ mismatchExpected = size'
, mismatchActual = size
}
sha <- runConduit (sourceHandle h .| SHA256.sinkHash)
for_ msha $ \sha' -> when (sha /= sha') $ throwIO $ LocalInvalidSHA256 abs' Mismatch
{ mismatchExpected = sha'
, mismatchActual = sha
}
pure (sha, size)
f fp sha size
withArchiveLoc (RawArchive (ALUrl url) msha msize _subdir) f =
withSystemTempFile "archive" $ \fp hout -> do
logDebug $ "Downloading archive from " <> display url
(sha, size, ()) <- httpSinkChecked url msha msize (sinkHandle hout)
hClose hout
f fp sha size
data ArchiveType = ATTarGz | ATTar | ATZip
deriving (Enum, Bounded)
instance Display ArchiveType where
display ATTarGz = "GZIP-ed tar file"
display ATTar = "Uncompressed tar file"
display ATZip = "Zip file"
data METype
= METNormal
| METExecutable
| METLink !FilePath
deriving Show
data MetaEntry = MetaEntry
{ mePath :: !FilePath
, meType :: !METype
}
deriving Show
foldArchive
:: (HasPantryConfig env, HasLogFunc env)
=> ArchiveLocation -- ^ for error reporting
-> FilePath
-> ArchiveType
-> a
-> (a -> MetaEntry -> ConduitT ByteString Void (RIO env) a)
-> RIO env a
foldArchive loc fp ATTarGz accum f =
withSourceFile fp $ \src -> runConduit $ src .| ungzip .| foldTar loc accum f
foldArchive loc fp ATTar accum f =
withSourceFile fp $ \src -> runConduit $ src .| foldTar loc accum f
foldArchive loc fp ATZip accum0 f = withBinaryFile fp ReadMode $ \h -> do
let go accum entry = do
let me = MetaEntry (Zip.eRelativePath entry) met
met = fromMaybe METNormal $ do
let modes = shiftR (Zip.eExternalFileAttributes entry) 16
guard $ Zip.eVersionMadeBy entry .&. 0xFF00 == 0x0300
guard $ modes /= 0
Just $
if (modes .&. 0o100) == 0
then METNormal
else METExecutable
lbs = Zip.fromEntry entry
let crcExpected = Zip.eCRC32 entry
crcActual = CRC32.crc32 lbs
when (crcExpected /= crcActual)
$ throwIO $ CRC32Mismatch loc (Zip.eRelativePath entry) Mismatch
{ mismatchExpected = crcExpected
, mismatchActual = crcActual
}
runConduit $ sourceLazy lbs .| f accum me
isDir entry =
case reverse $ Zip.eRelativePath entry of
'/':_ -> True
_ -> False
-- We're entering lazy I/O land thanks to zip-archive.
lbs <- BL.hGetContents h
foldM go accum0 (filter (not . isDir) $ Zip.zEntries $ Zip.toArchive lbs)
foldTar
:: (HasPantryConfig env, HasLogFunc env)
=> ArchiveLocation -- ^ for exceptions
-> a
-> (a -> MetaEntry -> ConduitT ByteString o (RIO env) a)
-> ConduitT ByteString o (RIO env) a
foldTar loc accum0 f = do
ref <- newIORef accum0
Tar.untar $ \fi -> toME fi >>= traverse_ (\me -> do
accum <- readIORef ref
accum' <- f accum me
writeIORef ref $! accum')
readIORef ref
where
toME :: MonadIO m => Tar.FileInfo -> m (Maybe MetaEntry)
toME fi = do
let exc = InvalidTarFileType loc (Tar.getFileInfoPath fi) (Tar.fileType fi)
mmet <-
case Tar.fileType fi of
Tar.FTSymbolicLink bs ->
case decodeUtf8' bs of
Left _ -> throwIO exc
Right text -> pure $ Just $ METLink $ T.unpack text
Tar.FTNormal -> pure $ Just $
if Tar.fileMode fi .&. 0o100 /= 0
then METExecutable
else METNormal
Tar.FTDirectory -> pure Nothing
_ -> throwIO exc
pure $
(\met -> MetaEntry
{ mePath = Tar.getFileInfoPath fi
, meType = met
})
<$> mmet
data SimpleEntry = SimpleEntry
{ seSource :: !FilePath
, seType :: !FileType
}
deriving Show
-- | Attempt to parse the contents of the given archive in the given
-- subdir into a 'Tree'. This will not consult any caches. It will
-- ensure that:
--
-- * The cabal file exists
--
-- * The cabal file can be parsed
--
-- * The name inside the cabal file matches the name of the cabal file itself
parseArchive
:: (HasPantryConfig env, HasLogFunc env, HasProcessContext env)
=> RawPackageLocationImmutable
-> RawArchive
-> FilePath -- ^ file holding the archive
-> RIO env Package
parseArchive rpli archive fp = do
let loc = raLocation archive
getFiles [] = throwIO $ UnknownArchiveType loc
getFiles (at:ats) = do
eres <- tryAny $ foldArchive loc fp at id $ \m me -> pure $ m . (me:)
case eres of
Left e -> do
logDebug $ "parseArchive of " <> display at <> ": " <> displayShow e
getFiles ats
Right files -> pure (at, Map.fromList $ map (mePath &&& id) $ files [])
(at :: ArchiveType, files :: Map FilePath MetaEntry) <- getFiles [minBound..maxBound]
let toSimple :: FilePath -> MetaEntry -> Either String (Map FilePath SimpleEntry)
toSimple key me =
case meType me of
METNormal -> Right $ Map.singleton key $ SimpleEntry (mePath me) FTNormal
METExecutable -> Right $ Map.singleton key $ SimpleEntry (mePath me) FTExecutable
METLink relDest -> do
case relDest of
'/':_ -> Left $ concat
[ "File located at "
, show $ mePath me
, " is a symbolic link to absolute path "
, relDest
]
_ -> Right ()
dest0 <-
case makeTarRelative (mePath me) relDest of
Left e -> Left $ concat
[ "Error resolving relative path "
, relDest
, " from symlink at "
, mePath me
, ": "
, e
]
Right x -> Right x
dest <-
case normalizeParents dest0 of
Left e -> Left $ concat
[ "Invalid symbolic link from "
, mePath me
, " to "
, relDest
, ", tried parsing "
, dest0
, ": "
, e
]
Right x -> Right x
-- Check if it's a symlink to a file
case Map.lookup dest files of
Nothing ->
-- Check if it's a symlink to a directory
case findWithPrefix dest files of
[] -> Left $ "Symbolic link dest not found from " ++ mePath me ++ " to " ++ relDest ++ ", looking for " ++ dest ++ ".\n"
++ "This may indicate that the source is a git archive which uses git-annex.\n"
++ "See https://github.com/commercialhaskell/stack/issues/4579 for further information."
pairs -> fmap fold $ for pairs $ \(suffix, me') -> toSimple (key ++ '/' : suffix) me'
Just me' ->
case meType me' of
METNormal -> Right $ Map.singleton key $ SimpleEntry dest FTNormal
METExecutable -> Right $ Map.singleton key $ SimpleEntry dest FTExecutable
METLink _ -> Left $ "Symbolic link dest cannot be a symbolic link, from " ++ mePath me ++ " to " ++ relDest
case fold <$> Map.traverseWithKey toSimple files of
Left e -> throwIO $ UnsupportedTarball loc $ T.pack e
Right files1 -> do
let files2 = stripCommonPrefix $ Map.toList files1
files3 = takeSubdir (raSubdir archive) files2
toSafe (fp', a) =
case mkSafeFilePath fp' of
Nothing -> Left $ "Not a safe file path: " ++ show fp'
Just sfp -> Right (sfp, a)
case traverse toSafe files3 of
Left e -> throwIO $ UnsupportedTarball loc $ T.pack e
Right safeFiles -> do
let toSave = Set.fromList $ map (seSource . snd) safeFiles
(blobs :: Map FilePath BlobKey) <-
foldArchive loc fp at mempty $ \m me ->
if mePath me `Set.member` toSave
then do
bs <- mconcat <$> sinkList
(_, blobKey) <- lift $ withStorage $ storeBlob bs
pure $ Map.insert (mePath me) blobKey m
else pure m
tree <- fmap (TreeMap . Map.fromList) $ for safeFiles $ \(sfp, se) ->
case Map.lookup (seSource se) blobs of
Nothing -> error $ "Impossible: blob not found for: " ++ seSource se
Just blobKey -> pure (sfp, TreeEntry blobKey (seType se))
-- parse the cabal file and ensure it has the right name
buildFile <- findCabalOrHpackFile rpli tree
(buildFilePath, buildFileBlobKey, buildFileEntry) <- case buildFile of
BFCabal fpath te@(TreeEntry key _) -> pure (fpath, key, te)
BFHpack te@(TreeEntry key _) -> pure (hpackSafeFilePath, key, te)
mbs <- withStorage $ loadBlob buildFileBlobKey
bs <-
case mbs of
Nothing -> throwIO $ TreeReferencesMissingBlob rpli buildFilePath buildFileBlobKey
Just bs -> pure bs
cabalBs <- case buildFile of
BFCabal _ _ -> pure bs
BFHpack _ -> snd <$> hpackToCabal rpli tree
(_warnings, gpd) <- rawParseGPD (Left rpli) cabalBs
let ident@(PackageIdentifier name _) = package $ packageDescription gpd
case buildFile of
BFCabal _ _ -> when (buildFilePath /= cabalFileName name) $ throwIO $ WrongCabalFileName rpli buildFilePath name
_ -> return ()
-- It's good! Store the tree, let's bounce
(tid, treeKey') <- withStorage $ storeTree rpli ident tree buildFile
packageCabal <- case buildFile of
BFCabal _ _ -> pure $ PCCabalFile buildFileEntry
BFHpack _ -> do
cabalKey <- withStorage $ do
hpackId <- storeHPack rpli tid
loadCabalBlobKey hpackId
hpackSoftwareVersion <- hpackVersion
let cabalTreeEntry = TreeEntry cabalKey (teType buildFileEntry)
pure $ PCHpack $ PHpack { phOriginal = buildFileEntry, phGenerated = cabalTreeEntry, phVersion = hpackSoftwareVersion}
pure Package
{ packageTreeKey = treeKey'
, packageTree = tree
, packageCabalEntry = packageCabal
, packageIdent = ident
}
-- | Find all of the files in the Map with the given directory as a
-- prefix. Directory is given without trailing slash. Returns the
-- suffix after stripping the given prefix.
findWithPrefix :: FilePath -> Map FilePath MetaEntry -> [(FilePath, MetaEntry)]
findWithPrefix dir = mapMaybe go . Map.toList
where
prefix = dir ++ "/"
go (x, y) = (, y) <$> List.stripPrefix prefix x
findCabalOrHpackFile
:: MonadThrow m
=> RawPackageLocationImmutable -- ^ for exceptions
-> Tree
-> m BuildFile
findCabalOrHpackFile loc (TreeMap m) = do
let isCabalFile (sfp, _) =
let txt = unSafeFilePath sfp
in not ("/" `T.isInfixOf` txt) && (".cabal" `T.isSuffixOf` txt)
isHpackFile (sfp, _) =
let txt = unSafeFilePath sfp
in T.pack (Hpack.packageConfig) == txt
isBFCabal (BFCabal _ _) = True
isBFCabal _ = False
sfpBuildFile (BFCabal sfp _) = sfp
sfpBuildFile (BFHpack _) = hpackSafeFilePath
toBuildFile xs@(sfp, te) = let cbFile = if (isCabalFile xs)
then Just $ BFCabal sfp te
else Nothing
hpFile = if (isHpackFile xs)
then Just $ BFHpack te
else Nothing
in cbFile <|> hpFile
case mapMaybe toBuildFile $ Map.toList m of
[] -> throwM $ TreeWithoutCabalFile loc
[bfile] -> pure bfile
xs -> case (filter isBFCabal xs) of
[] -> throwM $ TreeWithoutCabalFile loc
[bfile] -> pure bfile
xs' -> throwM $ TreeWithMultipleCabalFiles loc $ map sfpBuildFile xs'
-- | If all files have a shared prefix, strip it off
stripCommonPrefix :: [(FilePath, a)] -> [(FilePath, a)]
stripCommonPrefix [] = []
stripCommonPrefix pairs@((firstFP, _):_) = fromMaybe pairs $ do
let firstDir = takeWhile (/= '/') firstFP
guard $ not $ null firstDir
let strip (fp, a) = (, a) <$> List.stripPrefix (firstDir ++ "/") fp
stripCommonPrefix <$> traverse strip pairs
-- | Take us down to the specified subdirectory
takeSubdir
:: Text -- ^ subdir
-> [(FilePath, a)] -- ^ files after stripping common prefix
-> [(Text, a)]
takeSubdir subdir = mapMaybe $ \(fp, a) -> do
stripped <- List.stripPrefix subdirs $ splitDirs $ T.pack fp
Just (T.intercalate "/" stripped, a)
where
splitDirs = List.dropWhile (== ".") . filter (/= "") . T.splitOn "/"
subdirs = splitDirs subdir