forked from DIRACGrid/DIRAC
-
Notifications
You must be signed in to change notification settings - Fork 0
/
PoolXMLFile.py
143 lines (103 loc) · 4.72 KB
/
PoolXMLFile.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
""" The POOL XML File module provides a means to extract the GUID of a file or list
of files by searching for an appropriate POOL XML Catalog in the specified directory.
"""
import os
import glob
import tarfile
from DIRAC import S_OK, S_ERROR, gLogger
from DIRAC.Resources.Catalog.PoolXMLCatalog import PoolXMLCatalog
from DIRAC.Core.Utilities.List import uniqueElements
from DIRAC.Core.Utilities.File import makeGuid
#############################################################################
def getGUID(fileNames, directory=""):
    """Resolve the GUID of one or more files from the POOL XML catalogs in a directory.

    Every catalog file that ``_getPoolCatalogs`` finds in ``directory`` is loaded
    into a single ``PoolXMLCatalog``. Each requested name is then looked up by PFN;
    when the catalog yields an empty GUID, a new one is produced with ``makeGuid``.

    :param fileNames: a single file name, or a list of file names
    :param directory: directory to search for catalogs; the current working
                      directory is used when this is empty
    :return: ``S_ERROR`` if ``directory`` is not a directory. Otherwise ``S_OK``
             wrapping a dict of file name -> GUID string, with two extra keys on
             the result: ``"directory"`` (the directory actually searched) and
             ``"generated"`` (names whose GUID was not found in the catalog).
    """
    directory = directory or os.getcwd()
    if not os.path.isdir(directory):
        return S_ERROR(f"{directory} is not a directory")

    # Anything that is not a list is treated as one single file name
    requested = fileNames if isinstance(fileNames, list) else [fileNames]
    gLogger.verbose(f"Will look for POOL XML Catalog GUIDs in {directory} for {', '.join(requested)}")

    # One catalog object built over every usable catalog file in the directory
    catalog = PoolXMLCatalog(_getPoolCatalogs(directory))

    pfnGUIDs = {}
    generated = []
    for pfn in requested:
        catalogGuid = str(catalog.getGuidByPfn(pfn))
        if catalogGuid:
            pfnGUIDs[pfn] = catalogGuid
            continue
        # Not known to the catalog: fall back to a freshly made GUID
        pfnGUIDs[pfn] = makeGuid(pfn)
        generated.append(pfn)

    if generated:
        gLogger.info(f"GUIDs not found from POOL XML Catalogue (and were generated) for: {', '.join(generated)}")
    else:
        gLogger.info(f"Found GUIDs from POOL XML Catalogue for all files: {', '.join(requested)}")

    result = S_OK(pfnGUIDs)
    result["directory"] = directory
    result["generated"] = generated
    return result
#############################################################################
def getType(fileNames, directory=""):
    """Resolve the file type of one or more PFNs from the POOL XML catalogs in a directory.

    Every catalog file that ``_getPoolCatalogs`` finds in ``directory`` is loaded
    into a single ``PoolXMLCatalog``. Each requested name is then looked up by PFN;
    when the catalog yields an empty type, the type defaults to ``"ROOT"``.

    :param fileNames: a single file name, or a list of file names
    :param directory: directory to search for catalogs; the current working
                      directory is used when this is empty
    :return: ``S_ERROR`` if ``directory`` is not a directory. Otherwise ``S_OK``
             wrapping a dict of file name -> type string, with two extra keys on
             the result: ``"directory"`` (the directory actually searched) and
             ``"generated"`` (names whose type was not found in the catalog and
             therefore defaulted to ``"ROOT"``).
    """
    if not directory:
        directory = os.getcwd()
    if not os.path.isdir(directory):
        return S_ERROR(f"{directory} is not a directory")
    # Anything that is not a list is treated as one single file name
    if not isinstance(fileNames, list):
        fileNames = [fileNames]
    gLogger.verbose(f"Will look for POOL XML Catalog file types in {directory} for {', '.join(fileNames)}")

    finalCatList = _getPoolCatalogs(directory)

    # Create POOL catalog with final list of catalog files and extract file types
    generated = []  # key name kept as "generated" in the result for caller compatibility
    pfnTypes = {}
    catalog = PoolXMLCatalog(finalCatList)
    for fname in fileNames:
        typeFile = str(catalog.getTypeByPfn(fname))
        if not typeFile:
            # Not known to the catalog: fall back to the default type
            typeFile = "ROOT"
            generated.append(fname)
        pfnTypes[fname] = typeFile

    if not generated:
        gLogger.info(f"Found Types from POOL XML Catalogue for all files: {', '.join(fileNames)}")
    else:
        # The message previously spoke of generated GUIDs (copied from getGUID)
        gLogger.info(f"Types not found from POOL XML Catalogue (and were defaulted to ROOT) for: {', '.join(generated)}")

    result = S_OK(pfnTypes)
    result["directory"] = directory
    result["generated"] = generated
    return result
#############################################################################
def _getPoolCatalogs(directory=""):
    """Return the paths of the usable POOL XML catalog files found in ``directory``.

    Files matching ``*.xml`` or ``*.xml*gz`` are collected. Matches that are tar
    archives are unpacked into ``directory`` and their members are used as
    candidates instead. Each candidate is then test-loaded with
    ``PoolXMLCatalog``; candidates that fail to load are dropped.

    :param directory: directory to scan; an empty string means the current
                      working directory
    :return: list of catalog file paths that ``PoolXMLCatalog`` accepted
    """
    patterns = ["*.xml", "*.xml*gz"]

    # Resolved once; used to reject archive members that would land outside
    # the extraction directory (e.g. "../x" or absolute member names).
    extractRoot = os.path.realpath(directory or os.curdir)

    # First obtain valid list of unpacked catalog files in directory
    poolCatalogList = []
    for pattern in patterns:
        # glob already returns paths prefixed with ``directory``, so they are
        # used as-is; joining ``directory`` again would double a relative path.
        for fname in glob.glob(os.path.join(directory, pattern)):
            if fname.endswith(".bak"):
                # Defensive: the current patterns cannot match a ".bak" name,
                # kept in case the pattern list is extended.
                gLogger.verbose(f"Ignoring BAK file: {fname}")
            elif tarfile.is_tarfile(fname):
                gLogger.debug(f"Unpacking catalog XML file {fname}")
                with tarfile.open(fname, "r") as tf:
                    for member in tf.getmembers():
                        target = os.path.realpath(os.path.join(extractRoot, member.name))
                        if os.path.commonpath([extractRoot, target]) != extractRoot:
                            # Best-effort guard against path traversal in archives
                            gLogger.warn(f"Skipping unsafe member {member.name} in {fname}")
                            continue
                        tf.extract(member, directory)
                        poolCatalogList.append(os.path.join(directory, member.name))
            else:
                poolCatalogList.append(fname)

    # Now have list of all XML files but some may not be Pool XML catalogs...
    finalCatList = []
    for possibleCat in uniqueElements(poolCatalogList):
        try:
            # Only the side effect matters: a failure to load marks a non-catalog
            PoolXMLCatalog(possibleCat)
        except Exception:
            # Deliberately broad: any failure just means "not a usable catalog"
            gLogger.debug(f"Ignoring non-POOL catalogue file {possibleCat}")
        else:
            finalCatList.append(possibleCat)

    gLogger.debug(f"Final list of catalog files are: {', '.join(finalCatList)}")
    return finalCatList
#############################################################################