Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Newer
Older
100644 375 lines (287 sloc) 11.266 kb
354c23e first attempt to merge the DEVEL_BRANCH changes
tavis_rudd authored
1 #!/usr/bin/env python
0fddf36 license note
tavis_rudd authored
2 # $Id: FileUtils.py,v 1.12 2005/11/02 22:26:07 tavis_rudd Exp $
354c23e first attempt to merge the DEVEL_BRANCH changes
tavis_rudd authored
3 """File utilities for Python:
4
5 Meta-Data
6 ================================================================================
7fd2570 changed my email address over from calrudd.com to damnsimple.com
tavis_rudd authored
7 Author: Tavis Rudd <tavis@damnsimple.com>
354c23e first attempt to merge the DEVEL_BRANCH changes
tavis_rudd authored
8 License: This software is released for unlimited distribution under the
0fddf36 license note
tavis_rudd authored
9 terms of the MIT license. See the LICENSE file.
10 Version: $Revision: 1.12 $
354c23e first attempt to merge the DEVEL_BRANCH changes
tavis_rudd authored
11 Start Date: 2001/09/26
0fddf36 license note
tavis_rudd authored
12 Last Revision Date: $Date: 2005/11/02 22:26:07 $
354c23e first attempt to merge the DEVEL_BRANCH changes
tavis_rudd authored
13 """
7fd2570 changed my email address over from calrudd.com to damnsimple.com
tavis_rudd authored
14 __author__ = "Tavis Rudd <tavis@damnsimple.com>"
0fddf36 license note
tavis_rudd authored
15 __revision__ = "$Revision: 1.12 $"[11:-2]
354c23e first attempt to merge the DEVEL_BRANCH changes
tavis_rudd authored
16
17
18 from glob import glob
19 import os
20 from os import listdir
21 import os.path
22 import re
23 from types import StringType
24 from tempfile import mktemp
25
def _escapeRegexChars(txt,
                      escapeRE=re.compile(r'([\$\^\*\+\.\?\{\}\[\]\(\)\|\\])')):
    """Return 'txt' with each regex metacharacter preceded by a backslash.

    The characters that get escaped are exactly the ones listed in the
    character class of the default 'escapeRE'.  The compiled regex is bound
    as a default argument so it is only built once, at definition time.
    """

    def prefixWithBackslash(match):
        # group(1) is the single metacharacter that was matched
        return '\\' + match.group(1)

    return escapeRE.sub(prefixWithBackslash, txt)
29
def findFiles(*args, **kw):
    """Recursively collect the paths that match a set of glob patterns.

    Convenience wrapper: all arguments are handed unchanged to the
    FileFinder class (see its docstring for what it accepts) and the list
    produced by its files() method is returned.
    """
    finder = FileFinder(*args, **kw)
    return finder.files()
37
def replaceStrInFiles(files, theStr, repl):
    """Replace all instances of 'theStr' with 'repl' for each file in the
    'files' list.  Returns a dictionary with data about the matches found.

    This is like string.replace() on a multi-file basis: 'theStr' is matched
    literally (its regex metacharacters are escaped) and 'repl' is inserted
    literally.  'repl' may also be a callable taking the match object and
    returning the replacement text, as accepted by FindAndReplace.

    This function is a wrapper around the FindAndReplace class.  See its
    docstring for more details.
    """
    pattern = _escapeRegexChars(theStr)

    if callable(repl):
        replacement = repl
    else:
        # Hand FindAndReplace a function rather than the raw string.  A
        # string would be parsed as a regex replacement template, and since
        # the escaped pattern never contains groups any '\1' or '\g<name>'
        # inside 'repl' would raise IndexError instead of being inserted.
        def replacement(match, literal=repl):
            return literal

    return FindAndReplace(files, pattern, replacement).results()
50
def replaceRegexInFiles(files, pattern, repl):
    """Substitute 'repl' for every match of the regex 'pattern' in each of
    the files named in 'files', and return a dictionary describing the
    matches that were found.

    Think of it as re.sub applied across many files.

    The work is delegated to the FindAndReplace class; see its docstring for
    more details.
    """
    replacer = FindAndReplace(files, pattern, repl)
    return replacer.results()
62
63
64 ##################################################
65 ## CLASSES
66
class FileFinder:

    """Walks a directory tree and gathers every path that matches at least
    one of the given glob patterns.

    The walk happens in the constructor; call files() afterwards to get the
    result.  Matching is done with glob(), so a directory whose name matches
    a pattern is reported alongside regular files.
    """

    def __init__(self, rootPath,
                 globPatterns=('*',),
                 ignoreBasenames=('CVS','.svn'),
                 ignoreDirs=(),
                 ):
        """Store the search settings and immediately walk 'rootPath'.

        'globPatterns' are tried in every visited directory.  Sub-directories
        whose base name is in 'ignoreBasenames', or whose joined path is in
        'ignoreDirs', are not descended into.
        """
        self._files = []
        self._ignoreDirs = ignoreDirs
        self._ignoreBasenames = ignoreBasenames
        self._globPatterns = globPatterns
        self._rootPath = rootPath

        self.walkDirTree(rootPath)

    def walkDirTree(self, dir='.',

                    listdir=os.listdir,
                    isdir=os.path.isdir,
                    join=os.path.join,
                    ):

        """Visit 'dir' and every sub-directory that passes filterDir(),
        calling processDir() on each one."""
        handleDir = self.processDir
        wanted = self.filterDir

        # explicit stack instead of recursion; the last dir pushed is
        # visited first
        stack = [dir]
        while stack:
            current = stack.pop()
            handleDir(current)

            for entry in listdir(current):
                candidate = join(current, entry)
                if isdir(candidate) and wanted(entry, candidate):
                    stack.append(candidate)

    def filterDir(self, baseName, fullPath):

        """Hook deciding whether a sub-directory should be walked.  Returns
        a true value unless the dir is excluded by name or by path."""

        return (baseName not in self._ignoreBasenames and
                fullPath not in self._ignoreDirs)

    def processDir(self, dir, glob=glob):
        """Add everything in 'dir' that matches one of the glob patterns to
        the result list."""
        collected = self._files
        for pattern in self._globPatterns:
            matches = glob(os.path.join(dir, pattern))
            collected.extend(matches)

    def files(self):
        """Return the list of paths gathered during the walk."""
        return self._files
127
e16dd6b removed all dependencies on modules not in the standard library so this ...
tavis_rudd authored
class _GenSubberFunc:

    """Converts a 'sub' string in the form that one feeds to re.sub (backrefs,
    groups, etc.) into a function that can be used to do the substitutions in
    the FindAndReplace class.

    The replacement string is parsed once, in the constructor, into a list of
    Python expression chunks: a string literal for constant text, and an
    'm.group(...)' call for each '\\N', '\\g<name>' or '\\g<N>' reference.
    subberFunc() compiles those chunks into a function taking a match object
    'm' and returning the substituted text.

    Everything that is not one of those references is copied through
    unchanged; no other backslash escapes are interpreted.
    """

    # \1, \2, ... (a leading zero is not accepted)
    backrefRE = re.compile(r'\\([1-9][0-9]*)')
    # \g<name> or \g<number>.  Digits are allowed after the first character
    # of a name so that references such as \g<word2> are recognised.
    groupRE = re.compile(r'\\g<([a-zA-Z_][a-zA-Z_0-9]*|[0-9]+)>')

    def __init__(self, replaceStr):
        self._src = replaceStr
        self._pos = 0
        self._codeChunks = []
        self.parse()

    def src(self):
        """Return the replacement string being parsed."""
        return self._src

    def pos(self):
        """Return the current parse position (an index into src())."""
        return self._pos

    def setPos(self, pos):
        """Move the parse position to 'pos'."""
        self._pos = pos

    def atEnd(self):
        """True once the parse position has reached the end of src()."""
        return self._pos >= len(self._src)

    def advance(self, offset=1):
        """Move the parse position forward by 'offset' characters."""
        self._pos += offset

    def readTo(self, to, start=None):
        """Return the text from 'start' (default: the current position) up
        to 'to', and leave the parse position at 'to'."""
        if start is None:
            start = self._pos
        self._pos = to
        if self.atEnd():
            return self._src[start:]
        else:
            return self._src[start:to]

    ## match and get methods

    def matchBackref(self):
        """Match a '\\N' backreference at the current position (or None)."""
        return self.backrefRE.match(self.src(), self.pos())

    def getBackref(self):
        """Consume a backreference and return its group number as a string.
        Only call this when matchBackref() succeeds."""
        m = self.matchBackref()
        self.setPos(m.end())
        return m.group(1)

    def matchGroup(self):
        """Match a '\\g<...>' reference at the current position (or None)."""
        return self.groupRE.match(self.src(), self.pos())

    def getGroup(self):
        """Consume a '\\g<...>' reference and return the text between the
        angle brackets.  Only call this when matchGroup() succeeds."""
        m = self.matchGroup()
        self.setPos(m.end())
        return m.group(1)

    ## main parse loop and the eat methods

    def parse(self):
        """Split the replacement string into code chunks."""
        while not self.atEnd():
            if self.matchBackref():
                self.eatBackref()
            elif self.matchGroup():
                self.eatGroup()
            else:
                self.eatStrConst()

    def eatStrConst(self):
        """Consume constant text up to the next reference (or the end) and
        add it as a string-literal chunk."""
        startPos = self.pos()
        while not self.atEnd():
            if self.matchBackref() or self.matchGroup():
                break
            else:
                self.advance()
        strConst = self.readTo(self.pos(), start=startPos)
        # repr() yields a valid, fully escaped Python literal for the text
        self.addChunk(repr(strConst))

    def eatBackref(self):
        """Consume a '\\N' backreference and add an m.group(N) chunk."""
        self.addChunk( 'm.group(' + self.getBackref() + ')' )

    def eatGroup(self):
        """Consume a '\\g<...>' reference and add the matching m.group()
        chunk: by index for '\\g<N>', by name for '\\g<name>'."""
        ref = self.getGroup()
        if ref.isdigit():
            # int() normalises leading zeros, which would otherwise be an
            # invalid integer literal in the generated code
            self.addChunk( 'm.group(' + str(int(ref)) + ')' )
        else:
            self.addChunk( 'm.group("' + ref + '")' )

    def addChunk(self, chunk):
        """Append one Python expression (as source text) to the chunk list."""
        self._codeChunks.append(chunk)

    ## code wrapping methods

    def codeBody(self):
        """Return the chunks joined into a comma-separated expression list."""
        return ', '.join(self._codeChunks)

    def code(self):
        """Return the source code of the generated 'subber' function."""
        return "def subber(m):\n\treturn ''.join([%s])\n" % (self.codeBody())

    def subberFunc(self):
        """Compile the generated source and return the 'subber' function.

        The executed code consists only of repr()'d string literals and
        m.group() calls whose arguments were validated by backrefRE/groupRE.
        """
        # exec into an explicit namespace: the function-call form is valid
        # in both Python 2 and Python 3 and does not rely on exec creating
        # a local variable in this method.
        namespace = {}
        exec(self.code(), namespace)
        return namespace['subber']
226
227
class FindAndReplace:

    """Find and replace all instances of 'patternOrRE' with 'replacement' for
    each file in the 'files' list. This is a multi-file version of re.sub().

    'patternOrRE' can be a raw regex pattern or
    a regex object as generated by the re module. 'replacement' can be any
    string that would work with patternOrRE.sub(replacement, fileContents),
    or a callable that takes a match object and returns the replacement text.

    All the work is done in the constructor; files are rewritten in place.
    Entries of 'files' that are not regular files are skipped.  When
    'recordResults' is true, results() returns a dictionary keyed by file
    name; each value is a dict with a 'count' of substitutions and a list of
    'matches', each giving the matched 'contents' and its 'start' and 'end'
    offsets in the original file contents.
    """

    def __init__(self, files, patternOrRE, replacement,
                 recordResults=True):

        if isinstance(patternOrRE, str):
            self._regex = re.compile(patternOrRE)
        else:
            self._regex = patternOrRE
        if isinstance(replacement, str):
            self._subber = _GenSubberFunc(replacement).subberFunc()
        else:
            self._subber = replacement

        self._pattern = self._regex.pattern
        self._files = files
        self._results = {}
        self._recordResults = recordResults
        self._currFile = None

        ## see if we should use pgrep to do the file matching
        self._usePgrep = self._canUsePgrep(self._pattern)

        self._run()

    def results(self):
        """Return the dictionary of recorded matches (empty when nothing
        matched or 'recordResults' was false)."""
        return self._results

    def _pgrep(self, args):
        """Run the external 'pgrep' command with 'args' and return its
        (stdout, stderr) output, or None if it could not be started.

        The arguments are passed as a list, without going through a shell,
        so quotes or shell metacharacters in a pattern or file name cannot
        alter the command that is run.
        """
        import subprocess
        try:
            proc = subprocess.Popen(['pgrep'] + list(args),
                                    stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE)
        except OSError:
            return None
        return proc.communicate()

    def _canUsePgrep(self, pattern):
        """Return True if a usable 'pgrep' is installed and it accepts
        'pattern' without complaint.

        NOTE(review): 'pgrep' here is presumably the Perl-compatible grep
        tool, not the process-listing utility of the same name; the 'Usage:'
        probe below is what tells them apart - confirm.
        """
        import tempfile
        probe = self._pgrep([])
        if probe is None or not probe[1].startswith(b'Usage:'):
            return False

        ## now check to make sure pgrep understands the pattern
        fd, tmpFile = tempfile.mkstemp()
        try:
            handle = os.fdopen(fd, 'w')
            try:
                handle.write('#')
            finally:
                handle.close()
            check = self._pgrep([pattern, tmpFile])
        finally:
            os.remove(tmpFile)
        # no error message on stderr means pgrep understood the pattern
        return check is not None and not check[1]

    def _readFile(self, fileName):
        """Return the full contents of 'fileName'."""
        handle = open(fileName)
        try:
            return handle.read()
        finally:
            handle.close()

    def _writeFile(self, fileName, contents):
        """Overwrite 'fileName' with 'contents'."""
        handle = open(fileName, 'w')
        try:
            handle.write(contents)
        finally:
            handle.close()

    def _run(self):
        """Apply the substitution to every regular file in the file list."""
        regex = self._regex
        subber = self._subDispatcher
        for fileName in self._files:
            if not os.path.isfile(fileName):
                continue # skip dirs etc.

            self._currFile = fileName
            orig = None
            if self._usePgrep:
                # pre-filter with pgrep so that files without a match never
                # have to be read by Python
                output = self._pgrep([self._pattern, fileName])
                found = bool(output and output[0])
            else:
                orig = self._readFile(fileName)
                found = regex.search(orig) is not None

            if found:
                if orig is None:
                    orig = self._readFile(fileName)
                self._writeFile(fileName, regex.sub(subber, orig))

    def _subDispatcher(self, match):
        """Record 'match' against the current file (if recording is on) and
        return the replacement text for it."""
        if self._recordResults:
            res = self._results.get(self._currFile)
            if res is None:
                res = self._results[self._currFile] = {'count': 0,
                                                       'matches': []}
            res['count'] += 1
            res['matches'].append({'contents':match.group(),
                                   'start':match.start(),
                                   'end':match.end(),
                                   }
                                  )
        return self._subber(match)
013e417 Cheetah's current version of these seems to have gotten out of sync with...
tavis_rudd authored
313
314
class SourceFileStats:

    """Counts code, comment and blank lines for a list of source files.

    A line is a comment line if its first non-whitespace character is '#',
    a blank line if it is empty or contains only whitespace, and a code line
    otherwise (so a line of code with a trailing comment counts as code).
    The statistics are gathered in the constructor.
    """

    _fileStats = None

    def __init__(self, files):
        """Gather the statistics for each file name in 'files'."""
        self._fileStats = stats = {}
        for file in files:
            stats[file] = self.getFileStats(file)

    def rawStats(self):
        """Return the per-file statistics: a dict mapping each file name to
        the dict produced by getFileStats()."""
        return self._fileStats

    def summary(self):
        """Return a dict holding the totals of 'codeLines', 'blankLines',
        'commentLines' and 'totalLines' over all the files."""
        totals = {'codeLines':0,
                  'blankLines':0,
                  'commentLines':0,
                  'totalLines':0,
                  }
        for fileStats in self.rawStats().values():
            for key in totals:
                totals[key] += fileStats[key]
        return totals

    def printStats(self):
        """Placeholder; currently does nothing."""
        pass

    def getFileStats(self, fileName):
        """Read 'fileName' and return a dict with its 'codeLines',
        'blankLines', 'commentLines' and 'totalLines' counts."""
        codeLines = 0
        blankLines = 0
        commentLines = 0
        # Both patterns are applied with match(), i.e. anchored at the start
        # of the line, so the leading whitespace has to be optional ('\s*')
        # for unindented comments and completely empty lines to be counted.
        commentLineRe = re.compile(r'\s*#')
        blankLineRe = re.compile(r'\s*$')

        handle = open(fileName)
        try:
            lines = handle.read().splitlines()
        finally:
            handle.close()
        totalLines = len(lines)

        for line in lines:
            if commentLineRe.match(line):
                commentLines += 1
            elif blankLineRe.match(line):
                blankLines += 1
            else:
                codeLines += 1

        stats = {'codeLines':codeLines,
                 'blankLines':blankLines,
                 'commentLines':commentLines,
                 'totalLines':totalLines,
                 }

        return stats
Something went wrong with that request. Please try again.