Permalink
Cannot retrieve contributors at this time
Fetching contributors…
| ' Visual Basic .NET Document | |
| Option Infer On | |
| Option Strict On | |
| ' <Snippet12> | |
| Imports System.Collections.Generic | |
| Imports System.IO | |
| Imports System.Text.RegularExpressions | |
Module Example
    ''' <summary>
    ''' Asks <see cref="RegexUtilities.GetWordData"/> for the word statistics of a
    ''' hard-coded text file and writes them to the console. An I/O failure or a
    ''' regular-expression timeout is reported on the console instead.
    ''' </summary>
    Public Sub Main()
        Const bookFile As String = "Doyle - The Hound of the Baskervilles.txt"
        Dim analyzer As RegexUtilities = New RegexUtilities()

        Try
            Dim stats As Tuple(Of Integer, Double) = analyzer.GetWordData(bookFile)
            Dim wordCount As Integer = stats.Item1
            Dim averageLength As Double = stats.Item2

            Console.WriteLine("Words: {0:N0}", wordCount)
            Console.WriteLine("Average Word Length: {0:N2} characters", averageLength)
        Catch timeoutEx As RegexMatchTimeoutException
            ' Report the timeout interval that was in effect when matching gave up.
            Dim timeoutMs As Double = timeoutEx.MatchTimeout.TotalMilliseconds
            Console.WriteLine("The operation timed out after {0:N0} milliseconds", timeoutMs)
        Catch ioEx As IOException
            Console.WriteLine("IOException reading file '{0}'", bookFile)
            Console.WriteLine(ioEx.Message)
        End Try
    End Sub
End Module
Public Class RegexUtilities
    ''' <summary>
    ''' Counts the words in a text file and computes their average length,
    ''' skipping the articles "a", "an" and "the" regardless of case.
    ''' </summary>
    ''' <param name="filename">Path of the text file to analyze.</param>
    ''' <returns>
    ''' A tuple whose first item is the number of counted words and whose second
    ''' item is their average length in characters (0 when no word was counted).
    ''' </returns>
    ''' <exception cref="IOException">
    ''' The file cannot be found or cannot be read.
    ''' </exception>
    ''' <exception cref="RegexMatchTimeoutException">
    ''' A match timed out even though the timeout interval had already been
    ''' raised to its maximum.
    ''' </exception>
    Public Function GetWordData(filename As String) As Tuple(Of Integer, Double)
        Const MAX_TIMEOUT As Integer = 1000   ' Maximum timeout interval in milliseconds.
        Const INCREMENT As Integer = 350      ' Milliseconds increment of timeout.

        ' Case-insensitive set: no per-word lowercase copy and no culture dependence.
        Dim exclusions As New HashSet(Of String)({"a", "an", "the"},
                                                 StringComparer.OrdinalIgnoreCase)

        Dim input As String
        Try
            Using sr As New StreamReader(filename)
                input = sr.ReadToEnd()
            End Using
        Catch e As FileNotFoundException
            Dim msg As String = String.Format("Unable to find the file '{0}'", filename)
            Throw New IOException(msg, e)
        End Try
        ' Any other IOException propagates unchanged, keeping its type and stack trace.

        Dim timeoutInterval As Integer = INCREMENT
        Dim m As Match = Nothing          ' Nothing means "(re)start matching at indexPos".
        Dim indexPos As Integer = 0       ' Position just past the last word processed.
        Dim nWords As Integer = 0
        Dim totalLength As Long = 0
        Dim finished As Boolean = False

        Do
            Try
                If m Is Nothing Then
                    Dim rgx As New Regex("\b\w+\b", RegexOptions.None,
                                         TimeSpan.FromMilliseconds(timeoutInterval))
                    m = rgx.Match(input, indexPos)
                Else
                    m = m.NextMatch()
                End If

                If m.Success Then
                    If Not exclusions.Contains(m.Value) Then
                        ' Running totals instead of a fixed-size histogram, so a word
                        ' of any length can be counted.
                        nWords += 1
                        totalLength += m.Length
                    End If
                    ' Record the real end of this match so a restart after a timeout
                    ' resumes here and never re-counts a word.
                    indexPos = m.Index + m.Length
                Else
                    finished = True
                End If
            Catch e As RegexMatchTimeoutException
                If e.MatchTimeout.TotalMilliseconds < MAX_TIMEOUT Then
                    ' Retry from indexPos with a longer timeout, never above the maximum.
                    timeoutInterval = Math.Min(timeoutInterval + INCREMENT, MAX_TIMEOUT)
                    m = Nothing
                Else
                    ' Rethrow the exception.
                    Throw
                End If
            End Try
        Loop Until finished

        ' Avoid 0/0 (NaN) when the file contains no countable words.
        Dim averageLength As Double = 0.0
        If nWords > 0 Then averageLength = totalLength / nWords

        Return New Tuple(Of Integer, Double)(nWords, averageLength)
    End Function
End Class
| ' </Snippet12> |