Permalink
Fetching contributors…
Cannot retrieve contributors at this time
93 lines (85 sloc) 3.26 KB
' Visual Basic .NET Document
Option Infer On
Option Strict On
' <Snippet12>
Imports System.Collections.Generic
Imports System.IO
Imports System.Text.RegularExpressions
Module Example
Public Sub Main()
Dim util As New RegexUtilities()
Dim title As String = "Doyle - The Hound of the Baskervilles.txt"
Try
Dim info = util.GetWordData(title)
Console.WriteLine("Words: {0:N0}", info.Item1)
Console.WriteLine("Average Word Length: {0:N2} characters", info.Item2)
Catch e As IOException
Console.WriteLine("IOException reading file '{0}'", title)
Console.WriteLine(e.Message)
Catch e As RegexMatchTimeoutException
Console.WriteLine("The operation timed out after {0:N0} milliseconds",
e.MatchTimeout.TotalMilliseconds)
End Try
End Sub
End Module
Public Class RegexUtilities
Public Function GetWordData(filename As String) As Tuple(Of Integer, Double)
Const MAX_TIMEOUT As Integer = 1000 ' Maximum timeout interval in milliseconds.
Const INCREMENT As Integer = 350 ' Milliseconds increment of timeout.
Dim exclusions As New List(Of String)({"a", "an", "the" })
Dim wordLengths(30) As Integer ' Allocate an array of more than ample size.
Dim input As String = Nothing
Dim sr As StreamReader = Nothing
Try
sr = New StreamReader(filename)
input = sr.ReadToEnd()
Catch e As FileNotFoundException
Dim msg As String = String.Format("Unable to find the file '{0}'", filename)
Throw New IOException(msg, e)
Catch e As IOException
Throw New IOException(e.Message, e)
Finally
If sr IsNot Nothing Then sr.Close()
End Try
Dim timeoutInterval As Integer = INCREMENT
Dim init As Boolean = False
Dim rgx As Regex = Nothing
Dim m As Match = Nothing
Dim indexPos As Integer = 0
Do
Try
If Not init Then
rgx = New Regex("\b\w+\b", RegexOptions.None,
TimeSpan.FromMilliseconds(timeoutInterval))
m = rgx.Match(input, indexPos)
init = True
Else
m = m.NextMatch()
End If
If m.Success Then
If Not exclusions.Contains(m.Value.ToLower()) Then
wordLengths(m.Value.Length) += 1
End If
indexPos += m.Length + 1
End If
Catch e As RegexMatchTimeoutException
If e.MatchTimeout.TotalMilliseconds < MAX_TIMEOUT Then
timeoutInterval += INCREMENT
init = False
Else
' Rethrow the exception.
Throw
End If
End Try
Loop While m.Success
' If regex completed successfully, calculate number of words and average length.
Dim nWords As Integer
Dim totalLength As Long
For ctr As Integer = wordLengths.GetLowerBound(0) To wordLengths.GetUpperBound(0)
nWords += wordLengths(ctr)
totalLength += ctr * wordLengths(ctr)
Next
Return New Tuple(Of Integer, Double)(nWords, totalLength/nWords)
End Function
End Class
' </Snippet12>