In [28]:
def wer(ref, hyp ,debug=False):
    """Compute the word error rate (WER) of ``hyp`` measured against ``ref``.

    Both strings are split on whitespace and aligned word by word with a
    Levenshtein-style dynamic program (unit cost for substitution, insertion
    and deletion). Word comparison is exact, so it is case- and
    punctuation-sensitive.

    Parameters
    ----------
    ref : str
        Reference text.
    hyp : str
        Hypothesis text to score.
    debug : bool, optional
        When true, print the alignment as an ``OP / REF / HYP`` table followed
        by the number of correct, substituted, deleted and inserted words.

    Returns
    -------
    float
        ``(substitutions + deletions + insertions) / number of reference words``.
        The value can exceed 1.0 when the hypothesis has many extra words.
        If ``ref`` contains no words, the ratio is undefined: ``0.0`` is
        returned when ``hyp`` is empty as well, ``float("inf")`` otherwise.
    """
    r = ref.split()
    h = hyp.split()
    n_ref = len(r)
    n_hyp = len(h)

    OP_OK = 0
    OP_SUB = 1
    OP_INS = 2
    OP_DEL = 3

    DEL_PENALTY = 1
    INS_PENALTY = 1
    SUB_PENALTY = 1

    # costs[i][j] holds the minimal edit cost between the first i reference
    # words and the first j hypothesis words (Levenshtein distance table).
    costs = [[0] * (n_hyp + 1) for _ in range(n_ref + 1)]
    # backtrace[i][j] records which operation produced costs[i][j], so the
    # optimal alignment can be recovered afterwards.
    backtrace = [[OP_OK] * (n_hyp + 1) for _ in range(n_ref + 1)]

    # First column: reach zero hypothesis words by deleting every reference word.
    for i in range(1, n_ref + 1):
        costs[i][0] = DEL_PENALTY * i
        backtrace[i][0] = OP_DEL

    # First row: build the hypothesis by inserting every word into an empty reference.
    for j in range(1, n_hyp + 1):
        costs[0][j] = INS_PENALTY * j
        backtrace[0][j] = OP_INS

    # Fill the rest of the table.
    for i in range(1, n_ref + 1):
        for j in range(1, n_hyp + 1):
            if r[i-1] == h[j-1]:
                costs[i][j] = costs[i-1][j-1]
                backtrace[i][j] = OP_OK
            else:
                substitutionCost = costs[i-1][j-1] + SUB_PENALTY
                insertionCost    = costs[i][j-1] + INS_PENALTY
                deletionCost     = costs[i-1][j] + DEL_PENALTY

                costs[i][j] = min(substitutionCost, insertionCost, deletionCost)
                # Ties are resolved in the order substitution, insertion, deletion.
                if costs[i][j] == substitutionCost:
                    backtrace[i][j] = OP_SUB
                elif costs[i][j] == insertionCost:
                    backtrace[i][j] = OP_INS
                else:
                    backtrace[i][j] = OP_DEL

    # Walk back from the bottom-right corner along the best route, counting
    # each operation. The alignment is collected end-to-start.
    i = n_ref
    j = n_hyp
    numSub = 0
    numDel = 0
    numIns = 0
    numCor = 0
    lines = []
    while i > 0 or j > 0:
        op = backtrace[i][j]
        if op == OP_OK:
            numCor += 1
            i -= 1
            j -= 1
            if debug:
                lines.append("OK\t" + r[i] + "\t" + h[j])
        elif op == OP_SUB:
            numSub += 1
            i -= 1
            j -= 1
            if debug:
                lines.append("SUB\t" + r[i] + "\t" + h[j])
        elif op == OP_INS:
            numIns += 1
            j -= 1
            if debug:
                lines.append("INS\t" + "****" + "\t" + h[j])
        else:  # OP_DEL
            numDel += 1
            i -= 1
            if debug:
                lines.append("DEL\t" + r[i] + "\t" + "****")

    if debug:
        print("OP\tREF\tHYP")
        for line in reversed(lines):
            print(line)
        print("Ncor " + str(numCor))
        print("Nsub " + str(numSub))
        print("Ndel " + str(numDel))
        print("Nins " + str(numIns))

    numErr = numSub + numDel + numIns
    if n_ref == 0:
        # No reference words: the ratio is undefined, so avoid dividing by zero.
        return 0.0 if numErr == 0 else float("inf")
    return numErr / float(n_ref)


In [29]:
# Small hand-made sanity check for wer(): five reference words against seven
# hypothesis words (2 substitutions + 2 insertions -> 4 / 5 = 0.8).
# This cell previously held a multi-line string inside plain double quotes,
# which is a SyntaxError; the reference below is the one whose alignment is
# recorded in this cell's output. The long document text is assigned to `ref`
# on a single line in a later cell.
ref = "Tuan anh mot ha chin"
hyp='tuan anh mot hai ba bon chin'
wer(ref, hyp ,debug=True)

OP	REF	HYP
SUB	Tuan	tuan
OK	anh	anh
OK	mot	mot
INS	****	hai
INS	****	ba
SUB	ha	bon
OK	chin	chin
Ncor 3
Nsub 2
Ndel 0
Nins 2


0.8

In [32]:
# Reference text for the long-document comparison; it is passed as `ref` to wer() further down.
# NOTE(review): this string appears to contain more recognition errors than `hyp`.
# wer() divides by the number of words in `ref`, so confirm which of the two is the ground truth.
ref="DELHI 2 1 The expreesions of the VENDOR/S and the Vendee herein used shall nean and include themtheir respective legalheirs reprasentatives admnistrators, sucdessors executorsy nominess and assignees etc. WHEREAS THE VENDOR/S 35 the ARSOLOTE OWNER and in possess on of BUILT UP ONE FLAT ON GROUND FLOOR, UPTO CEILINE LEVEL BUILTON LAND AREA MEASURING 85 So YDS. OUT OF FHASRA NO. 354, BEARING PROPE RTY NO,1/19974-8, CONSISTING WITH WHATSOEVER THEREON AND WITHOUT ROOF RIGHT AND WITH THE ELECTRICITY AND WATER CONNE CTION, SITUATED AT IN THE AREA OF VILLAGE BABARPUR IN THE ABADI DE SALI NO.3, WEST BORAKH PARK ILLAQA SHAHDARA DELHI-119032 AND BOUNDED AS UNDER COMMON FACILITIES IN ENTIRE BUILDING. FITTED WITH EAST: WEST NORTH: SOUTH: PROPERTY OF 10b  PROPERTY OF PROPERTY OF RUAD 15 FT. CUNTD P"

In [36]:
# Hypothesis text for the long-document comparison; it is passed as `hyp` to wer() in the next cell.
# wer() splits on whitespace only, so tokens glued together by missing spaces count as single words.
hyp = "DELHI 2 1 The expression of the VENDOR/S and the vendee herein used shall mean and include them/thein respective legalheirs representatives,admanistmators, successors ,executors nominess and assignees ete. WHEREAS THE VENDOR/S is the ARSOLUTE OWNER and in possession of BUILT UP ONE FLAT ON GROUND FLOOR UPTO CEILING LEVEL BUILTON LAND AREA MEASURING 85 SQ YDS OUT OF KHASRA NO. 35A BEARING PROPERTY NO.1/10074-B, CONSISTING WITH WHATSOEVER THEREON AND WITHOUT ROOF RIGHT AND WITH THE COMMON FACILITIES,IN ENTIRE BUILDING,FITTED WITH ELECTRICITY AND WATER CONNECTION, SITUATED AT IN THE AREA OF VILLAGE BABARPUR IN THE ABADI OF GALI NO.3 WEST GORAKH PARK, TLLAQA SHAHDARA DELHI-110032, AND BOUNDED AS UNDER EAST:PROPERTY OFWEST:PROPERTY OFNORTH:PROPERTY OFSOUTH:ROAD 15 FT CONTD P/3"

In [37]:
# Score `hyp` against `ref` word by word; debug=True prints the OP/REF/HYP alignment table
# and the per-operation counts, and the returned error rate is shown as the cell's value.
wer(ref, hyp ,debug=True)

OP	REF	HYP
OK	DELHI	DELHI
OK	2	2
OK	1	1
OK	The	The
SUB	expreesions	expression
OK	of	of
OK	the	the
OK	VENDOR/S	VENDOR/S
OK	and	and
OK	the	the
SUB	Vendee	vendee
OK	herein	herein
OK	used	used
OK	shall	shall
SUB	nean	mean
OK	and	and
OK	include	include
SUB	themtheir	them/thein
OK	respective	respective
OK	legalheirs	legalheirs
DEL	reprasentatives	****
SUB	admnistrators,	representatives,admanistmators,
SUB	sucdessors	successors
SUB	executorsy	,executors
OK	nominess	nominess
OK	and	and
OK	assignees	assignees
SUB	etc.	ete.
OK	WHEREAS	WHEREAS
OK	THE	THE
OK	VENDOR/S	VENDOR/S
SUB	35	is
OK	the	the
SUB	ARSOLOTE	ARSOLUTE
OK	OWNER	OWNER
OK	and	and
OK	in	in
DEL	possess	****
SUB	on	possession
OK	of	of
OK	BUILT	BUILT
OK	UP	UP
OK	ONE	ONE
OK	FLAT	FLAT
OK	ON	ON
OK	GROUND	GROUND
SUB	FLOOR,	FLOOR
OK	UPTO	UPTO
SUB	CEILINE	CEILING
OK	LEVEL	LEVEL
OK	BUILTON	BUILTON
OK	LAND	LAND
OK	AREA	AREA
OK	MEASURING	MEASURING
OK	85	85
SUB	So	SQ
SUB	YDS.	YDS
OK	OUT	OUT
OK	OF	OF
SUB	FHASRA	KHASRA
OK	NO.	NO.
SUB	354,	35A
OK	BEA

0.4496124031007752