In [69]:
import re

In [70]:
def _occurrence_spans(needle, haystack):
    """Return inclusive (start, end) index pairs of each literal occurrence.

    ``needle`` is escaped before searching, so regex metacharacters that may
    appear in a model name (``+``, ``(``, ``.`` ...) are matched literally.
    The occurrences are the non-overlapping, left-to-right matches reported
    by ``re.finditer``.
    """
    return [(m.start(), m.end() - 1)
            for m in re.finditer(re.escape(needle), haystack)]


def _is_space_bounded(title, start, end):
    """Return True if title[start:end+1] is delimited by spaces or string edges."""
    left_ok = start == 0 or title[start - 1] == " "
    right_ok = end + 1 >= len(title) or title[end + 1] == " "
    return left_ok and right_ok


def match_model(title, model):
    """
    Determine if model is reflected in title.

    Three strategies are tried in order, and the first one whose substring
    test succeeds decides the result:

    1. ``model`` appears verbatim in ``title``           -> MODEL-MATCH:EXACT
    2. ``model`` with spaces removed appears in ``title`` -> MODEL-MATCH:MODEL_NOSPACE
    3. ``model`` with spaces removed appears in ``title`` with spaces removed
                                                          -> MODEL-MATCH:TITLE_NOSPACE

    Within the deciding strategy *every* occurrence must be delimited by a
    space (or the start/end of the title) in the original title; if any
    occurrence is glued to a neighbouring character the result is NOTMATCH
    and later strategies are not tried.

    Parameters
    ----------
    title : str
        Product title to search in.
    model : str
        Model name to look for. It is always matched literally, never as a
        regular expression.

    Returns
    -------
    str
        One of "MODEL-MATCH:EXACT", "MODEL-MATCH:MODEL_NOSPACE",
        "MODEL-MATCH:TITLE_NOSPACE" or "NOTMATCH". A model that is empty or
        consists only of spaces yields "NOTMATCH".

    >>> match_model("sony cyber shot dsc w310", "dsc w310")
    'MODEL-MATCH:EXACT'
    >>> match_model("sony cyber shot dsc w310x", "dsc w310")
    'NOTMATCH'

    >>> match_model("sony cyber shot sx130is", "sx130 is")
    'MODEL-MATCH:MODEL_NOSPACE'
    >>> match_model("sony cyber shot usx130is", "sx130 is")
    'NOTMATCH'

    >>> match_model("sony cyber shot sx130 is", "sx130is")
    'MODEL-MATCH:TITLE_NOSPACE'
    >>> match_model("sony cyber shot usx130 is", "sx130is")
    'NOTMATCH'
    """
    model_nospace = model.replace(" ", "")
    if not model_nospace:
        # Nothing meaningful to look for; an empty pattern would otherwise
        # "occur" at every position of the title.
        return "NOTMATCH"

    if model in title:
        spans = _occurrence_spans(model, title)
        if all(_is_space_bounded(title, s, e) for s, e in spans):
            return "MODEL-MATCH:EXACT"
        return "NOTMATCH"

    if model_nospace in title:
        spans = _occurrence_spans(model_nospace, title)
        if all(_is_space_bounded(title, s, e) for s, e in spans):
            return "MODEL-MATCH:MODEL_NOSPACE"
        return "NOTMATCH"

    # strip all space from title too
    title_nospace = title.replace(" ", "")
    if model_nospace in title_nospace:
        # orig_index[k] is the position in `title` of the k-th non-space
        # character, i.e. of title_nospace[k]. Building it directly from
        # `title` also copes with leading/trailing spaces.
        orig_index = [i for i, ch in enumerate(title) if ch != " "]
        spans = [(orig_index[s], orig_index[e])
                 for s, e in _occurrence_spans(model_nospace, title_nospace)]
        if all(_is_space_bounded(title, s, e) for s, e in spans):
            return "MODEL-MATCH:TITLE_NOSPACE"
        return "NOTMATCH"

    return "NOTMATCH"

In [72]:
# Exercise match_model on the six docstring scenarios; one result per line.
print("\n".join(
    match_model(demo_title, demo_model)
    for demo_title, demo_model in (
        ("sony cyber shot dsc w310", "dsc w310"),
        ("sony cyber shot dsc w310x", "dsc w310"),
        ("sony cyber shot sx130is", "sx130 is"),
        ("sony cyber shot usx130is", "sx130 is"),
        ("sony cyber shot sx130 is", "sx130is"),
        ("sony cyber shot usx130 is", "sx130is"),
    )
))

MODEL-MATCH:EXACT
NOTMATCH
MODEL-MATCH:MODEL_NOSPACE
NOTMATCH
MODEL-MATCH:TITLE_NOSPACE
NOTMATCH


In [6]:
# Demonstrate str.replace with and without the optional `count` argument.
# The sample is bound to `sample_text` rather than `str`, so the built-in
# `str` type is not shadowed for the rest of the kernel session.
sample_text = "this is string example....wow!!! this is really string"
print(sample_text.replace("is", "was"))      # replace every occurrence
print(sample_text.replace("is", "was", 3))   # replace only the first three

thwas was string example....wow!!! thwas was really string
thwas was string example....wow!!! thwas is really string


In [22]:
import re
def purge_model_from_title(model, title, match_result):
    """Remove every occurrence of ``model`` from ``title``.

    How the model is located depends on ``match_result``, which is expected
    to be one of the strings produced by ``match_model``:

    * "MODEL-MATCH:EXACT"         - remove ``model`` verbatim.
    * "MODEL-MATCH:MODEL_NOSPACE" - remove ``model`` with its spaces stripped.
    * "MODEL-MATCH:TITLE_NOSPACE" - locate the space-stripped model inside the
      space-stripped title and cut the corresponding span (including any
      spaces inside it) out of the original title.

    Only the matched characters are removed; spaces surrounding an
    occurrence are kept, so the result may contain double or trailing spaces.

    Parameters
    ----------
    model : str
        Model name to remove. Always treated literally, never as a regex.
    title : str
        Title to remove the model from.
    match_result : str
        Match classification that selects the removal strategy.

    Returns
    -------
    str
        The title with the model removed. The title is returned unchanged
        when ``match_result`` is any other value (e.g. "NOTMATCH") or when
        no occurrence is found.
    """
    model_nospace = model.replace(" ", "")

    if match_result == "MODEL-MATCH:EXACT":
        return title.replace(model, "")

    if match_result == "MODEL-MATCH:MODEL_NOSPACE":
        return title.replace(model_nospace, "")

    if match_result == "MODEL-MATCH:TITLE_NOSPACE" and model_nospace:
        title_nospace = title.replace(" ", "")
        # orig_index[k] is the position in `title` of title_nospace[k].
        orig_index = [i for i, ch in enumerate(title) if ch != " "]

        kept = []
        cursor = 0  # first index of `title` not yet copied or cut
        for m in re.finditer(re.escape(model_nospace), title_nospace):
            start = orig_index[m.start()]
            end = orig_index[m.end() - 1]
            kept.append(title[cursor:start])
            cursor = end + 1
        kept.append(title[cursor:])
        return "".join(kept)

    # Nothing to purge for NOTMATCH / unrecognised results.
    return title

In [28]:
# Show purge_model_from_title for each of the three match kinds.
# print(...) with a single argument behaves the same on Python 2 and 3.
print(purge_model_from_title("dsc w310", "sony cyber shot dsc w310", "MODEL-MATCH:EXACT"))
print(purge_model_from_title("sx130 is", "sony cyber shot sx130is", "MODEL-MATCH:MODEL_NOSPACE"))
print(purge_model_from_title("sx130is", "sony sx130 is cyber sx130is shot sx130 is hello", "MODEL-MATCH:TITLE_NOSPACE"))

sony cyber shot 
sony cyber shot 
sony  cyber  shot  hello


In [32]:
import string
# Display the distinct characters of string.punctuation as a set.
{symbol for symbol in string.punctuation}

{'!',
 '"',
 '#',
 '$',
 '%',
 '&',
 "'",
 '(',
 ')',
 '*',
 '+',
 ',',
 '-',
 '.',
 '/',
 ':',
 ';',
 '<',
 '=',
 '>',
 '?',
 '@',
 '[',
 '\\',
 ']',
 '^',
 '_',
 '`',
 '{',
 '|',
 '}',
 '~'}