# Memory Mapped Files

# Reading a memory-mapped file

In [16]:
import os
# Path of the sample log file handed to the read/search functions timed in the cells below.
filename = "/data/hemant/log.txt"
# True only when that path currently names an existing regular file.
isFile = os.path.isfile(filename)
def regular_io(filename):
    """Read all of *filename* as UTF-8 text with ordinary buffered file I/O.

    The decoded text is thrown away: the function exists so the cost of a
    plain ``open`` + ``read`` can be measured. Returns ``None``.
    """
    with open(filename, encoding="utf8") as source:
        source.read()

In [17]:
import mmap

def mmap_io(filename):
    """Read every byte of *filename* through a read-only memory map.

    The file is mapped in full (``length=0``) and the bytes are read once
    and discarded, so the function is only useful for timing. Returns
    ``None``.
    """
    with open(filename, encoding="utf8") as source:
        descriptor = source.fileno()
        with mmap.mmap(descriptor, 0, access=mmap.ACCESS_READ) as mapped:
            mapped.read()

In [18]:
import timeit
# Three separate timings, each of a single regular_io(filename) call.
timeit.repeat(
    stmt="regular_io(filename)",
    setup="from __main__ import regular_io, filename",
    repeat=3,
    number=1,
)

[1.4372454149997793, 0.9140535420010565, 0.9068077439878834]

In [19]:
# Three separate timings, each of a single mmap_io(filename) call.
# (The stray "..." REPL continuation marker that preceded ``setup=`` was a
# paste artifact and made the call a syntax error; it has been removed.)
timeit.repeat(
    "mmap_io(filename)",
    repeat=3,
    number=1,
    setup="from __main__ import mmap_io, filename",
)

[0.5854763259994797, 0.439783167996211, 0.4396393700008048]

# mmap objects as byte strings: slicing

In [8]:
import mmap
# Sample log file used by the examples in this section.
filename = "/data/hemant/log.txt"


def mmap_io(filename):
    """Print bytes 10 through 19 of *filename*, sliced directly from a memory map.

    The slice is a ``bytes`` object, so it is printed in ``b'...'`` form.
    Returns ``None``.
    """
    with open(filename, encoding="utf8") as source:
        mapped = mmap.mmap(source.fileno(), 0, access=mmap.ACCESS_READ)
        with mapped:
            window = mapped[10:20]
            print(window)

# Searching a memory-mapped file

In [39]:
import mmap

def regular_io_find(filename):
    """Load *filename* as UTF-8 text and look for the first ``" the "``.

    The index returned by the search is discarded; the function exists to
    time "read everything, then search". Returns ``None``.
    """
    with open(filename, encoding="utf-8") as source:
        contents = source.read()
    contents.find(" the ")

def mmap_io_find(filename):
    """Search a read-only memory map of *filename* for the bytes ``b" the "``.

    The index returned by the search is discarded; the function exists to
    time a search that does not first copy the file into a Python object.
    Returns ``None``.
    """
    with open(filename, encoding="utf-8") as source:
        descriptor = source.fileno()
        with mmap.mmap(descriptor, 0, access=mmap.ACCESS_READ) as mapped:
            needle = b" the "
            mapped.find(needle)

In [40]:
import timeit
# Three separate timings, each of a single regular_io_find(filename) call.
timeit.repeat(
    stmt="regular_io_find(filename)",
    setup="from __main__ import regular_io_find, filename",
    repeat=3,
    number=1,
)

[0.3952411329955794, 0.2799205269984668, 0.23502482900221366]

In [41]:
# Three separate timings, each of a single mmap_io_find(filename) call.
timeit.repeat(
    stmt="mmap_io_find(filename)",
    setup="from __main__ import mmap_io_find, filename",
    repeat=3,
    number=1,
)

[0.0007837999874027446, 0.00040584900125395507, 0.000454384004115127]

# Memory-mapped objects as files

In [52]:
import mmap
def regular_io_find_and_seek(filename):
    """Skip the first 10000 bytes of *filename*, then search the rest for ``" the "``.

    The file is opened in binary mode because seeking a text-mode stream to
    an arbitrary offset is undefined: with UTF-8 data the offset can land in
    the middle of a multi-byte character and make the following read raise
    ``UnicodeDecodeError``. Here the seek is done on bytes, any partial
    character at the cut point is dropped, and the remainder is decoded as
    UTF-8 before searching.

    The search result is discarded; the function exists for timing.
    Returns ``None``.

    Raises:
        OSError: if the file cannot be opened or read.
        UnicodeDecodeError: if the data after the cut point is not valid UTF-8.
    """
    with open(filename, mode="rb") as file_obj:
        file_obj.seek(10000)
        data = file_obj.read()

    # A UTF-8 character has at most three continuation bytes (0b10xxxxxx);
    # skip any that belong to a character split by the seek offset.
    start = 0
    while start < len(data) and start < 3 and (data[start] & 0xC0) == 0x80:
        start += 1

    text = data[start:].decode("utf-8")
    text.find(" the ")

In [53]:
def mmap_io_find_and_seek(filename):
    """Move a read-only memory map of *filename* to offset 10000, then search for ``b" the "``.

    The seek is issued before the search, presumably so the search begins
    at that offset (confirm against the ``mmap.find`` default for *start*).
    The search result is discarded; the function exists for timing.
    Returns ``None``.
    """
    with open(filename, encoding="utf-8") as source:
        mapped = mmap.mmap(source.fileno(), 0, access=mmap.ACCESS_READ)
        with mapped:
            mapped.seek(10000)
            mapped.find(b" the ")

In [54]:
import timeit
# Three separate timings, each of a single regular_io_find_and_seek(filename) call.
timeit.repeat(
    stmt="regular_io_find_and_seek(filename)",
    setup="from __main__ import regular_io_find_and_seek, filename",
    repeat=3,
    number=1,
)

[1.0732763529958902, 0.8469445810042089, 0.8576581630040891]

In [55]:
# Three separate timings, each of a single mmap_io_find_and_seek(filename) call.
timeit.repeat(
    stmt="mmap_io_find_and_seek(filename)",
    setup="from __main__ import mmap_io_find_and_seek, filename",
    repeat=3,
    number=1,
)

[0.00016790399968158454, 0.00021273999300319701, 0.0004815139982383698]

# Writing to a memory-mapped file

In [None]:
import mmap
def mmap_io_write(filename, text):
    """Replace the contents of *filename* with *text*, written through a memory map.

    Args:
        filename: Path of the file to create or overwrite.
        text: Content to store. A ``str`` is encoded as UTF-8; a bytes-like
            object is written unchanged.

    Raises:
        OSError: if the file cannot be opened, resized or mapped.
    """
    # mmap.write() only accepts bytes-like data.
    data = text.encode("utf-8") if isinstance(text, str) else bytes(text)

    # "w+b" rather than "w": a writable mapping needs a descriptor that is
    # open for reading as well as writing.
    with open(filename, mode="w+b") as file_obj:
        if not data:
            # Opening already truncated the file, and an empty file cannot
            # be memory-mapped, so there is nothing left to do.
            return
        # A mapping cannot grow the file, so size it before mapping.
        file_obj.truncate(len(data))
        with mmap.mmap(file_obj.fileno(), length=0, access=mmap.ACCESS_WRITE) as mmap_obj:
            mmap_obj.write(data)
            mmap_obj.flush()

In [None]:
import mmap
def mmap_io_write(filename):
    """Overwrite bytes 10 through 15 of *filename* with ``b"python"`` via a writable memory map.

    The file is opened for update (it must already exist), the six bytes
    are assigned in place, and the mapping is flushed before it is closed.
    Returns ``None``.
    """
    payload = b"python"
    begin = 10
    with open(filename, "r+") as target:
        mapped = mmap.mmap(target.fileno(), 0, access=mmap.ACCESS_WRITE)
        with mapped:
            mapped[begin:begin + len(payload)] = payload
            mapped.flush()

# Search and Replace text

In [59]:
import mmap
import os
import shutil

def regular_io_find_and_replace(filename):
    """Replace every ``" the "`` in *filename* with ``" eht "`` using ordinary file I/O.

    The whole file is read as UTF-8 text, the replacement is applied in
    memory, the result is written to a scratch file, and the scratch file
    is then copied over *filename*.

    Differences from a fixed ``tmp.txt`` in the working directory:
    the scratch file gets a unique name in the system temporary directory,
    so an existing ``tmp.txt`` is never overwritten or deleted, and the
    scratch file is removed even when the write or copy fails. Files are
    opened with ``newline=""`` so existing line endings are left untouched.

    Raises:
        OSError: if any file cannot be opened, written, copied or removed.
        UnicodeDecodeError: if *filename* is not valid UTF-8.
    """
    import tempfile  # local import: only this function needs it

    with open(filename, "r", encoding="utf-8", newline="") as orig_file_obj:
        new_text = orig_file_obj.read().replace(" the ", " eht ")

    fd, tmp_path = tempfile.mkstemp(suffix=".txt")
    try:
        # open() takes ownership of the descriptor and closes it on exit.
        with open(fd, "w", encoding="utf-8", newline="") as new_file_obj:
            new_file_obj.write(new_text)
        shutil.copyfile(tmp_path, filename)
    finally:
        os.remove(tmp_path)

def mmap_io_find_and_replace(filename):
    """Replace every ``b" eht "`` in *filename* with ``b" the "`` through a writable memory map.

    The mapped bytes are read in full, the replacement is applied to that
    copy, and the result is assigned back over the whole mapping before it
    is flushed. Both byte strings have the same length, so the replacement
    text matches the size of the mapping. Returns ``None``.
    """
    with open(filename, "r+", encoding="utf-8") as target:
        mapped = mmap.mmap(target.fileno(), 0, access=mmap.ACCESS_WRITE)
        with mapped:
            mapped[:] = mapped.read().replace(b" eht ", b" the ")
            mapped.flush()

In [57]:
import timeit
# Three separate timings, each of a single regular_io_find_and_replace(filename) call.
timeit.repeat(
    stmt="regular_io_find_and_replace(filename)",
    setup="from __main__ import regular_io_find_and_replace, filename",
    repeat=3,
    number=1,
)

[4.253578268006095, 3.1810569380031666, 3.3544113550015027]

In [60]:
# Three separate timings, each of a single mmap_io_find_and_replace(filename) call.
timeit.repeat(
    stmt="mmap_io_find_and_replace(filename)",
    setup="from __main__ import mmap_io_find_and_replace, filename",
    repeat=3,
    number=1,
)

[2.264397455001017, 1.5541641849995358, 1.598239985993132]

In [9]:
# Build a large sample file: the sentence below repeated 5,000,000 times,
# separated by single spaces, written to log.txt in the working directory.
mystring = "This is python training classroom"
repeatedstring = ' '.join([mystring]*5000000)
# The context manager closes the file even if the write fails part-way.
with open("log.txt", "w") as file:
    file.write(repeatedstring)