## Files

### a file is a contiguous set of bytes used to store data. 
### data is organized as simple text file or as complicated program executable
### Byte files are then translated into binary 1 and 0 for easier processing by the computer

## Files on most modern file systems are composed of three main parts:

### Header: metadata about the contents of the file (file name, size, type, and so on)
### Data: contents of the file as written by the creator or editor
### End of file (EOF): special character that indicates the end of the file

### File paths (depending on OS)
### File has a name and an extension separated with a dot(.)
### Line endings CR + LF or \r\n (windows, American Standards Association), only LF (Unix, ISO)
### Character Encodings: ASCII/UTF8 (128) or UNICODE(1,114,112)

## Opening and Closing files

In [1]:
fh = open('emails.txt') # default in read mode
fh.close()

In [2]:
with open('emails.txt') as fh:
    pass

In [3]:
with open('emails.txt', 'rb') as fh: # Buffered Binary files
    pass

In [None]:
with open('emails.txt', 'rb', buffering=0) as fh: # Binary files (raw files)
    pass

In [46]:
with open('emails.txt') as fh:
    str = fh.read()

print(str)

akkinjarapu@gmail.com
anandakumark@aec.edu.in
venkateshm@aec.edu.in


In [5]:
with open('emails.txt') as fh:
    print(fh.read(10))

akkinjarap


In [6]:
with open('emails.txt') as fh:
    print(fh.readline())

akkinjarapu@gmail.com



In [9]:
with open('emails.txt') as fh:
    print(fh.readline(15))
    print(fh.readline(15))
    print(fh.readline(15))

akkinjarapu@gma
il.com

anandakumark@ae


In [10]:
with open('emails.txt') as fh:
    print(fh.readlines())

['akkinjarapu@gmail.com\n', 'anandakumark@aec.edu.in\n', 'venkateshm@aec.edu.in']


## Iterating over each line

In [11]:
with open('emails.txt') as fh:
    while True:
        line = fh.readline() # reads the \n at the end of each line
        if(line == ''): break
        print(line)

akkinjarapu@gmail.com

anandakumark@aec.edu.in

venkateshm@aec.edu.in


In [12]:
with open('emails.txt') as fh:
    while True:
        line = fh.readline().rstrip() # reads the \n at the end of each line
        if(line == ''): break
        print(line)

akkinjarapu@gmail.com
anandakumark@aec.edu.in
venkateshm@aec.edu.in


In [14]:
with open('emails.txt') as fh:
    for line in fh.readlines():
        print(line.strip())

akkinjarapu@gmail.com
anandakumark@aec.edu.in
venkateshm@aec.edu.in


In [15]:
with open('emails.txt') as fh:
    for line in fh.read().splitlines():
        print(line)

akkinjarapu@gmail.com
anandakumark@aec.edu.in
venkateshm@aec.edu.in


In [16]:
with open('emails.txt') as fh:
    for line in fh.read().split('\n'):
        print(line)

akkinjarapu@gmail.com
anandakumark@aec.edu.in
venkateshm@aec.edu.in


In [18]:
# recommended (quicker and memory efficient)
with open('emails.txt') as fh:
    for line in fh:
        print(line.rstrip())

akkinjarapu@gmail.com
anandakumark@aec.edu.in
venkateshm@aec.edu.in


## Writing into files

In [47]:
with open('email.txt', 'w') as fh:
    fh.write('akk@gmail.com\n')
    fh.write('abc@gmail.com')
    fh.writelines(['abc@hotmail.com', 'abc@yahoo.com\n', 'abc@ymail.com'])

## Reading binary files

In [28]:
with open('dog.jpg', 'rb') as fh:
    print(fh.read(1)) 
    print(fh.read(3))
    print(fh.read(2)) 
    print(fh.read(1)) 
    print(fh.read(1)) 

b'\xff'
b'\xd8\xff\xe0'
b'\x00\x10'
b'J'
b'F'


## Random access

In [33]:
with open('emails.txt') as fh:
    for line in fh:
        print(line.rstrip())

akkinjarapu@gmail.com
anandakumark@aec.edu.in
venkateshm@aec.edu.in


In [53]:
with open('emails.txt') as fh:
    print(fh.tell())
    print(fh.readline().rstrip())
    print(fh.tell())
    print(fh.seek(5))
    print(fh.readline().rstrip())
    print(fh.seek(0, 2))
    print(fh.readline().rstrip())
print('for text files negative indexing not allowed')
print('for binary files negative indexing allowed')

0
akkinjarapu@gmail.com
23
5
jarapu@gmail.com
69

for text files negative indexing not allowed
for binary files negative indexing allowed


In [52]:
with open('emails.txt', 'rb') as fh:
    print(fh.tell())
    print(fh.readline().rstrip())
    print(fh.tell())
    print(fh.seek(-5, 1))
    print(fh.readline().rstrip())
    print(fh.seek(-5, 2))
    print(fh.readline().rstrip())

0
b'akkinjarapu@gmail.com'
23
18
b'com'
64
b'du.in'


## Directory listing

In [29]:
import os
os.listdir('.')

['.git',
 'ctemps.txt',
 'dog.jpg',
 'dog.png',
 'email.txt',
 'emails.txt',
 'ftemps.txt',
 'lab.ipynb',
 'mathutil',
 'presentations',
 'program-1000.ipynb',
 'program-1001.ipynb',
 'program-1002.ipynb',
 'program-1003.ipynb',
 'program-1004.ipynb',
 'program-1005.ipynb',
 'program-1006.ipynb',
 'program-1007.ipynb',
 'program-2000.ipynb',
 'program-2001.ipynb',
 'program-2002.ipynb',
 'program-2003.ipynb',
 'Program-3001.ipynb',
 'program-3002.ipynb',
 'program-3003.ipynb',
 'program-4002.ipynb',
 'program-5001.ipynb',
 'README.md',
 'recap.ipynb',
 'U1-0001.py',
 'util.py']

In [30]:
os.scandir('.') # > 3.5 version

<nt.ScandirIterator at 0x26ea05ec250>

In [31]:
with os.scandir('.') as entries:
    for entry in entries:
        print(entry.name)

.git
ctemps.txt
dog.jpg
dog.png
email.txt
emails.txt
ftemps.txt
lab.ipynb
mathutil
presentations
program-1000.ipynb
program-1001.ipynb
program-1002.ipynb
program-1003.ipynb
program-1004.ipynb
program-1005.ipynb
program-1006.ipynb
program-1007.ipynb
program-2000.ipynb
program-2001.ipynb
program-2002.ipynb
program-2003.ipynb
Program-3001.ipynb
program-3002.ipynb
program-3003.ipynb
program-4002.ipynb
program-5001.ipynb
README.md
recap.ipynb
U1-0001.py
util.py


In [32]:
from pathlib import Path

entries = Path('.')
for entry in entries.iterdir():
    print(entry.name)

.git
ctemps.txt
dog.jpg
dog.png
email.txt
emails.txt
ftemps.txt
lab.ipynb
mathutil
presentations
program-1000.ipynb
program-1001.ipynb
program-1002.ipynb
program-1003.ipynb
program-1004.ipynb
program-1005.ipynb
program-1006.ipynb
program-1007.ipynb
program-2000.ipynb
program-2001.ipynb
program-2002.ipynb
program-2003.ipynb
Program-3001.ipynb
program-3002.ipynb
program-3003.ipynb
program-4002.ipynb
program-5001.ipynb
README.md
recap.ipynb
U1-0001.py
util.py


## filter only subdirectories

In [54]:
list(filter(lambda f: os.path.isdir(os.path.join('.', f)), os.listdir('.')))

['.git', 'mathutil', 'presentations']

## filter only files

In [55]:
list(filter(lambda f: os.path.isfile(os.path.join('.', f)), os.listdir('.')))

['ctemps.txt',
 'dog.jpg',
 'dog.png',
 'email.txt',
 'emails.txt',
 'ftemps.txt',
 'lab.ipynb',
 'program-1000.ipynb',
 'program-1001.ipynb',
 'program-1002.ipynb',
 'program-1003.ipynb',
 'program-1004.ipynb',
 'program-1005.ipynb',
 'program-1006.ipynb',
 'program-1007.ipynb',
 'program-2000.ipynb',
 'program-2001.ipynb',
 'program-2002.ipynb',
 'program-2003.ipynb',
 'Program-3001.ipynb',
 'program-3002.ipynb',
 'program-3003.ipynb',
 'program-4002.ipynb',
 'program-5001.ipynb',
 'README.md',
 'recap.ipynb',
 'U1-0001.py',
 'util.py']

## filter only .py files

In [56]:
list(filter(lambda f: os.path.isfile(os.path.join('.', f)) and f.endswith('.py'), os.listdir('.')))

['U1-0001.py', 'util.py']

In [64]:
import glob
glob.glob('*.py')

['U1-0001.py', 'util.py']

## File Attibutes

In [57]:
os.stat(os.path.curdir)

os.stat_result(st_mode=16895, st_ino=1407374883618116, st_dev=985140597, st_nlink=1, st_uid=0, st_gid=0, st_size=4096, st_atime=1631525825, st_mtime=1631510356, st_ctime=1622875743)

## Creating and removing directories

In [58]:
os.mkdir('anand')

In [59]:
os.makedirs('anand/subdir/level1')

In [60]:
os.rmdir('anand/subdir/level1')

In [63]:
os.removedirs('anand/subdir')

## writing log files

In [10]:
import logging

## severity levels in increasing order: DEBUG INFO WARNING ERROR CRITICAL

In [2]:
logging.debug('This is a debug message')
logging.info('This is an info message')
logging.warning('This is a warning message')
logging.error('This is an error message')
logging.critical('This is a critical message')

ERROR:root:This is an error message
CRITICAL:root:This is a critical message


## by default, the logging module logs the messages with a severity level of WARNING or above

In [11]:
logging.basicConfig(level=logging.DEBUG)

In [12]:
logging.debug('This is a debug message')
logging.info('This is an info message')
logging.warning('This is a warning message')
logging.error('This is an error message')
logging.critical('This is a critical message')

ERROR:root:This is an error message
CRITICAL:root:This is a critical message


In [13]:
logging.basicConfig(filename='app.log', filemode='w', format='%(asctime)s - %(name)s - %(process)d - %(levelname)s - %(message)s', level = logging.DEBUG)

In [14]:
logging.debug('This is a debug message')
logging.info('This is an info message')
logging.warning('This is a warning message')
logging.error('This is an error message')
logging.critical('This is a critical message')

ERROR:root:This is an error message
CRITICAL:root:This is a critical message
