# Chapter 16 File input and output

#### The file "ids.txt" was created beforehand; each line contains a gene ID, a tab, and a GC-content value

In [3]:
import io

## io.open()

In [2]:
# Open the ID file for reading. "r" (read-only text) is the default mode and is
# spelled out here only for clarity; the handle stays open until .close().
fhandle = io.open("data_16/ids.txt", mode="r")

## .readline()

In [3]:
# Every .readline() call returns the next line of the file, still carrying its
# trailing newline, so three calls give the first three lines in order.
l1, l2, l3 = (fhandle.readline() for _ in range(3))

In [4]:
# Show the three lines one after another. Each already ends with "\n", and
# print adds another newline after each value, hence the blank rows in the output.
print(l1, l2, l3, sep="\n")

CYP6B	0.24

AGP4	0.96

CATB	0.37


In [5]:
fhandle.close()
# Explicitly release the handle: it was opened with a bare io.open() call, so
# no `with` block is in charge of closing it at a well-defined point.

## Another approach (ensuring the file handle is closed)

In [6]:
# The `with` statement closes the file automatically when the indented block
# ends, even if an error occurs while reading.
with io.open("data_16/ids.txt") as fhandle:
    # Read the first three lines (each keeps its trailing newline).
    l1, l2, l3 = (fhandle.readline() for _ in range(3))
    print(l1, l2, l3, sep="\n")
print("done processing; file handle closed")

CYP6B	0.24

AGP4	0.96

CATB	0.37
done processing; file handle closed


#### The blank lines between the printed lines are distracting. They appear because each line read from the file still ends with a `\n` character and `print()` adds another newline of its own; calling `.strip()` on each line removes the trailing `\n`

In [7]:
with io.open("data_16/ids.txt") as fhandle:
    # Raw lines, each still ending in "\n".
    l1, l2, l3 = (fhandle.readline() for _ in range(3))
    # .strip() drops leading/trailing whitespace, including the newline.
    l1s, l2s, l3s = (raw.strip() for raw in (l1, l2, l3))
    print(l1s, l2s, l3s, sep="\n")
print("done processing; file handle closed")

CYP6B	0.24
AGP4	0.96
CATB	0.37
done processing; file handle closed


In [8]:
# More compact: read, strip and print inside a loop instead of repeating the
# same three statements by hand.
with io.open("data_16/ids.txt") as fhandle:
    for _ in range(3):
        print(fhandle.readline().strip())
print("done processing; file handle closed")

CYP6B	0.24
AGP4	0.96
CATB	0.37
done processing; file handle closed


## Side-track: counting the lines in a file

In [9]:
# Count the lines by iterating over the file once. Opening it in a `with`
# block guarantees the handle is closed afterwards; the previous one-liner
# opened the file inside the generator expression and never closed it.
with io.open("data_16/ids.txt") as count_fh:
    no = sum(1 for _ in count_fh)
print(no)

3


## Extracting a specific column from the txt file

In [10]:
# Print the second (tab-separated) column of every line.
# Iterating over the file object yields each line exactly once, so this cell
# no longer depends on a line count `no` computed in a different cell (which
# other cells rebind to the length of other files).
with io.open("data_16/ids.txt") as fhandle:
    for line in fhandle:
        fields = line.strip().split("\t")  # columns are separated by tabs
        if len(fields) < 2:
            continue  # blank or malformed line: there is no second column
        print(fields[1])

0.24
0.96
0.37


In [11]:
# Calculating the average of the second column.
# The values are collected in `l_ls` as floats and averaged afterwards.
# The file is iterated directly, so the cell does not rely on the global line
# count `no` being in sync with this particular file.

l_ls = list()

with io.open("data_16/ids.txt") as fhandle:
    for line in fhandle:
        fields = line.strip().split("\t")
        if len(fields) < 2:
            continue  # skip blank or malformed lines instead of crashing
        # The important step: text read from a file is a string and must be
        # converted to a number before doing arithmetic with it.
        l_ls.append(float(fields[1]))

print(sum(l_ls)/len(l_ls))

0.5233333333333333


## Writing data using .write()

In [12]:
# Build two small example columns: the integers 0-9 and the letters A-J.
data0 = list(range(10))
data1 = list("ABCDEFGHIJ")
print(data0)
print(data1)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J']


In [13]:
# Write one "<number><TAB><letter>" record per line. Mode "w" creates the file
# or truncates an existing one.
# zip() pairs the two lists element by element, so the loop follows the data
# instead of a hard-coded length of 10.
with io.open("data_16/num.txt", "w") as fh:
    for number, letter in zip(data0, data1):
        # .write() only accepts strings and does not add a newline itself.
        fh.write(str(number) + "\t" + letter + "\n")

In [14]:
# Reading the written file back.
# A single pass both prints and counts the lines. The earlier version opened
# the file a second time just to count, and never closed that extra handle.
no = 0  # number of lines read; kept as a global like in the other cells
with io.open("data_16/num.txt") as fh:
    for line in fh:
        print(line.strip())
        no += 1
print("finished")

0	A
1	B
2	C
3	D
4	E
5	F
6	G
7	H
8	I
9	J
finished


## Q1:

In [15]:
# Visualizing the dataset (first 5 rows) and counting its lines in one pass.
# `no` ends up holding the total number of lines (0 for an empty file) and is
# reused by a later cell. The file is opened once, inside `with`, so no handle
# is left open, and a file shorter than 5 lines prints only the rows it has.
no = 0
with io.open("data_16/pz_blastx_yeast_top1.txt") as fh:
    for no, line in enumerate(fh, start=1):
        if no <= 5:
            print(line.strip().split("\t"))

['PZ7180000000004_TY', 'YKL081W', '31.07', '338', '197', '8', '13', '993', '1', '313', '1e-32', '  124']
['PZ1082_AB', 'YHR104W', '44.92', '118', '62', '3', '4', '348', '196', '313', '1e-26', '  100']
['PZ11_FX', 'YLR406C', '53.01', '83', '38', '1', '290', '42', '25', '106', '7e-15', '65.9']
['PZ7180000036154', 'YNL245C', '36.27', '102', '60', '3', '105', '395', '1', '102', '3e-07', '46.2']
['PZ605962', 'YKR079C', '29.57', '115', '66', '4', '429', '121', '479', '590', '3e-11', '59.3']


#### Find the standard deviation (sd) of the eleventh column

In [18]:
# Collect the eleventh column (index 10) of every line as floats.
# The file is iterated directly, so the cell does not depend on a line count
# `no` left behind by whichever cell happened to run last.
Eval_ls = list()

with io.open("data_16/pz_blastx_yeast_top1.txt") as fh:
    for line in fh:
        fields = line.strip().split("\t")
        if len(fields) < 11:
            continue  # blank or truncated line: no eleventh column to read
        Eval_ls.append(float(fields[10]))

print(Eval_ls)

[1e-32, 1e-26, 7e-15, 3e-07, 3e-11, 3e-37, 5e-52, 8e-45, 6e-30, 2e-26, 1e-16, 2e-08, 1e-09, 1e-22, 5e-14, 8e-09, 2e-21, 1e-49, 1e-14, 2e-10, 3e-18, 2e-13, 9e-24, 8e-11]


In [23]:
import statistics

In [24]:
# statistics.stdev() computes the *sample* standard deviation (n - 1 in the
# denominator) and needs at least two values.
eval_sd = statistics.stdev(Eval_ls)
print(eval_sd)

6.112940484086607e-08


## Q2: Reverse all the lines in file

In [13]:
# Read every line of the file, then build a reversed copy of the list.
# - The file is opened once, inside `with`, so no handle is left open.
# - Only the trailing newline is removed; .strip() would also remove leading
#   and trailing spaces or tabs that are part of the line being preserved.
with io.open("data_16/pz_blastx_yeast_top1.txt") as fh:
    lines = [line.rstrip("\n") for line in fh]

no = len(lines)  # line count, kept for consistency with the other cells

# Slicing with a step of -1 returns a new list in reverse order.
lines_rev = lines[::-1]

In [16]:
# Write the reversed lines to a new file, restoring the newline that was
# removed from each line when it was read.
with io.open("data_16/pz_blastx_yeast_top1_reversed.txt", "w") as fh:
    fh.writelines(line + "\n" for line in lines_rev)

## Q3: writing a quine

In [22]:
# Count the lines of this notebook's own .ipynb file. The `with` block closes
# the handle once counting is done; the previous one-liner left it open.
with io.open("2021-09-13-155009 A Primer Ch 16.ipynb") as nb_fh:
    no = sum(1 for _ in nb_fh)
print(no)

605


> with io.open("2021-09-13-155009 A Primer Ch 16.ipynb") as fh:
>>     for i in range(no):
>>>         l = fh.readline()
>>>         print(l)