##  File I/O (Input / Output)

## Jupyter-only way of writing a text file
* `%%writefile filename.ext`

Writes to the current working directory (run `pwd` first to see where that is).

In [1]:
%%writefile somefile.txt
Oh my text first line
Super easy second line
OH and third line is

Writing somefile.txt


In [2]:
# filein is our own name for file 
filein = open('somefile.txt')

In [3]:
type(filein)

_io.TextIOWrapper

In [4]:

filein.read() # usually not that useful

'Oh my text first line\nSuper easy second line\nOH and third line is\n'

In [5]:
filein.read()

''

In [6]:
# there can be many pointers to file stream
fin = filein

In [7]:
fin.name

'somefile.txt'

In [8]:
fin.read() # what will happen ?

''

In [9]:
fin.seek(6)
fin.read()

'text first line\nSuper easy second line\nOH and third line is\n'

In [10]:
fin.seek(6)
text6= fin.read()
print(text6)

text first line
Super easy second line
OH and third line is



In [13]:
# we seek the very beginning of the file
fin.seek(0)
fulltext = fin.read()
print(fulltext)

Oh my text first line
Super easy second line
OH and third line is



In [14]:
fulltext[6:]

'text first line\nSuper easy second line\nOH and third line is\n'

In [15]:
fin.seek(0)
mylines=fin.readlines()
mylines

['Oh my text first line\n',
 'Super easy second line\n',
 'OH and third line is\n']

In [16]:
type(mylines)

list

In [9]:
type(mylines)

list

In [10]:
len(mylines)

3

In [12]:
# list comprehension to generate line lengths
linelengths = [len(line) for line in mylines]
linelengths

[22, 23, 21]

In [17]:
mylines[0]

'Oh my text first line\n'

In [18]:
print(mylines[0])

Oh my text first line



In [19]:
# print() renders the trailing newline; the built-in repr() shows the string's representation with \n escaped
print(mylines[0])
print(repr(mylines[0])) # prints the same text the notebook displays for a bare mylines[0] as the cell's last expression
mylines[0]

Oh my text first line

'Oh my text first line\n'


'Oh my text first line\n'

In [20]:
# we go through each line and do some work
# (every line still ends with '\n', so print() leaves a blank line after it)
for line in mylines:
    print(f'My Line Is:{line}')
    # Do more work on lines

My Line Is:Oh my text first line

My Line Is:Super easy second line

My Line Is:OH and third line is



In [21]:
# We can iterate over the file object directly (without reading the whole file into memory)
fin.seek(0)
# so very large files can be processed this way
for line in fin:
    # end='' stops print() from adding a second newline after the one each line already carries
    print(line, end='')

Oh my text first line
Super easy second line
OH and third line is


In [22]:
for line  in mylines:
    print(line)

Oh my text first line

Super easy second line

OH and third line is



In [23]:
#close the file if you .open() it!!
fin.close()

## Always use `with open(...)`!

* closes the file automatically when the block ends
* errors still raise exceptions (see the next cell), and the file is closed even then

In [25]:
with open('badsomefile.txt') as fin:
    for line in fin:
        print(line)

FileNotFoundError: [Errno 2] No such file or directory: 'badsomefile.txt'

In [26]:
# Idiom for opening a file, working with it, and having it closed automatically
with open('somefile.txt') as fin:
    for line in fin:
        print(line)
    # we can do more work with the file here,
    # e.g. fin.seek(0) to read it again
    # the file is closed automatically as soon as this indented block ends
print("file is closed already here")
# no explicit fin.close() is needed after a with block

Oh my text first line

Super easy second line

OH and third line is

file is closed already here


In [27]:
fin.read()

ValueError: I/O operation on closed file.

## For MacOS and Linux
* use `pwd` to see where you are
### `myfile = open("/Users/MyUserName/SomeFolder/MaybeAnotherFolder/myfile.txt")`

## For Windows
* use `pwd` to see where you are
### `myfile = open("C:\\Users\\MyUserName\\SomeFolder\\MaybeAnotherFolder\\myfile.txt")`

In [29]:
# Jupyter Magic !someOScommand for example !dir or !ls
!dir

 Volume in drive C is Windows
 Volume Serial Number is 5AA0-2A07

 Directory of C:\Users\val-p1\Github\RCS_Data_Analysis_Python_2019_July

07/18/2019  04:10 PM    <DIR>          .
07/18/2019  04:10 PM    <DIR>          ..
07/18/2019  04:07 PM    <DIR>          .ipynb_checkpoints
07/13/2019  10:17 AM             7,614 All_Any.ipynb
07/18/2019  04:03 PM    <DIR>          data
07/13/2019  10:17 AM         7,435,248 Data_Analysis_Python_Introduction.pdf
07/13/2019  10:17 AM             1,713 Git_Workflow.md
07/16/2019  04:04 PM    <DIR>          img
07/13/2019  10:17 AM               217 Jupyter with Python.md
07/13/2019  10:17 AM             1,084 LICENSE
07/13/2019  10:17 AM            38,870 Python Classes.ipynb
07/13/2019  10:17 AM            47,880 Python Cleaning Up Text Files.ipynb
07/13/2019  10:17 AM               489 Python Comparison operators.md
07/13/2019  10:17 AM               203 Python Conditional Execution Branching.md
07/13/2019  10:17 AM            10,380 Python Data St

In [34]:
pwd

'C:\\Users\\val-p1\\Github\\RCS_Data_Analysis_Python_2019_July'

In [31]:
!dir

 Volume in drive C is Windows
 Volume Serial Number is 5AA0-2A07

 Directory of C:\Users\val-p1\Github\RCS_Data_Analysis_Python_2019_July

07/18/2019  04:10 PM    <DIR>          .
07/18/2019  04:10 PM    <DIR>          ..
07/18/2019  04:07 PM    <DIR>          .ipynb_checkpoints
07/13/2019  10:17 AM             7,614 All_Any.ipynb
07/18/2019  04:03 PM    <DIR>          data
07/13/2019  10:17 AM         7,435,248 Data_Analysis_Python_Introduction.pdf
07/13/2019  10:17 AM             1,713 Git_Workflow.md
07/16/2019  04:04 PM    <DIR>          img
07/13/2019  10:17 AM               217 Jupyter with Python.md
07/13/2019  10:17 AM             1,084 LICENSE
07/13/2019  10:17 AM            38,870 Python Classes.ipynb
07/13/2019  10:17 AM            47,880 Python Cleaning Up Text Files.ipynb
07/13/2019  10:17 AM               489 Python Comparison operators.md
07/13/2019  10:17 AM               203 Python Conditional Execution Branching.md
07/13/2019  10:17 AM            10,380 Python Data St

In [28]:
pwd

'C:\\Users\\val-p1\\Github\\RCS_Data_Analysis_Python_2019_July'

In [36]:
# importing OS specific library for system work
import os

In [39]:
cwd = os.getcwd()
cwd

'C:\\Users\\val-p1\\Github\\RCS_Data_Analysis_Python_2019_July'

In [41]:
# Build the path with os.path.join so the separator is right on every OS
# (cwd + '\\somefile.txt' only works on Windows).
with open(os.path.join(cwd, 'somefile.txt')) as f:
    for line in f:
        # each line keeps its trailing '\n', so print() adds a blank line after it
        print(line)

Oh my text first line

Super easy second line

OH and third line is



In [44]:
# A forward slash works as a path separator on Windows, macOS and Linux;
# the escaped-backslash form only works on Windows.
# The encoding is passed explicitly so decoding does not depend on the platform default.
with open('data/Veidenbaums.txt', encoding='UTF-8') as f:
    mytext = f.read()
# number of characters in the decoded text
len(mytext)

12875

In [42]:

# Opening a file by absolute path: derive the path from the working directory
# instead of hard-coding one machine's folder layout.
with open(os.path.abspath('somefile.txt'), mode='r') as fin:
    cont = fin.read()
print(cont)

Oh my text first line
Super easy second line
OH and third line is



In [27]:

with open('..\\..\\.gitconfig', mode='r') as fin:
    cont = fin.read()
print(cont)
#?open

[user]
	email = valdis.s.coding@gmail.com
	name = Valdis SCoding



In [45]:
# mode='w' is write-only and it replaces the OLD file's content if the file already exists!!
with open('numbers.txt', mode='w') as fwriter:
    for n in range(1,10):
        fwriter.write(f'The number is {n*2} \n')
        #fwriter.write(f'The number is {n}') # try this instead to see the difference between \n and no \n
    # the file is closed here, when the with block ends

In [46]:
# Expected size of numbers.txt: join the nine lines exactly as they were written.
# The f prefix matters: without it the literal text '{n*2}' is measured instead of
# the number, and every line is treated as if it had the same length.
expected_length = len(''.join(f'The number is {n*2} \n' for n in range(1,10)))
expected_length

158

In [47]:
with open('numbers.txt') as freader:
    mytext = freader.read()
len(mytext)

158

In [48]:
# Add up the length of every line; the total printed at the end should equal len(mytext).
with open('numbers.txt') as freader:
    total_chars = 0  # not named `sum`: that would shadow the built-in sum() in later cells
    for line in freader:
        print(line)
        print(len(line))  # the length includes the trailing space and '\n'
        total_chars += len(line)
print(total_chars)

The number is 2 

17
The number is 4 

17
The number is 6 

17
The number is 8 

17
The number is 10 

18
The number is 12 

18
The number is 14 

18
The number is 16 

18
The number is 18 

18
158


In [51]:
from datetime import datetime
now = datetime.now()
now

datetime.datetime(2019, 7, 18, 16, 47, 28, 58718)

In [52]:
today = datetime.today()
print(today)

2019-07-18 16:47:32.332155


In [54]:
# mode='w' is write-only and it replaces the OLD file's content if the file already exists!!
today = datetime.today()
# strftime-style formatting zero-pads every field (16_48_09 rather than 16_48_9),
# so the names have a fixed width and sort in chronological order
filename = f'numbers{today:%H_%M_%S}.txt'
with open(filename, mode='w') as fwriter:
    for n in range(1,10):
        fwriter.write(f'The number is {n*2} \n')
        #fwriter.write(f'The number is {n}') # try this instead to see the difference between \n and no \n
    # the file is closed here, when the with block ends

In [55]:
today

datetime.datetime(2019, 7, 18, 16, 48, 38, 942791)

In [56]:
today.hour

16

In [58]:
today.minute

48

In [66]:
datetime.today()

datetime.datetime(2019, 7, 18, 16, 50, 0, 396834)

In [67]:
timestamp = datetime.timestamp(now)
timestamp

1563457648.058718

In [34]:
str(datetime.now())

'2019-05-21 16:33:12.606716'

In [68]:
with open('numbers.txt') as fin:
    print(fin.readlines())

['The number is 2 \n', 'The number is 4 \n', 'The number is 6 \n', 'The number is 8 \n', 'The number is 10 \n', 'The number is 12 \n', 'The number is 14 \n', 'The number is 16 \n', 'The number is 18 \n']


In [48]:
with open('numbers.txt', mode='r') as fin:
    for line in fin:
        print(line, end="")

The number is 2 
The number is 4 
The number is 6 
The number is 8 
The number is 10 
The number is 12 
The number is 14 
The number is 16 
The number is 18 


In [71]:
# We can append to the files without overwriting
with open('numbers.txt', mode='a') as fin:
    fin.write("This might not be the end\nThis is really the end")

In [72]:
with open('numbers.txt', mode='a') as fin:
    fin.seek(4) # seek will not work here for writing
    fin.write("This is not the end")

In [73]:
with open('numbers.txt') as f:
    print(f.read())

The number is 2 
The number is 4 
The number is 6 
The number is 8 
The number is 10 
The number is 12 
The number is 14 
The number is 16 
The number is 18 
This might not be the endThis might not be the endThis might not be the end
This is really the endThis is not the end


In [74]:
# Jupyter magic for reading files into notebook
%%readfile numbers.txt
The number is 2 
The number is 4 
The number is 6 
The number is 8 
The number is 10 
The number is 12 
The number is 14 
The number is 16 
The number is 18 
This is the end!

SyntaxError: invalid syntax (<ipython-input-74-1a07fac4098f>, line 3)

In [39]:
# It is generally preferable to read and write in separate steps - VS

In [76]:
with open('somefile.txt', mode="r+") as f:
    print(f.readlines())
    f.write('moreinfo\tmore\n')

['Oh my text first line\n', 'Super easy second line\n', 'OH and third line is\n']


In [64]:
# NOTE(review): this reads 'sometext.txt', but the cell above wrote to 'somefile.txt'
# and nothing visible in this notebook creates 'sometext.txt' - confirm which file is intended
with open('sometext.txt') as f:
    print(f.readlines())

['moreinfo\tmore\n', 'moreinfo\tmore\n', 'moreinfo\tmore\n', 'moreinfo\tmore\n', 'moreinfo\tmore\n', 'moreinfo\tmore\n', 'moreinfo\tmore\n', 'moreinfo\tmore\n']


### Modes:
  * `'r'` - Read Only
  * `'w'` - Write Only (and will overwrite existing files!!!)
  * `'a'` - Append Only (stream is at the end of file!)
  * `'r+'` - Read and Write
  * `'w+'` - Write and Read, overwriting an existing file or creating a new one
  
  From C (fopen)
   * `r+`  Open for reading and writing.  The stream is positioned at the
         beginning of the file.
         
   * `w+`  Open for reading and writing.  The file is created if it does not
         exist, otherwise it is truncated (**destroyed!**).  The stream is positioned at
         the beginning of the file.

In [77]:
# NOTE: every ! command runs in its own subshell, so this cd does not change the
# notebook's working directory (the next !dir still lists the same folder);
# use the %cd magic to actually change directory
!cd data/

In [78]:
!dir

 Volume in drive C is Windows
 Volume Serial Number is 5AA0-2A07

 Directory of C:\Users\val-p1\Github\RCS_Data_Analysis_Python_2019_July

07/18/2019  04:57 PM    <DIR>          .
07/18/2019  04:57 PM    <DIR>          ..
07/18/2019  04:07 PM    <DIR>          .ipynb_checkpoints
07/13/2019  10:17 AM             7,614 All_Any.ipynb
07/18/2019  04:03 PM    <DIR>          data
07/13/2019  10:17 AM         7,435,248 Data_Analysis_Python_Introduction.pdf
07/13/2019  10:17 AM             1,713 Git_Workflow.md
07/16/2019  04:04 PM    <DIR>          img
07/13/2019  10:17 AM               217 Jupyter with Python.md
07/13/2019  10:17 AM             1,084 LICENSE
07/18/2019  04:52 PM               285 numbers.txt
07/18/2019  04:48 PM               167 numbers16_48_38.txt
07/18/2019  04:48 PM               167 numbers16_48_9.txt
07/13/2019  10:17 AM            38,870 Python Classes.ipynb
07/13/2019  10:17 AM            47,880 Python Cleaning Up Text Files.ipynb
07/13/2019  10:17 AM               4

In [83]:
# Numbers 1..29, first as ints and then as newline-terminated strings.
mylist = [*range(1, 30)]
mytextlist = [f'{number}\n' for number in mylist]
# writelines() adds no separators of its own, hence the '\n' on every item.
with open('list.txt', 'w') as fwriter:
    fwriter.writelines(mytextlist)


In [84]:
# Writes one line per write() call; range(1, 1_000_000) stops at 999_999,
# so the file ends up with 999,999 lines
with open('biglist.txt', mode='w') as fwriter:
    for n in range(1,1_000_000):
        fwriter.write('Line:'+ str(n)+'\n')


In [None]:
# Optimal writing for large files will be between how much you can hold in memory and how big the file will be

## More on String Formatting

## How do you convert values to strings? 

### In Python: pass it to the repr() or str() functions.

The str() function is meant to return representations of values which are fairly human-readable, while repr() is meant to generate representations which can be read by the interpreter (or will force a SyntaxError if there is no equivalent syntax). For objects which don’t have a particular representation for human consumption, str() will return the same value as repr(). 

In [1]:
print(str(34341235421))
print(repr(54))
print(str(54))
str(54)==repr(54)

34341235421
54
54


True

In [1]:
hi = "Hello\n"
hir = repr(hi)
print(hi)
print(hir)
hir==hi

Hello

'Hello\n'


False

### The goal of `__repr__` is to be unambiguous
### The goal of `__str__` is to be readable

## In other words: `__repr__` is for developers, `__str__` is for customers (end users)

In [5]:
for x in range(1,12):
    print(f'{x:2d} {x*x:3d} {x**3:4f}')

 1   1 1.000000
 2   4 8.000000
 3   9 27.000000
 4  16 64.000000
 5  25 125.000000
 6  36 216.000000
 7  49 343.000000
 8  64 512.000000
 9  81 729.000000
10 100 1000.000000
11 121 1331.000000


In [2]:
'-3.14'.zfill(7) # pads numeric string on the left with zeros

'-003.14'

In [66]:
# write to a text file squares.txt first 10 squares
# 1 squared is 1
# 2 squared is 4
for n in range(1,11):
    print(f'{n} squared is {n*n}')


1 squared is 1
2 squared is 4
3 squared is 9
4 squared is 16
5 squared is 25
6 squared is 36
7 squared is 49
8 squared is 64
9 squared is 81
10 squared is 100


In [68]:
# write to a text file squares.txt first 10 squares
# 1 squared is 1
# 2 squared is 4
with open('squares.txt', mode='w') as f:
    for n in range(1,11):
        myline = f'{n} squared is {n*n}\n'
        print(myline)
        f.write(myline)


1 squared is 1

2 squared is 4

3 squared is 9

4 squared is 16

5 squared is 25

6 squared is 36

7 squared is 49

8 squared is 64

9 squared is 81

10 squared is 100



In [70]:
# write to a text file squares.txt first 10 squares
# 1 squared is 1
# 2 squared is 4
with open('squares2.txt', mode='w') as f:
    mylines = [f'{n} squared is {n*n}\n' for n in range(1,11)]
    f.writelines(mylines)

In [75]:
with open('squares2.txt') as f:
    for line in f:
        # do something with line for example print it
        print(line, end="")

1 squared is 1
2 squared is 4
3 squared is 9
4 squared is 16
5 squared is 25
6 squared is 36
7 squared is 49
8 squared is 64
9 squared is 81
10 squared is 100


In [76]:
type(f)

_io.TextIOWrapper

In [77]:
dir(f)

['_CHUNK_SIZE',
 '__class__',
 '__del__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__enter__',
 '__eq__',
 '__exit__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__lt__',
 '__ne__',
 '__new__',
 '__next__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '_checkClosed',
 '_checkReadable',
 '_checkSeekable',
 '_checkWritable',
 '_finalizing',
 'buffer',
 'close',
 'closed',
 'detach',
 'encoding',
 'errors',
 'fileno',
 'flush',
 'isatty',
 'line_buffering',
 'mode',
 'name',
 'newlines',
 'read',
 'readable',
 'readline',
 'readlines',
 'reconfigure',
 'seek',
 'seekable',
 'tell',
 'truncate',
 'writable',
 'write',
 'write_through',
 'writelines']

In [78]:
f.close()

In [79]:
f.readlines()

ValueError: I/O operation on closed file.

In [None]:
## Homework
## Write a function which writes FizzBuzz for 1 to 100 (Fizz for multiples of 5, Buzz for multiples of 7) to the file fizzbuzz.txt
## Format example:
## 1: 1
## 2: 2
## 5: Fizz
## 6: 6
## 7: Buzz
#... 35: FizzBuzz