##  File I/O (Input / Output)

## Jupyter only writing to text file
* %%writefile filename.ext

Writes in the working directory (first run pwd) 

This is not Python specific command

In [1]:
import os
os.getcwd()

'd:\\Github\\RTU_Python_720_Fall_2020\\core'

In [1]:
%%writefile mylib.py
# this is a small .py file that we will use for as a module(import)
import math # importing standard Python math library
MY_PI = 3.1415926

def nb_year(pop_start, percent, yearly_arrival, pop_end):
    count = 0
    population = pop_start
    while population < pop_end:
        # short hand population *= (1+percent/100)
        # also shortone population += population * percent / 100
        population = population + math.floor(population * percent / 100)
        population += yearly_arrival
        count += 1
    return count

def add(a,b):
    return a+b

# could add main guard here

Writing mylib.py


In [2]:
import mylib # it works because C:\PyLib\mylib.py is in my PYTHONPATH enviroment variables
# import looks FIRST in your current directory

In [3]:
print(mylib.MY_PI)

3.1415926


In [4]:
mylib.nb_year(100,2,0,200)

42

In [5]:
mylib.add(5,15)  # so adding on the fly is problematic on Google Colab

20

In [6]:
%%writefile two_roads.txt
Robert Frost
The Road not Taken

Two roads diverged in a yellow wood,
And sorry I could not travel both
And be one traveler, long I stood
And looked down one as far as I could
To where it bent in the undergrowth;

Then took the other, just as fair,
And having perhaps the better claim,
Because it was grassy and wanted wear;
Though as for that the passing there
Had worn them really about the same,

3rd verse

And both that morning equally lay
In leaves, no step had trodden black.
Oh, I kept the first for another day!
Yet knowing how way leads on to way,
I doubted if I should ever come back.

4th verse

I shall be telling this with a sigh
Somewhere ages and ages hence:
Two roads diverged in a wood, and I—
I took the one less traveled by,
And that has made all the difference.

Writing two_roads.txt


In [8]:
from pathlib import Path

In [10]:
files = [f for f in Path(".").glob("*.txt") if f.is_file()]
files

[PosixPath('alice.txt'), PosixPath('two_roads.txt')]

In [11]:
alice_file_name = files[0]
alice_file_name

PosixPath('alice.txt')

In [12]:
with open(alice_file_name, encoding="utf-8") as f:
    alice_text = f.read()
print("Got", len(alice_text), "symbols in", alice_file_name)

Got 164410 symbols in alice.txt


In [20]:
import string
string.punctuation

'!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'

In [21]:
clean_text = alice_text
for punct in string.punctuation:
    clean_text = clean_text.replace(punct, "") # so i can replace all occurences of all punctuation
print("Got", len(clean_text), "symbols after cleaning")

Got 158560 symbols after cleaning


In [22]:
# tokens = alice_text.split()
tokens = clean_text.split()
print("Got", len(tokens), "tokens in", alice_file_name)

Got 29574 tokens in alice.txt


In [23]:
from collections import Counter
word_count = Counter(tokens)
word_count.most_common(20)

[('the', 1684),
 ('and', 809),
 ('to', 786),
 ('a', 669),
 ('of', 604),
 ('she', 500),
 ('said', 453),
 ('it', 443),
 ('in', 412),
 ('Alice', 383),
 ('was', 352),
 ('you', 344),
 ('I', 266),
 ('as', 251),
 ('her', 244),
 ('that', 244),
 ('with', 220),
 ('at', 210),
 ('on', 188),
 ('all', 182)]

In [24]:
# CSV - comma separareted files are just text files with some structure
with open("alice_word_stats.csv", mode="w", encoding="utf-8") as f:
    f.write("word, frequency\n")
    # f.write("\n".join(word_count.most_common(50)))
    for my_tuple in word_count.most_common(50):
        f.write(f"{my_tuple[0]}, {my_tuple[1]}\n")


In [None]:
# filein is our own name for file stream
filein = open('two_roads.txt')

In [None]:
type(filein)

_io.TextIOWrapper

In [None]:

mytext = filein.read() # usually not that useful but you can read everything at once
print(mytext)

Robert Frost
The Road not Taken

Two roads diverged in a yellow wood,
And sorry I could not travel both
And be one traveler, long I stood
And looked down one as far as I could
To where it bent in the undergrowth;

Then took the other, just as fair,
And having perhaps the better claim,
Because it was grassy and wanted wear;
Though as for that the passing there
Had worn them really about the same,

3rd verse

And both that morning equally lay
In leaves, no step had trodden black.
Oh, I kept the first for another day!
Yet knowing how way leads on to way,
I doubted if I should ever come back.

4th verse

I shall be telling this with a sigh
Somewhere ages and ages hence:
Two roads diverged in a wood, and I—
I took the one less traveled by,
And that has made all the difference.


In [None]:
filein.read()

''

In [None]:
# it is like a record from vinyl records we went to the end of this file

In [None]:
# there can be many pointers to file stream
fin = filein

In [None]:
fin.name

'two_roads.txt'

In [None]:
fin.read() # what will happen ?

''

In [None]:
# if for some reason you want to read the file again you have to seek() back to some position

In [None]:
fin.seek(0) # so i seek back to 0 characters from the start - beginning
fin.read()

'Robert Frost\nThe Road not Taken\n\nTwo roads diverged in a yellow wood,\nAnd sorry I could not travel both\nAnd be one traveler, long I stood\nAnd looked down one as far as I could\nTo where it bent in the undergrowth;\n\nThen took the other, just as fair,\nAnd having perhaps the better claim,\nBecause it was grassy and wanted wear;\nThough as for that the passing there\nHad worn them really about the same,\n\n3rd verse\n\nAnd both that morning equally lay\nIn leaves, no step had trodden black.\nOh, I kept the first for another day!\nYet knowing how way leads on to way,\nI doubted if I should ever come back.\n\n4th verse\n\nI shall be telling this with a sigh\nSomewhere ages and ages hence:\nTwo roads diverged in a wood, and I—\nI took the one less traveled by,\nAnd that has made all the difference.'

In [None]:
fin.seek(6) # so i skip 6 characters and only then start reading
text6= fin.read()
print(text6)

text first line
Super easy second line
OH and third line is



In [None]:
# we seek the very beginning of the file
fin.seek(0)
fulltext = fin.read()
print(fulltext)

Oh my text first line
Super easy second line
OH and third line is



In [None]:
fulltext[6:]

'text first line\nSuper easy second line\nOH and third line is\n'

In [None]:
# more useful that .read() can be readlines() which read by looking for \n newline character
fin.seek(0)
mylines=fin.readlines()
mylines

['Oh my text first line\n',
 'Super easy second line\n',
 'OH and third line is\n']

In [None]:
type(mylines)

list

In [None]:
type(mylines)

list

In [None]:
len(mylines)

3

In [None]:
# list comprehension to generate line lengths
linelengths = [len(line) for line in mylines]
linelengths

[22, 23, 21]

In [None]:
mylines[0]

'Oh my text first line\n'

In [None]:
print(mylines[0])

Oh my text first line



In [None]:
# if we need the representation of string then we use built in repr function
print(mylines[0])
print(repr(mylines[0])) # same as mylines[0] as the first last item
mylines[0]

Oh my text first line

'Oh my text first line\n'


'Oh my text first line\n'

In [None]:
mylines

['Oh my text first line\n',
 'Super easy second line\n',
 'OH and third line is\n']

In [None]:
# we go trough each line and do some work
for line in mylines:
    print(f'My Line Is:{line}')
    # Do more work on lines

My Line Is:Oh my text first line

My Line Is:Super easy second line

My Line Is:OH and third line is



In [None]:
# We can go throught the file directly (without reading the whole file into memory)
fin.seek(0)
# we can go through very large files here as long as it has many newlines
for line in fin:
    print(line, end='')

Oh my text first line
Super easy second line
OH and third line is


In [None]:
for line in mylines:
    print(line)

Oh my text first line

Super easy second line

OH and third line is



In [None]:
#close the file if you .open() it!!
fin.close() 
# problem is that we can forget to close it and leave the file hanging so to say

## Use with open always! 

* closes automatically!
* throws exceptions on errors

In [None]:
# Idiom on how to open AND close a file for reading and doing work
with open('two_roads.txt') as fin: # fin or f or file_in whatever name makes most sense, f is very common
    for line in fin:
        print(line)
        # do wo with each line here,save into a list or other structure
    # we can do more work with file here
    # maybe fin.seek(0) to read it again for some reason
    # File will be closed once this line ends
# File is closed now    
print("file is closed already here")
#closes here!
#closes automatically!!! 

Robert Frost

The Road not Taken



Two roads diverged in a yellow wood,

And sorry I could not travel both

And be one traveler, long I stood

And looked down one as far as I could

To where it bent in the undergrowth;



Then took the other, just as fair,

And having perhaps the better claim,

Because it was grassy and wanted wear;

Though as for that the passing there

Had worn them really about the same,



3rd verse



And both that morning equally lay

In leaves, no step had trodden black.

Oh, I kept the first for another day!

Yet knowing how way leads on to way,

I doubted if I should ever come back.



4th verse



I shall be telling this with a sigh

Somewhere ages and ages hence:

Two roads diverged in a wood, and I—

I took the one less traveled by,

And that has made all the difference.
file is closed already here


In [None]:
fin.read() # File is already Closed

ValueError: ignored

## For MacOS and Linux
* use pwd to see where you are
### myfile = open("/Users/MyUserName/SomeFolder/MaybeAnotherFolder/myfile.txt")

## For Windows
* use pwd to see where you are
### myfile = open("C:\\Users\\MyUserName\\SomeFolder\\MaybeAnotherFolder\\myfile.txt")

In [None]:
# Jupyter Magic !someOScommand for example !dir or !ls
!dir

mylib.py  __pycache__  sample_data  two_roads.txt


In [None]:
pwd

'/content'

In [None]:
# importing OS specific library for system work
# idea being that we can do same on Windows/Mac/Linux and not worry about the OS
import os

In [None]:
cwd = os.getcwd()
cwd

'/content'

In [None]:
myfiles = os.listdir() # so this gives you a list of file names in your current directory
myfiles

['.config',
 '__pycache__',
 'mylib.py',
 '.ipynb_checkpoints',
 'two_roads.txt',
 'sample_data']

In [None]:
mytextfiles = [fname for fname in myfiles if fname.endswith(".txt")]
mytextfiles

['somefile.txt']

# absolute paths and relative paths

In [None]:
cwd # so that is my absolute path for current working directory

'C:\\Users\\liga\\Github\\RTU_Python_720_Fall_2020\\core'

In [None]:
# absolute path using cw
with open(cwd+'\\somefile.txt') as f:
    for line in f:
        print(line)

Oh my text first line

Super easy second line

OH and third line is



In [None]:
# join path no mater what OS we have
# meaning it will do / for Mac and Linux and \\ for Windows
fullpath = os.path.join(cwd, 'somefile.txt')
fullpath

'C:\\Users\\liga\\Github\\RTU_Python_720_Fall_2020\\core\\somefile.txt'

In [None]:
os.path.join(os.getcwd(), "myimages", "catpictures", "mycat.png")

'C:\\Users\\liga\\Github\\RTU_Python_720_Fall_2020\\core\\myimages\\catpictures\\mycat.png'

In [None]:
os.path.join(os.getcwd(), 'somefile.txt')

'C:\\Users\\liga\\Github\\RTU_Python_720_Fall_2020\\core\\somefile.txt'

In [None]:
# i get the current working directory and join the file path
with open(os.path.join(os.getcwd(), 'somefile.txt')) as f:
    for line in f:
        print(line)

Oh my text first line

Super easy second line

OH and third line is



In [None]:
# this is a relative path
# on a relative path we are going back one level with ..
# then we enter a sister/brother folder data
# and read the file from there
# with relative paths we do not have to worry about the outside world so much
# that is what kind of folder structure is outside our project
with open('../data/veidenbaums_clean.txt', encoding='UTF-8') as f:
    mytext = f.read()
len(mytext)

11549

In [None]:
afile = "../data/alice.txt"

In [None]:
with open(afile, encoding="utf-8") as f:
    whole_alice = f.read()
print(len(whole_alice), "Characters")

164410 Characters


In [None]:
whole_alice[:100]

'\ufeffThe Project Gutenberg EBook of Alice’s Adventures in Wonderland, by Lewis Carroll\n\nThis eBook is fo'

In [None]:
afile = "two_roads.txt"
to_find = "roads"

In [None]:
lines_to_keep = []
with open(afile, encoding="utf-8") as f:
    for line in f:
        if to_find in line:
            lines_to_keep.append(line)
# file is closed here already
print(len(lines_to_keep), f"lines with {to_find}")

2 lines with roads


In [None]:
lines_to_keep

['Two roads diverged in a yellow wood,\n',
 'Two roads diverged in a wood, and I—\n']

In [None]:
with open(afile, encoding="utf-8") as f:
    all_lines = f.readlines() # so have all the text saved in a list one row per list item
all_lines[:5]

['Robert Frost\n',
 'The Road not Taken\n',
 '\n',
 'Two roads diverged in a yellow wood,\n',
 'And sorry I could not travel both\n']

In [None]:
all_lines[-5:]

['I shall be telling this with a sigh\n',
 'Somewhere ages and ages hence:\n',
 'Two roads diverged in a wood, and I—\n',
 'I took the one less traveled by,\n',
 'And that has made all the difference.']

In [None]:
with open(afile) as f:
    lines_without_coding = f.readlines() # so have all the text saved in a list one row per list item
print(lines_without_coding[-5:])

['I shall be telling this with a sigh\n', 'Somewhere ages and ages hence:\n', 'Two roads diverged in a wood, and I—\n', 'I took the one less traveled by,\n', 'And that has made all the difference.']


In [None]:
good_words = ["wood", "road"]
mytext = "Yellow rabbit jumped over a brown Rabbit on a road indeed"

In [None]:
# any is similar idea to existential quantor in logic
# https://en.wikipedia.org/wiki/Existential_quantification#:~:text=In%20predicate%20logic%2C%20an%20existential,%2C%20or%20%22for%20some%22.
any(word in mytext for word in good_words ) # to check if any(1 or more words from my good_words list are in mytext

True

In [None]:
# https://en.wikipedia.org/wiki/Universal_quantification
all(word in mytext for word in good_words ) # same BUt needs ALL to be in the the text

False

In [None]:
good_words = ["morning", "road", "wood"]
lines_to_keep = [] # start with a blank list
with open(afile, encoding="utf-8") as f:
    for line in f:
        if any(word in line for word in good_words):
            lines_to_keep.append(line)
# file is closed here already
print(len(lines_to_keep), f"lines with {good_words}")

3 lines with ['morning', 'road', 'wood']


In [None]:
lines_to_keep[:10]

['Two roads diverged in a yellow wood,\n',
 'And both that morning equally lay\n',
 'Two roads diverged in a wood, and I—\n']

In [None]:
def get_filtered_list(fname, good_words=()):
    lines_to_keep = []
    with open(afile, encoding="utf-8") as f:
        for line in f: # we go through file line by line
            if any(word in line for word in good_words):
                lines_to_keep.append(line)
    # file is closed here already
    print(len(lines_to_keep), f"lines with {good_words}")
    return lines_to_keep

In [None]:
ages_grassy_list = get_filtered_list(afile, good_words=["ages", "grassy", "Riga"])
ages_grassy_list

2 lines with ['ages', 'grassy', 'Riga']


['Because it was grassy and wanted wear;\n',
 'Somewhere ages and ages hence:\n']

In [None]:
with open("ages_grassy.txt", mode="w", encoding="utf-8") as fwriter:
    fwriter.writelines(ages_grassy_list)


In [None]:
with open(afile, encoding="utf-8") as f: # f could be anything else 
    full_text = f.read() # so I ingest the whole text at once
full_text[:100]

'Robert Frost\nThe Road not Taken\n\nTwo roads diverged in a yellow wood,\nAnd sorry I could not travel b'

In [None]:
lines_from_full_text = full_text.split("\n") # usually we would use read lines instead
lines_from_full_text[:5]

['Robert Frost',
 'The Road not Taken',
 '',
 'Two roads diverged in a yellow wood,',
 'And sorry I could not travel both']

In [None]:
# if i do not have newlines I will need to add them myself
with open("first_5_lines.txt", mode="w", encoding="utf-8") as f:
    # f.writelines(lines_from_full_text[:5])
    # one way to fix it is to join the lines together with newline symbol \n
    f.write("\n\n".join(lines_from_full_text[:5])) # now I will add an extra empty line under each line

In [None]:
# with mode="w" each time I overwrite the old file
# the other option is to append at the end of an existing file
# NOTE: there is no way to insert at the beginning or middle without overwriting the whole 

In [None]:
# so we use mode="w" to append to an existing text file
with open("ages_grassy.txt", mode="a", encoding="utf-8") as f:
    f.write("Why hello mom, I am appending!\nTwo Lines even\n")
    f.writelines(lines_to_keep)

In [None]:
# typical workflow for smaller files (up to a couple million rows)
my_file_name = "two_roads.txt"
out_file = "two_roads_filtered.txt"
with open(my_file_name, encoding="utf-8") as f:
    my_lines = f.readlines()
# file is closed
# now my_lines is just a list of strings
# do whatever text processing you want and analysis
my_filtered_lines = [line for line in my_lines if "road" in line]
# i could do more processing here

with open(out_file, mode="w", encoding="utf-8") as f: # new f
    f.write("Just a header of my own choosing\n") # note the newline
    f.writelines(my_filtered_lines)

In [None]:
# if you have to process huge files (Gigabytes in size or even Terabytes)
# then we need a different and we open both reader and writer streams at once
# and process the incoming file one row(line) at a time
my_file_name = "two_roads.txt"
out_file = "two_roads_filtered.txt"
with open(my_file_name, encoding="utf-8") as fin, open(out_file, mode="w", encoding="utf-8") as fout:
    fout.write("I could add a header here if I wish\n")
    for row in fin: # we go through each line of a the incoming file
        if "road" in row:  #just a simple filter but you can use regular experession and stronger filters
            fout.write(row)
    fout.write("I can write a footer as well\n(C)2022")
# this approach will work even on huge files that do not fit in your computer memory

In [None]:
queen

[' CHAPTER VIII.  The Queen’s Croquet-Ground\n',
 'Queen to play croquet.” The Frog-Footman repeated, in the same solemn\n',
 'tone, only changing the order of the words a little, “From the Queen.\n',
 'croquet with the Queen,” and she hurried out of the room. The cook\n',
 'Queen to-day?”\n',
 'given by the Queen of Hearts, and I had to sing\n',
 'Queen jumped up and bawled out, ‘He’s murdering the time! Off with his\n',
 'The Queen’s Croquet-Ground\n',
 '“_You’d_ better not talk!” said Five. “I heard the Queen say only\n',
 '_red_ rose-tree, and we put a white one in by mistake; and if the Queen\n',
 'out “The Queen! The Queen!” and the three gardeners instantly threw\n',
 'and Alice looked round, eager to see the Queen.\n',
 'ornamented with hearts. Next came the guests, mostly Kings and Queens,\n',
 'at her, and the Queen said severely “Who is this?” She said it to the\n',
 '“Idiot!” said the Queen, tossing her head impatiently; and, turning to\n',
 '“And who are _these?_” said the

In [None]:
with open("alice_queen.txt", mode="w", encoding="utf-8") as f:
    f.writelines(queen)
# remember here file is already closed

In [None]:

with open('C:\\Users\\val-p1\\Github\\RCS_Data_Analysis_Python_2019_July\\somefile.txt', mode='r') as fin:
    cont = fin.read()
print(cont)
#?open

Oh my text first line
Super easy second line
OH and third line is



In [None]:
def get_good_not_bad(fname, good_words=(), bad_words=()):
    lines_to_keep = []
    with open(afile, encoding="utf-8") as f:
        for line in f: # we go through file line by line
            if any(word in line for word in good_words) and not any(word in line for word in bad_words):
                lines_to_keep.append(line)
    # file is closed here already
    print(len(lines_to_keep), f"lines with {good_words} and none with {bad_words}")
    return lines_to_keep

In [None]:
good_words

['rabbit', 'Rabbit', 'Alice']

In [None]:
bad_words = [" and ", " or "]

In [None]:
filtered_lines = get_good_not_bad(afile) # what happens with default values?
filtered_lines

0 lines with ()


[]

In [None]:
filtered_lines = get_good_not_bad(afile, good_words=good_words)

445 lines with ['rabbit', 'Rabbit', 'Alice'] and none with ()


In [None]:
filtered_lines = get_good_not_bad(afile, good_words=good_words, bad_words=bad_words)

361 lines with ['rabbit', 'Rabbit', 'Alice'] and none with [' and ', ' or ']


In [None]:
filtered_lines[:20]

['\ufeffThe Project Gutenberg EBook of Alice’s Adventures in Wonderland, by Lewis Carroll\n',
 'Title: Alice’s Adventures in Wonderland\n',
 'Alice’s Adventures in Wonderland\n',
 ' CHAPTER I.     Down the Rabbit-Hole\n',
 ' CHAPTER IV.    The Rabbit Sends in a Little Bill\n',
 ' CHAPTER XII.   Alice’s Evidence\n',
 'Down the Rabbit-Hole\n',
 'Alice was beginning to get very tired of sitting by her sister on the\n',
 'conversations in it, “and what is the use of a book,” thought Alice\n',
 'picking the daisies, when suddenly a White Rabbit with pink eyes ran\n',
 'There was nothing so _very_ remarkable in that; nor did Alice think it\n',
 'so _very_ much out of the way to hear the Rabbit say to itself, “Oh\n',
 'time it all seemed quite natural); but when the Rabbit actually _took a\n',
 'on, Alice started to her feet, for it flashed across her mind that she\n',
 'large rabbit-hole under the hedge.\n',
 'In another moment down went Alice after it, never once considering how\n',
 'dippe

In [None]:
filtered_lines = get_good_not_bad(afile, good_words=["CHAPTER"])

24 lines with ['CHAPTER'] and none with ()


In [None]:
filtered_lines

[' CHAPTER I.     Down the Rabbit-Hole\n',
 ' CHAPTER II.    The Pool of Tears\n',
 ' CHAPTER III.   A Caucus-Race and a Long Tale\n',
 ' CHAPTER IV.    The Rabbit Sends in a Little Bill\n',
 ' CHAPTER V.     Advice from a Caterpillar\n',
 ' CHAPTER VI.    Pig and Pepper\n',
 ' CHAPTER VII.   A Mad Tea-Party\n',
 ' CHAPTER VIII.  The Queen’s Croquet-Ground\n',
 ' CHAPTER IX.    The Mock Turtle’s Story\n',
 ' CHAPTER X.     The Lobster Quadrille\n',
 ' CHAPTER XI.    Who Stole the Tarts?\n',
 ' CHAPTER XII.   Alice’s Evidence\n',
 'CHAPTER I.\n',
 'CHAPTER II.\n',
 'CHAPTER III.\n',
 'CHAPTER IV.\n',
 'CHAPTER V.\n',
 'CHAPTER VI.\n',
 'CHAPTER VII.\n',
 'CHAPTER VIII.\n',
 'CHAPTER IX.\n',
 'CHAPTER X.\n',
 'CHAPTER XI.\n',
 'CHAPTER XII.\n']

In [None]:
more_filtered = [line for line in filtered_lines if "  " in line]
more_filtered

[' CHAPTER I.     Down the Rabbit-Hole\n',
 ' CHAPTER II.    The Pool of Tears\n',
 ' CHAPTER III.   A Caucus-Race and a Long Tale\n',
 ' CHAPTER IV.    The Rabbit Sends in a Little Bill\n',
 ' CHAPTER V.     Advice from a Caterpillar\n',
 ' CHAPTER VI.    Pig and Pepper\n',
 ' CHAPTER VII.   A Mad Tea-Party\n',
 ' CHAPTER VIII.  The Queen’s Croquet-Ground\n',
 ' CHAPTER IX.    The Mock Turtle’s Story\n',
 ' CHAPTER X.     The Lobster Quadrille\n',
 ' CHAPTER XI.    Who Stole the Tarts?\n',
 ' CHAPTER XII.   Alice’s Evidence\n']

In [None]:

with open('..\\..\\.gitconfig', mode='r') as fin:
    cont = fin.read()
print(cont)
#?open

[user]
	email = valdis.s.coding@gmail.com
	name = Valdis SCoding



In [None]:
#mode=w is write only and it deletes OLD file!!
with open('numbers.txt', mode='w') as fwriter:
    for n in range(1,10):
        fwriter.write(f'The number is {n*2} \n')
        #fout.write(f'The number is {n}') # diferences between \n and no \n
    #file is closed is here    

In [None]:
len('The number is {n*2} \n')*9

189

In [None]:
with open('numbers.txt') as freader:
    mytext = freader.read()
len(mytext)

158

In [None]:
with open('numbers.txt') as freader:
    sum = 0
    for line in freader:
        print(line)
        print(len(line))
        sum += len(line)
print(sum)

The number is 2 

17
The number is 4 

17
The number is 6 

17
The number is 8 

17
The number is 10 

18
The number is 12 

18
The number is 14 

18
The number is 16 

18
The number is 18 

18
158


In [None]:
from datetime import datetime
now = datetime.now()
now

datetime.datetime(2019, 7, 18, 16, 47, 28, 58718)

In [None]:
today = datetime.today()
print(today)

2019-07-18 16:47:32.332155


In [None]:
#mode=w is write only and it deletes OLD file!!
today = datetime.today()
with open(f'numbers{today.hour}_{today.minute}_{today.second}.txt', mode='w') as fwriter:
    for n in range(1,10):
        fwriter.write(f'The number is {n*2} \n')
        #fout.write(f'The number is {n}') # diferences between \n and no \n
    #file is closed is here    

In [None]:
today

datetime.datetime(2019, 7, 18, 16, 48, 38, 942791)

In [None]:
today.hour

16

In [None]:
today.minute

48

In [None]:
datetime.today()

datetime.datetime(2019, 7, 18, 16, 50, 0, 396834)

In [None]:
timestamp = datetime.timestamp(now)
timestamp

1563457648.058718

In [None]:
str(datetime.now())

'2019-05-21 16:33:12.606716'

In [None]:
with open('numbers.txt') as fin:
    print(fin.readlines())

['The number is 2 \n', 'The number is 4 \n', 'The number is 6 \n', 'The number is 8 \n', 'The number is 10 \n', 'The number is 12 \n', 'The number is 14 \n', 'The number is 16 \n', 'The number is 18 \n']


In [None]:
with open('numbers.txt', mode='r') as fin:
    for line in fin:
        print(line, end="")

The number is 2 
The number is 4 
The number is 6 
The number is 8 
The number is 10 
The number is 12 
The number is 14 
The number is 16 
The number is 18 


In [None]:
# We can append to the files without overwriting
with open('numbers.txt', mode='a') as fin:
    fin.write("This might not be the end\nThis is really the end")

In [None]:
with open('numbers.txt', mode='a') as fin:
    fin.seek(4) # seek will not work here for writing
    fin.write("This is not the end")

In [None]:
with open('numbers.txt') as f:
    print(f.read())

The number is 2 
The number is 4 
The number is 6 
The number is 8 
The number is 10 
The number is 12 
The number is 14 
The number is 16 
The number is 18 
This might not be the endThis might not be the endThis might not be the end
This is really the endThis is not the end


In [None]:
# Jupyter magic for reading files into notebook
%%readfile numbers.txt
The number is 2 
The number is 4 
The number is 6 
The number is 8 
The number is 10 
The number is 12 
The number is 14 
The number is 16 
The number is 18 
This is the end!

SyntaxError: invalid syntax (<ipython-input-74-1a07fac4098f>, line 3)

In [None]:
# Generally Preferably to read and write separately - VS

In [None]:
with open('somefile.txt', mode="r+") as f:
    print(f.readlines())
    f.write('moreinfo\tmore\n')

['Oh my text first line\n', 'Super easy second line\n', 'OH and third line is\n']


In [None]:
with open('sometext.txt') as f:
    print(f.readlines())

['moreinfo\tmore\n', 'moreinfo\tmore\n', 'moreinfo\tmore\n', 'moreinfo\tmore\n', 'moreinfo\tmore\n', 'moreinfo\tmore\n', 'moreinfo\tmore\n', 'moreinfo\tmore\n']


### Modes:
  *  mode='r' - Read Only
  * 'w' - Write Only (and will overwrite existing files!!!)
  * 'a' - Apend Only (stream is at the end of file!)
  * 'r+' - Read and Write
  * 'w+' - Write and Read with Overwriting existing/make new files
  
  From C (fopen)
   * ``r+''  Open for reading and writing.  The stream is positioned at the
         beginning of the file.
         
    *   ``w+''  Open for reading and writing.  The file is created if it does not
         exist, otherwise it is truncated(**destroyed!**).  The stream is positioned at
         the beginning of the file.    

In [None]:
with open('numbers.txt', mode='a') as f:
    f.write("New Line\n")
    

In [None]:
!cd data/

In [None]:
!dir

 Volume in drive C is Windows
 Volume Serial Number is 5AA0-2A07

 Directory of C:\Users\val-p1\Github\RCS_Data_Analysis_Python_2019_July

07/18/2019  04:57 PM    <DIR>          .
07/18/2019  04:57 PM    <DIR>          ..
07/18/2019  04:07 PM    <DIR>          .ipynb_checkpoints
07/13/2019  10:17 AM             7,614 All_Any.ipynb
07/18/2019  04:03 PM    <DIR>          data
07/13/2019  10:17 AM         7,435,248 Data_Analysis_Python_Introduction.pdf
07/13/2019  10:17 AM             1,713 Git_Workflow.md
07/16/2019  04:04 PM    <DIR>          img
07/13/2019  10:17 AM               217 Jupyter with Python.md
07/13/2019  10:17 AM             1,084 LICENSE
07/18/2019  04:52 PM               285 numbers.txt
07/18/2019  04:48 PM               167 numbers16_48_38.txt
07/18/2019  04:48 PM               167 numbers16_48_9.txt
07/13/2019  10:17 AM            38,870 Python Classes.ipynb
07/13/2019  10:17 AM            47,880 Python Cleaning Up Text Files.ipynb
07/13/2019  10:17 AM               4

In [None]:
mylist = list(range(1,30))
mytextlist = [str(x)+'\n' for x in range(1,30)]
with open('list.txt', mode='w') as fwriter:
    fwriter.writelines(mytextlist)


In [None]:
with open('biglist.txt', mode='w') as fwriter:
    for n in range(1,1_000_000):
        fwriter.write('Line:'+ str(n)+'\n')


In [None]:
# Optimal writing for large files will be between how much you can hold in memory and how big the file will be

## More on String Formatting

## How do you convert values to strings? 

### In Python: pass it to the repr() or str() functions.

The str() function is meant to return representations of values which are fairly human-readable, while repr() is meant to generate representations which can be read by the interpreter (or will force a SyntaxError if there is no equivalent syntax). For objects which don’t have a particular representation for human consumption, str() will return the same value as repr(). 

In [None]:
print(str(34341235421))
print(repr(54))
print(str(54))
str(54)==repr(54)

34341235421
54
54


True

In [None]:
hi = "Hello\n"
hir = repr(hi)
print(hi)
print(hir)
hir==hi

Hello

'Hello\n'


False

### The goal of __repr__ is to be unambiguous: 
### The goal of __str__ is to be readable

## In other words: __repr__ is for developers, __str__ is for customers (end users)

In [None]:
for x in range(1,12):
    print(f'{x:2d} {x*x:3d} {x**3:4f}')

 1   1 1.000000
 2   4 8.000000
 3   9 27.000000
 4  16 64.000000
 5  25 125.000000
 6  36 216.000000
 7  49 343.000000
 8  64 512.000000
 9  81 729.000000
10 100 1000.000000
11 121 1331.000000


In [None]:
'-3.14'.zfill(7) # pads numeric string on the left with zeros

'-003.14'

In [None]:
# write to a text file squares.txt first 10 squares
# 1 squared is 1
# 2 squared is 4
for n in range(1,11):
    print(f'{n} squared is {n*n}')


1 squared is 1
2 squared is 4
3 squared is 9
4 squared is 16
5 squared is 25
6 squared is 36
7 squared is 49
8 squared is 64
9 squared is 81
10 squared is 100


In [None]:
# write to a text file squares.txt first 10 squares
# 1 squared is 1
# 2 squared is 4
with open('squares.txt', mode='w') as f:
    for n in range(1,11):
        myline = f'{n} squared is {n*n}\n'
        print(myline)
        f.write(myline)


1 squared is 1

2 squared is 4

3 squared is 9

4 squared is 16

5 squared is 25

6 squared is 36

7 squared is 49

8 squared is 64

9 squared is 81

10 squared is 100



In [None]:
# write to a text file squares.txt first 10 squares
# 1 squared is 1
# 2 squared is 4
with open('squares2.txt', mode='w') as f:
    mylines = [f'{n} squared is {n*n}\n' for n in range(1,11)]
    f.writelines(mylines)

In [None]:
with open('squares2.txt') as f:
    for line in f:
        # do something with line for example print it
        print(line, end="")

1 squared is 1
2 squared is 4
3 squared is 9
4 squared is 16
5 squared is 25
6 squared is 36
7 squared is 49
8 squared is 64
9 squared is 81
10 squared is 100


In [None]:
type(f)

_io.TextIOWrapper

In [None]:
dir(f)

['_CHUNK_SIZE',
 '__class__',
 '__del__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__enter__',
 '__eq__',
 '__exit__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__lt__',
 '__ne__',
 '__new__',
 '__next__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '_checkClosed',
 '_checkReadable',
 '_checkSeekable',
 '_checkWritable',
 '_finalizing',
 'buffer',
 'close',
 'closed',
 'detach',
 'encoding',
 'errors',
 'fileno',
 'flush',
 'isatty',
 'line_buffering',
 'mode',
 'name',
 'newlines',
 'read',
 'readable',
 'readline',
 'readlines',
 'reconfigure',
 'seek',
 'seekable',
 'tell',
 'truncate',
 'writable',
 'write',
 'write_through',
 'writelines']

In [None]:
f.close()

In [None]:
f.readlines()

ValueError: I/O operation on closed file.

In [None]:
## Homework
## Write function which writes Fizzbuzz 1 to 100 (5,7) to file fizzbuzz.txt
## Format example:
## 1: 1
## 2: 2
## 5: Fizz
## 6: 6
## 7: Buzz
#... 35: FizzBuzz

In [None]:
len(mytext)

12875

In [None]:
with open('../data/Veidenbaums.txt', encoding='UTF-8') as f:
    veidtext = [line for line in f]
len(veidtext)

971

In [None]:
veidtext[:10]

['\n',
 '\n',
 'Pēc ideāliem cenšas lielie gari***\n',
 '\n',
 '\n',
 '\n',
 '\n',
 'Pēc ideāliem cenšas lielie gari,\n',
 '\n',
 'Bet dzīvē ieņemt vietu pirmie\n']