##  File I/O (Input / Output)

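## Jupyter only: writing a text file with a cell magic
* `%%writefile filename.ext` (must be the first line of the cell; the rest of the cell becomes the file content)

Writes into the current working directory (run `pwd` first to see where that is)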

In [None]:
%%writefile somefile.txt
Oh my text first line
Super easy second line
OH and third line is

In [None]:
%%writefile myFunctions.py
def myCoolFun(a,b):
    """Return the sum of ``a`` and ``b`` (any two values that support ``+``)."""
    total = a + b
    return total

def mySecondFun(a,b):
    """Return ``a`` multiplied by ``b`` (any two values that support ``*``)."""
    product = a * b
    return product



In [None]:
import myFunctions

In [None]:
myFunctions.myCoolFun(5,6)

In [1]:
import myFunctions as mf

In [2]:
mf.myCoolFun(5,6)

11

In [3]:
mf.mySecondFun(6,7)

42

In [None]:
import mylib

In [None]:
mylib.nb_year(100,2,0,200)

In [None]:
from mylib import nb_year

In [None]:
nb_year(100,20,20,200)

In [None]:
from mylib import nb_year as mynb


In [None]:
mynb(555,55,100,10000)

In [4]:
# filein is our own name for file 
filein = open('somefile.txt')

In [5]:
type(filein)

_io.TextIOWrapper

In [None]:
# Typing "filein." and pressing Tab lists the available methods interactively,
# but a bare "filein." is a SyntaxError when the cell is run.
# The same list (public names only) can be produced in code:
[name for name in dir(filein) if not name.startswith('_')]

In [6]:

mytext = filein.read() # usually not that useful
print(mytext)

Oh my text first line
Super easy second line
OH and third line is



In [7]:
filein.read()

''

In [8]:
# there can be many pointers to file stream
fin = filein

In [9]:
fin.name

'somefile.txt'

In [10]:
fin.read() # what will happen ?

''

In [11]:
fin.seek(2)
fin.read()

' my text first line\nSuper easy second line\nOH and third line is\n'

In [12]:
fin.seek(6)
text6= fin.read()
print(text6)

text first line
Super easy second line
OH and third line is



In [13]:
# we seek the very beginning of the file
fin.seek(0)
fulltext = fin.read()
print(fulltext)

Oh my text first line
Super easy second line
OH and third line is



In [14]:
fulltext[6:]

'text first line\nSuper easy second line\nOH and third line is\n'

In [15]:
fin.seek(0)
mylines=fin.readlines()
mylines

['Oh my text first line\n',
 'Super easy second line\n',
 'OH and third line is\n']

In [16]:
type(mylines)

list

In [17]:
len(mylines)

3

In [18]:
# list comprehension to generate line lengths
linelengths = [len(line) for line in mylines]
linelengths

[22, 23, 21]

In [None]:
mylines[0]

In [19]:
print(mylines[0])

Oh my text first line



In [None]:
# print() shows the text itself, so the trailing newline appears as an empty line;
# the built-in repr() gives the quoted form in which escapes such as \n stay visible
print(mylines[0])
print(repr(mylines[0])) # same text the notebook displays for a bare expression on the last line
mylines[0]

In [20]:
# we go through each line and do some work
# (each line still ends with '\n', so print() leaves an empty line after it)
for line in mylines:
    print(f'My Line Is:{line}')
    # Do more work on lines

My Line Is:Oh my text first line

My Line Is:Super easy second line

My Line Is:OH and third line is



In [24]:
"He said 'nice weather' , really"

"He said 'nice weather' , really"

In [21]:
# We can go through the file directly (without reading the whole file into memory)
fin.seek(0)
# iterating the file object yields one line at a time, so very large files work too
for line in fin:
    print(line, end='')

Oh my text first line
Super easy second line
OH and third line is


In [23]:
for line  in mylines:
    print(line, end='')

Oh my text first line
Super easy second line
OH and third line is


In [None]:
#close the file if you .open() it!!
fin.close()

## Always use `with open(...)`!

* closes the file automatically when the block ends
* closes it even if an exception is raised inside the block

In [25]:
with open('somefile.txt') as fin:
    for line in fin:
        # do something with each line
        print(line)

Oh my text first line

Super easy second line

OH and third line is



In [27]:
# Idiom for opening AND closing a file: read it and do the work inside the with-block
with open('somefile.txt') as fin:
    results = []
    for line in fin:
        results.append(line.count('a'))
        print(line)
        # do work with each line here, save into a list or other structure
    # we can do more work with file here
    # maybe fin.seek(0) to read it again for some reason
    # the file is closed automatically as soon as this indented block ends
    
print("file is closed already here")
print(results)
# the file was closed when the with-block ended, i.e. before these two prints ran

Oh my text first line

Super easy second line

OH and third line is

Lorem ipsum some text some more abracadabra

file is closed already here
[0, 1, 1, 5]


In [None]:
# fin is already closed at this point, so reading from it raises
# ValueError (I/O operation on closed file). Catch it and show the message
# so the notebook still runs from top to bottom.
try:
    fin.read()
except ValueError as err:
    print(f'Cannot read: {err}')

## For MacOS and Linux
* use pwd to see where you are
### myfile = open("/Users/MyUserName/SomeFolder/MaybeAnotherFolder/myfile.txt")

## For Windows
* use pwd to see where you are
### myfile = open("C:\\Users\\MyUserName\\SomeFolder\\MaybeAnotherFolder\\myfile.txt")

In [None]:
# Jupyter Magic !someOScommand for example !dir or !ls
!dir

In [None]:
pwd

In [None]:
!dir

In [None]:
pwd

In [29]:
# importing OS specific library for system work
import os

In [30]:
cwd = os.getcwd()
cwd

'C:\\Users\\val-p1\\Github\\RCS_ML_01_20\\Python_Core'

In [31]:
# NOTE: gluing the path together with a hard-coded '\\' separator only works on Windows;
# os.path.join(cwd, 'somefile.txt') builds the same path portably
with open(cwd+'\\somefile.txt') as f:
    for line in f:
        print(line)

Oh my text first line

Super easy second line

OH and third line is

Lorem ipsum some text some more abracadabra



In [32]:
# join path components with the correct separator no matter what OS we have
fullpath = os.path.join(cwd, 'somefile.txt')
fullpath

'C:\\Users\\val-p1\\Github\\RCS_ML_01_20\\Python_Core\\somefile.txt'

In [33]:
os.path.join(os.getcwd(), "myimages", "catpictures", "mycat.png")

'C:\\Users\\val-p1\\Github\\RCS_ML_01_20\\Python_Core\\myimages\\catpictures\\mycat.png'

In [34]:
# i get the current working directory and join the file path
with open(os.path.join(os.getcwd(), 'somefile.txt')) as f:
    for line in f:
        print(line)

Oh my text first line

Super easy second line

OH and third line is

Lorem ipsum some text some more abracadabra



In [35]:
with open('../data/Veidenbaums.txt', encoding='UTF-8') as f:
    mytext = f.read()
len(mytext)

12875

In [36]:
mytext[:100]

'\n\nPēc ideāliem cenšas lielie gari***\n\n\n\n\nPēc ideāliem cenšas lielie gari,\n\nBet dzīvē ieņemt vietu pi'

In [39]:
mytext[-100:]

'a, cik siena viņas dos,\n\nLai mani nelasa. Pie manām dzejām\n\nTik piktu prātu viņš sev iemantos.\n\n\n\n\n\n'

In [40]:
with open('../data/Veidenbaums.txt', encoding='UTF-8') as f:
    mylines = f.readlines()
len(mylines)

971

In [41]:
cleanlines = [line.strip() for line in mylines]
len(cleanlines)

971

In [42]:
textlines = [line for line in cleanlines if len(line) > 0]
len(textlines)

392

In [43]:
textlines[100:105]

['Daudz prātīgu cilvēku pasaulē ***',
 'Daudz prātīgu cilvēku pasaulē dzīvo:',
 'Tie strādā kā skudras un rūpīgi krāj,',
 'Tie nolād alu, tie nolād sīvo',
 'Un strādā, un krāj, līdz smiltis tos klāj.']

In [44]:
'***' in textlines[100]

True

In [45]:
'***' in textlines[101]

False

In [46]:
# drop the lines marked with '***' (the poem titles); `x not in y` is the idiomatic
# spelling of `not x in y` and gives the same result
noheadlines = [line for line in textlines if '***' not in line]
len(noheadlines)

366

In [47]:
noheadlines[:10]

['Pēc ideāliem cenšas lielie gari,',
 'Bet dzīvē ieņemt vietu pirmie',
 'Tie neiespēj, tos nomāc maizes kari,',
 'Tos nomāc aizspriedumi sirmie.',
 'Virs zemes nav taisnības, dūrei tik spēks,',
 'Kas varmākām skādi dar, nosaukts tiek grēks.',
 'Par tiesnešiem cienīti blēži sēž',
 'Un godīgie ādu nost citiem plēš.',
 'Un cienīgs tēvs, zaglis, teic sprediķus;',
 '"Tik pacieties, debesīs labāki būs!"']

In [51]:
"# #".join(["Valdis", "likes", "food"])

'Valdis# #likes# #food'

In [53]:
poemtext= " ".join(noheadlines)
len(poemtext), poemtext[:50]

(11549, 'Pēc ideāliem cenšas lielie gari, Bet dzīvē ieņemt ')

In [54]:
words = poemtext.split(" ")
len(words)

1894

In [55]:
words[:20]

['Pēc',
 'ideāliem',
 'cenšas',
 'lielie',
 'gari,',
 'Bet',
 'dzīvē',
 'ieņemt',
 'vietu',
 'pirmie',
 'Tie',
 'neiespēj,',
 'tos',
 'nomāc',
 'maizes',
 'kari,',
 'Tos',
 'nomāc',
 'aizspriedumi',
 'sirmie.']

In [56]:
badchars = ".,:'\"!?"

In [57]:
for c in badchars:
    print(c)

.
,
:
'
"
!
?


In [58]:
# show the length before cleaning, then delete every character listed in badchars
# in a single pass and show the new length
print(len(poemtext))
poemtext = poemtext.translate(str.maketrans('', '', badchars))
len(poemtext)

11549


11081

In [59]:
words = poemtext.split()
len(words)

1893

In [61]:
lowercase = [word.lower() for word in words]
len(lowercase), lowercase[:10]

(1893,
 ['pēc',
  'ideāliem',
  'cenšas',
  'lielie',
  'gari',
  'bet',
  'dzīvē',
  'ieņemt',
  'vietu',
  'pirmie'])

In [62]:
from collections import Counter

In [64]:
wordcount = Counter(lowercase)

In [65]:
wordcount.most_common(20)

[('un', 76),
 ('ir', 24),
 ('-', 23),
 ('tik', 21),
 ('vēl', 21),
 ('tu', 21),
 ('bet', 15),
 ('kas', 15),
 ('nav', 14),
 ('man', 14),
 ('par', 13),
 ('kā', 13),
 ('kur', 13),
 ('lai', 12),
 ('pēc', 11),
 ('ar', 11),
 ('tev', 11),
 ('kam', 11),
 ('tie', 10),
 ('ka', 10)]

In [69]:
specialwords = ['alu', 'ala', 'opa']

In [70]:

longwords = [word for word in lowercase if len(word) > 3 or word in specialwords]
len(lowercase),len(longwords)

(1893, 1294)

In [71]:
longcount = Counter(longwords)
longcount.most_common(20)

[('reiz', 10),
 ('viss', 9),
 ('līdz', 8),
 ('mums', 7),
 ('sauc', 6),
 ('gars', 6),
 ('projām', 6),
 ('laiks', 5),
 ('sirds', 5),
 ('tomēr', 5),
 ('likumīgi', 5),
 ('dzīves', 5),
 ('iedzer', 5),
 ('tiek', 4),
 ('daudz', 4),
 ('vaigs', 4),
 ('kājām', 4),
 ('pasaules', 4),
 ('bija', 4),
 ('kaut', 4)]

In [72]:
longcount['alus']

2

In [73]:
longcount['alu']

3

In [None]:
# mode='w' is write only and it deletes (truncates) any OLD file with the same name!!
with open('numbers.txt', mode='w') as fwriter:
    for n in range(1,10):
        fwriter.write(f'The number is {n*2} \n')
        #fwriter.write(f'The number is {n}') # try this to see the difference between \n and no \n
    # the file is closed here, when the with-block ends

In [None]:
# Expected size of numbers.txt: total length of the nine lines actually written.
# The f prefix matters: without it the literal text '{n*2}' is measured instead of
# the number, and multiplying one length by 9 ignores that 10..18 have two digits.
expected_chars = len(''.join(f'The number is {n*2} \n' for n in range(1,10)))
expected_chars

In [None]:
with open('numbers.txt') as freader:
    mytext = freader.read()
len(mytext)

In [None]:
# Add up the length of every line in the file.
# The accumulator is called total_chars because naming it `sum` would shadow the
# built-in sum() for every cell run afterwards in this kernel.
with open('numbers.txt') as freader:
    total_chars = 0
    for line in freader:
        print(line)
        print(len(line))
        total_chars += len(line)
print(total_chars)

In [None]:
from datetime import datetime
now = datetime.now()
now

In [None]:
today = datetime.today()
print(today)

In [None]:
# mode='w' is write only and it deletes (truncates) any OLD file with the same name!!
# hour, minute and second go into the file name, so a run at a different time of day
# writes to a different file
today = datetime.today()
with open(f'numbers{today.hour}_{today.minute}_{today.second}.txt', mode='w') as fwriter:
    for n in range(1,10):
        fwriter.write(f'The number is {n*2} \n')
        #fwriter.write(f'The number is {n}') # try this to see the difference between \n and no \n
    # the file is closed here, when the with-block ends

In [None]:
today

In [None]:
today.hour

In [None]:
today.minute

In [None]:
datetime.today()

In [None]:
timestamp = datetime.timestamp(now)
timestamp

In [None]:
str(datetime.now())

In [None]:
with open('numbers.txt') as fin:
    print(fin.readlines())

In [None]:
with open('numbers.txt', mode='r') as fin:
    for line in fin:
        print(line, end="")

In [None]:
# We can append to the files without overwriting
with open('numbers.txt', mode='a') as fin:
    fin.write("This might not be the end\nThis is really the end")

In [None]:
with open('numbers.txt', mode='a') as fin:
    fin.seek(4) # seek will not work here for writing
    fin.write("This is not the end")

In [None]:
with open('numbers.txt') as f:
    print(f.read())

In [None]:
# Jupyter magic for reading files into notebook
%%readfile numbers.txt
The number is 2 
The number is 4 
The number is 6 
The number is 8 
The number is 10 
The number is 12 
The number is 14 
The number is 16 
The number is 18 
This is the end!

In [None]:
# It is generally preferable to read and write in separate steps (separate open() calls) instead of mixing both on one file handle - VS

In [None]:
with open('somefile.txt', mode="r+") as f:
    print(f.readlines())
    f.write('moreinfo\tmore\n')

In [None]:
# read somefile.txt back to check the line that was just written with mode 'r+'
# (the name was 'sometext.txt', a file this notebook never creates)
with open('somefile.txt') as f:
    print(f.readlines())

### Modes:
  *  mode='r' - Read Only
  * 'w' - Write Only (and will overwrite existing files!!!)
  * 'a' - Append Only (stream is at the end of file!)
  * 'r+' - Read and Write
  * 'w+' - Write and Read with Overwriting existing/make new files
  
  From C (fopen)
   * ``r+''  Open for reading and writing.  The stream is positioned at the
         beginning of the file.
         
    *   ``w+''  Open for reading and writing.  The file is created if it does not
         exist, otherwise it is truncated(**destroyed!**).  The stream is positioned at
         the beginning of the file.    

In [None]:
with open('numbers.txt', mode='a') as f:
    f.write("New Line\n")
    

In [None]:
# NOTE: every `!` command runs in its own temporary subshell, so this cd does NOT change
# the notebook's working directory for later cells; the %cd magic is the one that persists
!cd data/

In [None]:
!dir

In [None]:
# numbers 1..29 written one per line; writelines() adds no separators of its own,
# so every string has to carry its own trailing newline
mylist = list(range(1,30))
# build the text lines from mylist instead of repeating the range
mytextlist = [str(x)+'\n' for x in mylist]
with open('list.txt', mode='w') as fwriter:
    fwriter.writelines(mytextlist)


In [None]:
# stream the numbered lines (1 up to 999,999) to disk; the generator produces them
# one at a time, so they are never all held in memory
with open('biglist.txt', mode='w') as fwriter:
    fwriter.writelines(f'Line:{n}\n' for n in range(1,1_000_000))


In [None]:
# The best way to write a large file is a trade-off: writing bigger batches means fewer write calls, but the batch has to fit in memory

## More on String Formatting

## How do you convert values to strings? 

### In Python: pass it to the repr() or str() functions.

The str() function is meant to return representations of values which are fairly human-readable, while repr() is meant to generate representations which can be read by the interpreter (or will force a SyntaxError if there is no equivalent syntax). For objects which don’t have a particular representation for human consumption, str() will return the same value as repr(). 

In [None]:
print(str(34341235421))
print(repr(54))
print(str(54))
str(54)==repr(54)

In [None]:
hi = "Hello\n"
hir = repr(hi)
print(hi)
print(hir)
hir==hi

### The goal of __repr__ is to be unambiguous: 
### The goal of __str__ is to be readable

## In other words: __repr__ is for developers, __str__ is for customers (end users)

In [None]:
# Aligned table of x, x squared and x cubed.
# All three values are ints, so every column uses the `d` (decimal integer) type with a
# minimum width; `4f` would print the cube as a float with six decimals (1.000000).
for x in range(1,12):
    row = f'{x:2d} {x*x:3d} {x**3:4d}'
    print(row)

In [None]:
'-3.14'.zfill(7) # pads numeric string on the left with zeros

In [None]:
# write to a text file squares.txt first 10 squares
# 1 squared is 1
# 2 squared is 4
for n in range(1,11):
    print(f'{n} squared is {n*n}')


In [None]:
# write to a text file squares.txt first 10 squares
# 1 squared is 1
# 2 squared is 4
with open('squares.txt', mode='w') as f:
    for n in range(1,11):
        myline = f'{n} squared is {n*n}\n'
        print(myline)
        f.write(myline)


In [None]:
# write to a text file squares.txt first 10 squares
# 1 squared is 1
# 2 squared is 4
with open('squares2.txt', mode='w') as f:
    mylines = [f'{n} squared is {n*n}\n' for n in range(1,11)]
    f.writelines(mylines)

In [None]:
with open('squares2.txt') as f:
    for line in f:
        # do something with line for example print it
        print(line, end="")

In [None]:
type(f)

In [None]:
dir(f)

In [None]:
f.close()

In [None]:
# f is already closed at this point, so reading from it raises
# ValueError (I/O operation on closed file). Catch it and show the message
# so the notebook still runs from top to bottom.
try:
    f.readlines()
except ValueError as err:
    print(f'Cannot read: {err}')

In [None]:
## Homework
## Write function which writes Fizzbuzz 1 to 100 (5,7) to file fizzbuzz.txt
## Format example:
## 1: 1
## 2: 2
## 5: Fizz
## 6: 6
## 7: Buzz
#... 35: FizzBuzz

In [None]:
len(mytext)

In [None]:
# load the poem file as a list of lines (line endings kept) and count them
with open('../data/Veidenbaums.txt', encoding='UTF-8') as f:
    veidtext = f.readlines()
len(veidtext)

In [None]:
veidtext[:10]