## Writing Any File Data

In [3]:
# Header line of the file, tab-separated and newline-terminated:
# id<tab>animal<newline>
header = "id\tanimal\n"
# Two records to save:
# 123     Cat
# 456     Dog
data = [[123,'Cat'],[456,'Dog']]
# The `with` statement closes the file automatically when its block ends
# (even if an exception is raised), so no explicit close() call is needed.
with open('somedata.txt', 'w') as file_handler:
    # First write the header
    file_handler.write(header)
    # Then write each record in the same format as the header:
    # id<tab>animal<newline>
    for record_id, animal in data:
        # The id is an integer, and write() only accepts strings
        file_handler.write(str(record_id) + "\t" + animal + "\n")
    

In [11]:
# Shell commands can be run from Jupyter by putting a ! at the start of the line.
# `cat` prints the full contents of a file.
!cat somedata.txt

id	animal
123	Cat
456	Dog


<br><br>
## Reading Any File Data

In [12]:
# The `with` statement closes the file automatically when its block ends,
# so no explicit close() call is needed.
with open('somedata.txt', 'r') as file_handler:
    # Read in ALL the data as one string
    data = file_handler.read()

# `data` still holds the file contents after the file has been closed
print(data)

id	animal
123	Cat
456	Dog



### Parsing Structured Data

In [14]:
# `data` is one long string in which the lines are separated by "\n".
# Split it into lines, then split every line into its tab-separated fields;
# the result is a list with one list of fields per line.
parsed_data = [row_text.split("\t") for row_text in data.split("\n")]
print(parsed_data)

# The first entry is the header; every entry after it is a data record
header, data_records = parsed_data[0], parsed_data[1:]

print("Header: ", header)
print("Records: ", data_records)

[['id', 'animal'], ['123', 'Cat'], ['456', 'Dog'], ['']]
Header:  ['id', 'animal']
Records:  [['123', 'Cat'], ['456', 'Dog'], ['']]


Why is there one extra empty record?
Because the file ends with a "\n", and `split` returns the text on both sides of every separator it finds, even when one side is empty. The text after the final "\n" is an empty string, so it becomes one extra, empty entry. For example, `"a,b".split(",")` -> `['a', 'b']`, but `"a,".split(",")` -> `['a', '']`.

<br><br>
## Writing CSV Data

In [18]:
import csv
# Save the header to a variable.
# This time it's a list
header = ["id","animal"]
# Save 2 records to a variable
# 123     Cat
# 456     Dog
data = [[123,'Cat'],[456,'Dog']]
# newline='' is what the csv module documentation asks for: it lets the
# writer control line endings itself, which avoids blank lines between rows
# on platforms that translate "\n" when writing text files.
with open('somedata.csv', 'w', newline='') as file_handler:
    csv_writer = csv.writer(file_handler, delimiter=",")
    # Write out a single row
    csv_writer.writerow(header)
    # Write out multiple rows
    csv_writer.writerows(data)
# Leaving the `with` block closes the file, so no explicit close() is needed
    

In [19]:
# Print the full contents of the CSV file
!cat somedata.csv

id,animal
123,Cat
456,Dog


### Nifty Trick With Lists 

In [20]:
# Here is some cool syntax that will create one big list that looks like
# [['id', 'animal'], [123, 'Cat'], [456, 'Dog']]
# The outer [ ] say the end result will be a list,
# and the * unpacks `data`, placing each of its items directly in that list.
# The end result is the ability to mix a list with a list of lists.
[header, *data]

[['id', 'animal'], [123, 'Cat'], [456, 'Dog']]

In [23]:
# There's no limit to how many times that can be done
# (imagine the two `data` lists were different lists):
[header, *data, *data]

[['id', 'animal'], [123, 'Cat'], [456, 'Dog'], [123, 'Cat'], [456, 'Dog']]

<br><br>
## Reading CSV Data

In [25]:
import csv
# newline='' is what the csv module documentation asks for when opening a
# file for csv.reader, so newlines inside quoted fields are read correctly.
with open('somedata.csv', 'r', newline='') as file_handler:
    # csv.reader yields one list of strings per row of the file
    file_iterator = csv.reader(file_handler, delimiter=",")
    # Read in ALL the data.
    # If this was a very large file, we would not read all the data in,
    # storing it in a single variable.
    # Instead we would do all the work we needed on each row before
    # moving to the next row.
    # The limitation is that it makes it harder to do operations across rows.
    data = list(file_iterator)
# Leaving the `with` block closes the file, so no explicit close() is needed;
# `data` keeps holding the rows after the file is closed.

# The result is a list of lists
print(data)

[['id', 'animal'], ['123', 'Cat'], ['456', 'Dog']]


In [26]:
# Row 0 of `data` is the header; all the rows after it are the data records
header, data_records = data[0], data[1:]

print("Header: ", header)
print("Records: ", data_records)

Header:  ['id', 'animal']
Records:  [['123', 'Cat'], ['456', 'Dog']]


<br><br>
### It's Python, so there's always another way to write something
#### Manually managed file handle, no `with`

In [4]:
# Open the file without `with`; nothing in this cell closes it,
# so close() has to be called explicitly later.
file_handler = open('somedata.txt', 'r')
# Printing the file object shows its name, mode, and encoding
print(file_handler)

<_io.TextIOWrapper name='somedata.txt' mode='r' encoding='UTF-8'>


In [5]:
# The file is open, so we can do whatever we want with it,
# but we must close it ourselves because there is no `with` block to do it.
# try/finally makes sure close() runs even if reading raises an error.
try:
    print(file_handler.read())
finally:
    file_handler.close()

id	animal
123	Cat
456	Dog



In [6]:
# Clean up the files created by this notebook.
# rm reports an error for any listed file that does not exist;
# its -f option would ignore missing files instead.
!rm somedata.txt somedata.csv

rm: somedata.csv: No such file or directory
