# Working with external files

#### Open file (simple)

In [13]:
try:
    # With this method, you have to close the file manually.
    f = open('C:/Users/owner/Desktop/Notepad/pythonquotes.txt') 
    filecontents = f.read()
    print(filecontents)
    f.close()
except FileNotFoundError:
    print("File not found")
finally:
    print('File is closed? ', f.closed)  

No one can make you feel inferior without your consent.
Eleanor Roosevelt
Don't tell me not to reach for the stars when there are footprints on the moon.
Anonymous
I've been through some terrible things.  Some of which actually happened.
Mark Twain
File is closed?  True


#### Open file (contextual) - preferred method

In [15]:
# With this method, the file is closed automatically
with open('C:/Users/owner/Desktop/Notepad/pythonquotes.txt') as f:
    filecontents = f.read()
    print(filecontents) 

print('File is closed? ', f.closed) # True  

No one can make you feel inferior without your consent.
Eleanor Roosevelt
Don't tell me not to reach for the stars when there are footprints on the moon.
Anonymous
I've been through some terrible things.  Some of which actually happened.
Mark Twain
File is closed?  True


#### Open image (contextual)

In [16]:
# Will print gobbly-gook and not image (gobbly-gook is exepcted behaviour)

# Arguments: read, binary
with open('C:/Users/owner/Desktop/Notepad/PythonBasketballPic.jpg', 'rb') as f: 
    filecontents = f.read()
    print(filecontents) 

b'\xff\xd8\xff\xe0\x00\x14JFIF\x00\x01\x01\x01\x01,\x01,\x00\x00AMPF\xff\xe1\n\xf0Exif\x00\x00MM\x00*\x00\x00\x00\x08\x00\x0e\x01\x0f\x00\x02\x00\x00\x00\x06\x00\x00\x00\xb6\x01\x10\x00\x02\x00\x00\x00\x12\x00\x00\x00\xbc\x01\x12\x00\x03\x00\x00\x00\x01\x00\x06\x00\x00\x01\x1a\x00\x05\x00\x00\x00\x01\x00\x00\x00\xce\x01\x1b\x00\x05\x00\x00\x00\x01\x00\x00\x00\xd6\x01(\x00\x03\x00\x00\x00\x01\x00\x02\x00\x00\x011\x00\x02\x00\x00\x00\x07\x00\x00\x00\xde\x012\x00\x02\x00\x00\x00\x14\x00\x00\x00\xe6\x01<\x00\x02\x00\x00\x00\x12\x00\x00\x00\xfa\x01B\x00\x04\x00\x00\x00\x01\x00\x00\x02\x00\x01C\x00\x04\x00\x00\x00\x01\x00\x00\x02\x00\x02\x13\x00\x03\x00\x00\x00\x01\x00\x01\x00\x00\x87i\x00\x04\x00\x00\x00\x01\x00\x00\x01\x0c\x88%\x00\x04\x00\x00\x00\x01\x00\x00\t\xec\x00\x00\x00\x00Apple\x00iPhone 13 Pro Max\x00\x00\x00\x00H\x00\x00\x00\x01\x00\x00\x00H\x00\x00\x00\x0116.1.1\x00\x002022:12:10 08:57:35\x00iPhone 13 Pro Max\x00\x00%\x82\x9a\x00\x05\x00\x00\x00\x01\x00\x00\x02\xce\x82\x9d\x00\x

#### Open text utf-8 (contextual)

In [18]:
with open('C:/Users/owner/Desktop/Notepad/pythonnames.txt', encoding='utf-8') as f: 
    filecontents = f.read()
    print(filecontents) 

Claire Dunphy
Phil Dunphy
Haley Dunphy
Alex Dunphy
Luke Dunphy


#### Open text utf-8 (contextual)

In [20]:
with open('C:/Users/owner/Desktop/Notepad/pythonnames.txt', encoding='utf-8') as f: 
    filecontents = f.read()
    print(filecontents) 

Claire Dunphy
Phil Dunphy
Haley Dunphy
Alex Dunphy
Luke Dunphy


#### Reading file content options (contextual)

In [32]:
with open('C:/Users/owner/Desktop/Notepad/pythonnames.txt') as f: 
    
    # Read the entire file    
    content = f.read()
    print(content)
    
    # Read the contents into a list.  If you keep the read() above in the code, 
    # this list will be empty because the entire file has already been read.
    content = f.readlines()
    print(content)
    
    # Read one line from the file - same rule as above applies.  If you read all of the file in either bit of logic
    # above, then there will be nothing for readline() to read.
    content = f.readline()
    print(content)

Claire Dunphy
Phil Dunphy
Haley Dunphy
Alex Dunphy
Luke Dunphy
[]



#### Looping through a file while reading it in all at once

In [24]:
# Reading in an antire file at once is okay when you know the size of the file.  If you don't know the size of the file,
# you should read it in line by line.  This is also more memory efficient.

with open('C:/Users/owner/Desktop/Notepad/pythonquotes.txt') as f: 
    # Read all lines first, then loop through them
    for one_line in f.readlines():
        print(one_line, end='') 

No one can make you feel inferior without your consent.
Eleanor Roosevelt
Don't tell me not to reach for the stars when there are footprints on the moon.
Anonymous
I've been through some terrible things.  Some of which actually happened.
Mark Twain

#### Using 'enumerate' to use the line number

In [26]:
with open('C:/Users/owner/Desktop/Notepad/pythonquotes.txt') as f: 
    # Read all lines first, then loop through them
    # Count each line starting at zero
    #   one_line[0] = the number of that line
    #   one_line[1] = the contents of that line
    for one_line in enumerate(f.readlines()):
        # If the counter is even, print with no extra newline
        if one_line[0] % 2 == 0:
            print(one_line[1], end='')
        # If the counter is odd, print with an extra newline
        else:
            print('   - ' + one_line[1])

No one can make you feel inferior without your consent.
   - Eleanor Roosevelt

Don't tell me not to reach for the stars when there are footprints on the moon.
   - Anonymous

I've been through some terrible things.  Some of which actually happened.
   - Mark Twain


#### Looping through a file line by line

In [28]:
# Reading a file line-by-line ia advisable if you don't know how large the file is.  This is more memory efficient.
# Notice that since you aren't reading the entire file at once, you can't use the 'enumerate' function.  That means
# you have to create your own counter.

# Setup a counter
counter = 1

with open('C:/Users/owner/Desktop/Notepad/pythonquotes.txt') as f: 
    one_line = f.readline()
    # As long as there are lines to read...
    while one_line:
        if counter % 2 == 0:
            print('   - ' + one_line)
        else:
            print(one_line, end='')
        # Increment the counter
        counter += 1
        one_line = f.readline()

No one can make you feel inferior without your consent.
   - Eleanor Roosevelt

Don't tell me not to reach for the stars when there are footprints on the moon.
   - Anonymous

I've been through some terrible things.  Some of which actually happened.
   - Mark Twain


#### Appending a line to a file

In [33]:
# If you use 'write', the entire conetents of your file will be overwritten.
# If you use 'append', the new line will be added to the end of the file.
new_name = 'Dylan Marshall'

# The 'a' in the statement below signifies the code to append the name, not overwrite the file.
with open('C:/Users/owner/Desktop/Notepad/pythonnames.txt', 'a', encoding='utf-8') as f: 
    f.write('\n')
    f.write(new_name)

# File closes automatically, so no need to close it manually.
print('\nDone')

# Reopen the file and print it's contents
with open('C:/Users/owner/Desktop/Notepad/pythonnames.txt', encoding='utf-8') as f: 
    print(f.read())


Done
Claire Dunphy
Phil Dunphy
Haley Dunphy
Alex Dunphy
Luke Dunphy
Dylan Marshall


#### Using tell() to determine the pointer location

In [38]:
# The tell() method returns the current position of the pointer in the file.
with open('C:/Users/owner/Desktop/Notepad/pythonnames.txt', encoding='utf-8') as f:
    # Read the first line
    print(f.tell())
    one_line = f.readline()
    # Keep reading one line at a time until there are no more lines
    while one_line:
        print(one_line[:-1], f.tell())
        one_line = f.readline()
        # If you run this using readlines() instead of readline(), you will see that the pointer is at the end of the file for each line.

0
Claire Dunphy 15
Phil Dunphy 28
Haley Dunphy 42
Alex Dunphy 55
Luke Dunphy 68
Dylan Marshal 82


#### Using seek() to reposition the pointer in a file

In [39]:
# Most common usage of seek is to put the pointer back to the beginning of the file.
with open('C:/Users/owner/Desktop/Notepad/pythonnames.txt', encoding='utf-8') as f:
    # Read the first line
    print(f'Position = ' + str(f.tell()))
    one_line = f.readline()
    # Keep reading one line at a time until there are no more lines
    while one_line:
        print(one_line[:-1], f.tell())
        one_line = f.readline()
        # If you run this using readlines() instead of readline(), you will see that the pointer is at the end of the file for each line.
    # Reposition the pointer to the beginning of the file
    f.seek(0)
    print(f'Position = ' + str(f.tell()))
    # Read the entire file
    print(f.read()) 

Position = 0
Claire Dunphy 15
Phil Dunphy 28
Haley Dunphy 42
Alex Dunphy 55
Luke Dunphy 68
Dylan Marshal 82
Position = 0
Claire Dunphy
Phil Dunphy
Haley Dunphy
Alex Dunphy
Luke Dunphy
Dylan Marshall


#### Reading and copying a binary file

In [40]:
# Specify the file to copy
file_to_copy = 'C:/Users/owner/Desktop/Notepad/PythonBasketballPic.jpg'

# Create a new file name with _copy before the extension
# The next line of code splits the file name into two parts: the name and the extension.
name_parts = file_to_copy.split('.')

# name_parts[0] is the name of the file; name_parts[1] is the extension.
# Here, we are adding '_copy' to the name and then adding the extension back on.
new_file_name = name_parts[0] + '_copy.' + name_parts[1]

# Open the original file as a read only binary file
with open(file_to_copy, 'rb') as original_file:
    # Open the new file as a write binary file
    with open(new_file_name, 'wb') as new_file:
        # Read a chunk of the original file so as to not overwhelm computer resources (4KB chunks)
        chunk = original_file.read(4096)
        # Loop through the file until there are no chunks left
        while len(chunk) > 0:
            # Write the chunk to the new file
            new_file.write(chunk)
            # Read the next chunk
            chunk = original_file.read(4096)
print('Done!')

Done!


#### Reading a CSV file

In [41]:
import csv

# Open the CSV file with encoindg='utf-8' to handle special characters.  Adding 'newline' at the end
# prevents the newline at the end of the row from being included..
with open('C:/Users/owner/Desktop/Notepad/pythoncsv.csv', encoding='utf-8', newline = '') as f:
    # To create a simple CSV reader object, do this
    # reader = csv.reader(f)
    # Create a row counter (optional but let's you see which row you are on)
    reader = enumerate(csv.reader(f))
    # Loop through each row in the CSV file.  Use 'i' to track the counter and 'row' to track the contents of the row.
    for i, row in reader:
        # Print the counter and the row
        print(i, row)
print('Done!')

0 ['Full Name', 'Birth Year', 'Date Joined', 'Is Active', 'Balance']
1 ['Tucker, Cameron', '1998', '3/5/1968', 'TRUE', '$300.00']
2 ['Pritchett, Mitchell', '2000', '12/1/1967', 'TRUE', '$200.00']
3 ['', '', '', '', '']
4 ['Pritchett, Lilly', '2020', '2/3/1988', 'FALSE', '$50.00']
Done!


#### Extracting and converting columns from a CSV file

In [42]:
import csv
import datetime

# Open the CSV file with encoindg='utf-8' to handle special characters.  Adding 'newline' at the end
# prevents the newline at the end of the row from being included..
with open('C:/Users/owner/Desktop/Notepad/pythoncsv.csv', encoding='utf-8', newline = '') as f:
    # Create a simple CSV reader object
    # reader = csv.reader(f)
    # Create a row counter (optional but let's you see which row you are on)
    reader = enumerate(csv.reader(f))
    # Loop through each row in the CSV file.  Use 'i' to track the counter and 'row' to track the contents of the row.
    for i, row in reader:
        # First row is the header - okay to ignore
        if i > 0:
            # Split the first column into first and last name
            try:
                full_name = row[0].split(', ')
                last_name = full_name[0].strip()
                first_name = full_name[1].strip()
            except IndexError:
                full_name = last_name = first_name = ''
                # Continue processing the line if no name is present
                continue
            # Convert the second column - birth year - to an integer
            birth_year = int(row[1] or 0)
            # Convert date joined to date
            try:
                date_joined = datetime.datetime.strptime(row[2], '%m/%d/%Y').date()
            except ValueError:
                # Use 'None' for an empty object and '' for an empty STRING.  But we're converting to datetime here, so use NONE.
                date_joined = None
            # Excel stores boolean values in all uppercase.  We want to convert it to a flag here.
            is_active = True if row[3] == 'TRUE' else False
            # Handle the 'balance' column next by stripping away the dollar signs and commas.  Use 'strip' to remove
            # any leading or trailing spaces.
            str_balance = (row[4].replace('$', '').replace(',', '')).strip()
            balance = float(str_balance or 0)
            # Print the column values
            print(first_name, last_name, birth_year, date_joined, is_active, balance)
            # Print the counter and the row
            # print(i, row)
print('Done!')

Cameron Tucker 1998 1968-03-05 True 300.0
Mitchell Pritchett 2000 1967-12-01 True 200.0
Lilly Pritchett 2020 1988-02-03 False 50.0
Done!


#### Importing CSV to Python objects

In [43]:
import csv
import datetime as dt

# Get the first_name from the full_name
def fname(any):
    try:
        nm = any.split(', ')
        return nm[1].strip()
    except IndexError:
        return ''

# Get the last_name from the full_name
def lname(any):
    try:
        nm = any.split(', ')
        return nm[0].strip()
    except IndexError:
        return ''

# Convert string to integer (or zero if no value)
def integer(any):
    return int(any or 0)

# Convert mm/dd/yyyy to date (or None if no value)        
def date(any):
    try:
        return dt.datetime.strptime(any, '%m/%d/%Y').date()
    except ValueError:
        return None

# Convert TRUE to True and anything else to False
def boolean(any):
    return True if any == 'TRUE' else False

# Convert string to float (or zero if no value)
def floatnum(any):
    return float(any.replace('$', '').replace(',', '').strip() or 0)    

# Create an empty LIST of people
people = []

# Define a class where each person is an object
class Person:
    def __init__(self, id, first_name, last_name, birth_year, date_joined, is_active, balance):
        self.id = id
        self.first_name = first_name
        self.last_name = last_name
        self.birth_year = birth_year
        self.date_joined = date_joined
        self.is_active = is_active
        self.balance = balance  

# Open the CSV file with encoindg='utf-8' to handle special characters.  Adding 'newline' at the end
# prevents the newline at the end of the row from being included..
with open('C:/Users/owner/Desktop/Notepad/pythoncsv.csv', encoding='utf-8', newline = '') as f:
    # Setup a CSV reader with a counter
    reader = enumerate(csv.reader(f))
    # Skip the first row (header row)
    f.readline()
    # Loop through each row in the CSV file.  Use 'i' to track the counter and 'row' to track the contents of the row.
    for i, row in reader:
            # For each row in the CSV file, creata a Person object with a unique ID and
            # appropirate data types, then add to the people list.
            if fname(row[0]) == '': continue
            people.append(Person(i, fname(row[0]), lname(row[0]), integer(row[1]), date(row[2]), boolean(row[3]), floatnum(row[4])))

# When the loop above is done, show all the objects in the people list.
for person in people:
    print(person.id, person.first_name, person.last_name, person.birth_year, person.date_joined, person.is_active, person.balance)  

0 Cameron Tucker 1998 1968-03-05 True 300.0
1 Mitchell Pritchett 2000 1967-12-01 True 200.0
3 Lilly Pritchett 2020 1988-02-03 False 50.0


#### Importing CSV to Python dictionaries

In [44]:
import csv
import datetime as dt

# Get the first_name from the full_name
def fname(any):
    try:
        nm = any.split(', ')
        return nm[1].strip()
    except IndexError:
        return ''

# Get the last_name from the full_name
def lname(any):
    try:
        nm = any.split(', ')
        return nm[0].strip()
    except IndexError:
        return ''

# Convert string to integer (or zero if no value)
def integer(any):
    return int(any or 0)

# Convert mm/dd/yyyy to date (or None if no value)        
def date(any):
    try:
        return dt.datetime.strptime(any, '%m/%d/%Y').date()
    except ValueError:
        return None

# Convert TRUE to True and anything else to False
def boolean(any):
    return True if any == 'TRUE' else False

# Convert string to float (or zero if no value)
def floatnum(any):
    return float(any.replace('$', '').replace(',', '').strip() or 0)    

# Create an empty DICTIONARY of people
people = {}

# Open the CSV file with encoindg='utf-8' to handle special characters.  Adding 'newline' at the end
# prevents the newline at the end of the row from being included..
with open('C:/Users/owner/Desktop/Notepad/pythoncsv.csv', encoding='utf-8', newline = '') as f:
    # Setup a CSV reader with a counter
    reader = enumerate(csv.reader(f))
    # Skip the first row (header row)
    f.readline()
    # Loop through each row in the CSV file.  Use 'i' to track the counter and 'row' to track the contents of the row.
    for i, row in reader:
            # For each row in the CSV file, create a dictionary item with a unique ID and
            # appropirate data types, then add to the people dictionary.
            if fname(row[0]) == '': continue
            # newdict is a dictionary with the same data as the Person object
            newdict = dict(id = i, first_name = fname(row[0]), last_name = lname(row[0]), birth_year = integer(row[1]), date_joined = date(row[2]), is_active = boolean(row[3]), balance = floatnum(row[4]))
            people[i + 1] = newdict
# When the loop above is done, show all the objects in the people list.
for person in people.keys():
    id = person

    print(id, 
            people[person]["first_name"], 
            people[person]["last_name"], 
            people[person]["birth_year"], 
            people[person]["date_joined"], 
            people[person]["is_active"], 
            people[person]["balance"])   




1 Cameron Tucker 1998 1968-03-05 True 300.0
2 Mitchell Pritchett 2000 1967-12-01 True 200.0
4 Lilly Pritchett 2020 1988-02-03 False 50.0
