# Interacting with the OS and filesystem

In [1]:
import os

In [2]:
os.getcwd()

'C:\\Users\\Hp\\AppData\\Local\\Programs\\Python\\Python39\\Scripts'

In [3]:
#To get the list of files in a directory
help(os.listdir)

Help on built-in function listdir in module nt:

listdir(path=None)
    Return a list containing the names of the files in the directory.
    
    path can be specified as either str, bytes, or a path-like object.  If path is bytes,
      the filenames returned will also be bytes; in all other circumstances
      the filenames returned will be str.
    If path is None, uses the path='.'.
    On some platforms, path may also be specified as an open file descriptor;\
      the file descriptor must refer to a directory.
      If this functionality is unavailable, using it raises NotImplementedError.
    
    The list is in arbitrary order.  It does not include the special
    entries '.' and '..' even if they are present in the directory.



In [7]:
os.listdir('.')

['.ipynb_checkpoints',
 'Arithmetic Operations.ipynb',
 'climate.txt',
 'climate_results.txt',
 'f2py.exe',
 'Functions.ipynb',
 'iptest.exe',
 'iptest3.exe',
 'ipython.exe',
 'ipython3.exe',
 'jsonschema.exe',
 'jupyter-bundlerextension.exe',
 'jupyter-console.exe',
 'jupyter-kernel.exe',
 'jupyter-kernelspec.exe',
 'jupyter-migrate.exe',
 'jupyter-nbconvert.exe',
 'jupyter-nbextension.exe',
 'jupyter-notebook.exe',
 'jupyter-qtconsole.exe',
 'jupyter-run.exe',
 'jupyter-serverextension.exe',
 'jupyter-troubleshoot.exe',
 'jupyter-trust.exe',
 'jupyter.exe',
 'Loops .ipynb',
 'Numpy.ipynb',
 'pip.exe',
 'pip3.9.exe',
 'pip3.exe',
 'pygmentize.exe',
 'pywin32_postinstall.py',
 'pywin32_testall.py',
 'Reading & writing to files.ipynb',
 'Variables & Data Structures.ipynb',
 '__pycache__']

In [8]:
os.makedirs('./data', exist_ok=True)

In [9]:
'data' in os.listdir('.')

True

In [10]:
os.listdir('./data')

[]

In [11]:
#downloading some files into the data directory using the urllib module.
url1 = 'https://gist.githubusercontent.com/aakashns/257f6e6c8719c17d0e498ea287d1a386/raw/7def9ef4234ddf0bc82f855ad67dac8b971852ef/loans1.txt'
url2 = 'https://gist.githubusercontent.com/aakashns/257f6e6c8719c17d0e498ea287d1a386/raw/7def9ef4234ddf0bc82f855ad67dac8b971852ef/loans2.txt'
url3 = 'https://gist.githubusercontent.com/aakashns/257f6e6c8719c17d0e498ea287d1a386/raw/7def9ef4234ddf0bc82f855ad67dac8b971852ef/loans3.txt'

In [12]:
from urllib.request import urlretrieve

In [13]:
urlretrieve(url1, './data/loans1.txt')

('./data/loans1.txt', <http.client.HTTPMessage at 0x13f87774370>)

In [14]:
urlretrieve(url2, './data/loans2.txt')

('./data/loans2.txt', <http.client.HTTPMessage at 0x13f8783afa0>)

In [15]:
urlretrieve(url3, './data/loans3.txt')

('./data/loans3.txt', <http.client.HTTPMessage at 0x13f8783a820>)

In [16]:
#verify that the files were downloaded.
os.listdir('./data')

['loans1.txt', 'loans2.txt', 'loans3.txt']

# Reading from a file

In [17]:
file1 = open('./data/loans1.txt', mode = 'r')

In [18]:
file1_contents = file1.read()

In [19]:
print(file1_contents)

amount,duration,rate,down_payment
100000,36,0.08,20000
200000,12,0.1,
628400,120,0.12,100000
4637400,240,0.06,
42900,90,0.07,8900
916000,16,0.13,
45230,48,0.08,4300
991360,99,0.08,
423000,27,0.09,47200


In [20]:
#closing file.
file1.close()

In [21]:
#trying to read file after closing
file1.read()

ValueError: I/O operation on closed file.

# Closing files automatically using with

In [22]:
with open('./data/loans2.txt') as file2:
    file2_contents = file2.read()
    print(file2_contents)

amount,duration,rate,down_payment
828400,120,0.11,100000
4633400,240,0.06,
42900,90,0.08,8900
983000,16,0.14,
15230,48,0.07,4300


Once the statements within the `with` block are executed, the `.close` method on `file2` is automatically invoked. Let's verify this by trying to read from the file object again.

In [23]:
file2.read()

ValueError: I/O operation on closed file.

# Reading a file line by line

In [24]:
with open('./data/loans3.txt', mode = 'r') as file3:
    file3_lines = file3.readlines()

In [25]:
file3_lines

['amount,duration,rate,down_payment\n',
 '45230,48,0.07,4300\n',
 '883000,16,0.14,\n',
 '100000,12,0.1,\n',
 '728400,120,0.12,100000\n',
 '3637400,240,0.06,\n',
 '82900,90,0.07,8900\n',
 '316000,16,0.13,\n',
 '15230,48,0.08,4300\n',
 '991360,99,0.08,\n',
 '323000,27,0.09,4720010000,36,0.08,20000\n',
 '528400,120,0.11,100000\n',
 '8633400,240,0.06,\n',
 '12900,90,0.08,8900']

# Processing data from files

Defining a function parse_headers that takes the header line as input and returns a list of column headers.

In [26]:
def parse_headers(header_line):
    """Split the header row of a comma-separated file into column names.

    Leading/trailing whitespace (including the newline) is stripped from the
    line as a whole before it is split on commas.
    """
    trimmed = header_line.strip()
    return trimmed.split(',')

The strip method removes leading and trailing whitespace, including the newline character \n. The split method breaks a string into a list using the given separator (, in this case).

In [27]:
file3_lines[0]

'amount,duration,rate,down_payment\n'

In [28]:
headers = parse_headers(file3_lines[0])

In [29]:
headers

['amount', 'duration', 'rate', 'down_payment']

Defining a function parse_values that takes a line containing some data and returns a list of floating-point numbers.

In [33]:
def parse_values(data_line):
    """Convert one comma-separated data row into a list of floats.

    Every field must be a valid float literal; an empty or non-numeric field
    makes float() raise ValueError.
    """
    fields = data_line.strip().split(',')
    return [float(field) for field in fields]

In [34]:
file3_lines[1]

'45230,48,0.07,4300\n'

In [35]:
parse_values(file3_lines[1])

[45230.0, 48.0, 0.07, 4300.0]

In [36]:
file3_lines[2]

'883000,16,0.14,\n'

In [37]:
parse_values(file3_lines[2])

ValueError: could not convert string to float: ''

The code above raises a ValueError because the empty string '' cannot be converted to a float. We can enhance the parse_values function to handle this edge case by treating an empty field as 0.0. We will also handle values that cannot be converted to a float by keeping them as strings.

In [38]:
def parse_values(data_line):
    """Convert one comma-separated data row into a list of values.

    Empty fields become 0.0, numeric fields become floats, and anything that
    float() rejects is kept unchanged as a string.
    """
    def convert(field):
        # A missing value is treated as zero
        if field == '':
            return 0.0
        try:
            return float(field)
        except ValueError:
            # Not a number: keep the raw text
            return field

    return [convert(field) for field in data_line.strip().split(',')]
            

In [39]:
file3_lines[2]

'883000,16,0.14,\n'

In [40]:
parse_values(file3_lines[2])

[883000.0, 16.0, 0.14, 0.0]

Defining a function create_item_dict that takes a list of values and a list of headers as inputs and returns a dictionary with the values associated with their respective headers as keys.

In [41]:
def create_item_dict(values, headers):
    """Pair each value with the header at the same position.

    Pairing is done with zip, so it stops at the shorter of the two inputs;
    surplus values or headers are ignored.
    """
    return {header: value for value, header in zip(values, headers)}

In [42]:
for item in zip([1,2,3], ['a', 'b', 'c']):
    print(item)

(1, 'a')
(2, 'b')
(3, 'c')


In [43]:
file3_lines[1]

'45230,48,0.07,4300\n'

In [44]:
values1 = parse_values(file3_lines[1])
create_item_dict(values1, headers)

{'amount': 45230.0, 'duration': 48.0, 'rate': 0.07, 'down_payment': 4300.0}

As expected, the values & header are combined to create a dictionary with the appropriate key-value pairs.

We are now ready to put it all together and define the read_csv function.

In [45]:
def read_csv(path):
    """Read a comma-separated text file into a list of dictionaries.

    The first line is parsed as the header row with parse_headers; every
    following non-blank line is parsed with parse_values and combined with the
    headers by create_item_dict.

    Arguments:
        path - Location of the file to read

    Returns a list with one dictionary per data row. An empty file yields an
    empty list.
    """
    result = []
    # Open the file in read mode
    with open(path, 'r') as f:
        #Get a list of lines
        lines = f.readlines()
    # An empty file has no header row; return no records instead of failing on lines[0]
    if not lines:
        return result
    #Parse the header
    headers = parse_headers(lines[0])
    #Loop over the remaining lines
    for data_line in lines[1:]:
        # Skip blank lines (e.g. extra newlines at the end of the file) so
        # they are not turned into spurious records
        if not data_line.strip():
            continue
        #Parse the values
        values = parse_values(data_line)
        #Create a dictionary using values & headers and add it to the result
        result.append(create_item_dict(values, headers))
    return result

In [46]:
with open('./data/loans2.txt') as file2:
    print(file2.read())

amount,duration,rate,down_payment
828400,120,0.11,100000
4633400,240,0.06,
42900,90,0.08,8900
983000,16,0.14,
15230,48,0.07,4300


In [48]:
read_csv('./data/loans2.txt')

[{'amount': 828400.0,
  'duration': 120.0,
  'rate': 0.11,
  'down_payment': 100000.0},
 {'amount': 4633400.0, 'duration': 240.0, 'rate': 0.06, 'down_payment': 0.0},
 {'amount': 42900.0, 'duration': 90.0, 'rate': 0.08, 'down_payment': 8900.0},
 {'amount': 983000.0, 'duration': 16.0, 'rate': 0.14, 'down_payment': 0.0},
 {'amount': 15230.0, 'duration': 48.0, 'rate': 0.07, 'down_payment': 4300.0}]

The full code for read_csv along with the helper functions:

In [49]:
def parse_headers(header_line):
    """Split the header row of a comma-separated file into column names."""
    return header_line.strip().split(',')

def parse_values(data_line):
    """Convert one comma-separated data row into a list of values.

    Empty fields become 0.0, numeric fields become floats, and anything that
    float() rejects is kept unchanged as a string.
    """
    values = []
    for item in data_line.strip().split(','):
        if item == '':
            # A missing value is treated as zero
            values.append(0.0)
        else:
            try:
                values.append(float(item))
            except ValueError:
                # Not a number: keep the raw text
                values.append(item)
    return values

def create_item_dict(values, headers):
    """Pair each value with the header at the same position.

    zip stops at the shorter input, so surplus values or headers are ignored.
    """
    result = {}
    for value, header in zip(values, headers):
        result[header] = value
    return result

def read_csv(path):
    """Read a comma-separated text file into a list of dictionaries.

    The first line is the header row; every following non-blank line becomes
    one dictionary keyed by the headers.

    Arguments:
        path - Location of the file to read

    Returns a list with one dictionary per data row. An empty file yields an
    empty list.
    """
    result = []
    # Open the file in read mode
    with open(path, 'r') as f:
        # Get a list of lines
        lines = f.readlines()
    # An empty file has no header row; return no records instead of failing on lines[0]
    if not lines:
        return result
    # Parse the header
    headers = parse_headers(lines[0])
    # Loop over the remaining lines
    for data_line in lines[1:]:
        # Skip blank lines: parse_values would turn one into [0.0], which
        # would become a bogus record holding only the first column
        if not data_line.strip():
            continue
        # Parse the values
        values = parse_values(data_line)
        # Create a dictionary using values & headers and add it to the result
        result.append(create_item_dict(values, headers))
    return result

Defining a function to calculate the equal monthly installments for a loan.

In [50]:
import math

def loan_emi(amount, duration, rate, down_payment=0):
    """Calculates the equal monthly installment (EMI) for a loan.

    Arguments:
        amount - Total amount to be spent (loan + down payment)
        duration - Duration of the loan (in months)
        rate - Rate of interest (monthly)
        down_payment (optional) - Optional initial payment (deducted from amount)

    Returns the installment rounded up to the next whole number.
    """
    loan_amount = amount - down_payment
    try:
        # Compound growth factor over the whole duration
        growth = (1 + rate) ** duration
        emi = loan_amount * rate * growth / (growth - 1)
    except ZeroDivisionError:
        # A zero rate makes the denominator zero: spread the loan evenly instead
        emi = loan_amount / duration
    return math.ceil(emi)

We can use this function to calculate EMIs for all the loans in a file.

In [51]:
loans2 = read_csv('./data/loans2.txt')

In [52]:
loans2

[{'amount': 828400.0,
  'duration': 120.0,
  'rate': 0.11,
  'down_payment': 100000.0},
 {'amount': 4633400.0, 'duration': 240.0, 'rate': 0.06, 'down_payment': 0.0},
 {'amount': 42900.0, 'duration': 90.0, 'rate': 0.08, 'down_payment': 8900.0},
 {'amount': 983000.0, 'duration': 16.0, 'rate': 0.14, 'down_payment': 0.0},
 {'amount': 15230.0, 'duration': 48.0, 'rate': 0.07, 'down_payment': 4300.0}]

In [53]:
# Attach the computed EMI to every loan record in place
for loan in loans2:
    loan['emi'] = loan_emi(
        amount=loan['amount'],
        duration=loan['duration'],
        rate=loan['rate']/12,  # the CSV contains yearly rates
        down_payment=loan['down_payment'],
    )

In [54]:
loans2

[{'amount': 828400.0,
  'duration': 120.0,
  'rate': 0.11,
  'down_payment': 100000.0,
  'emi': 10034},
 {'amount': 4633400.0,
  'duration': 240.0,
  'rate': 0.06,
  'down_payment': 0.0,
  'emi': 33196},
 {'amount': 42900.0,
  'duration': 90.0,
  'rate': 0.08,
  'down_payment': 8900.0,
  'emi': 504},
 {'amount': 983000.0,
  'duration': 16.0,
  'rate': 0.14,
  'down_payment': 0.0,
  'emi': 67707},
 {'amount': 15230.0,
  'duration': 48.0,
  'rate': 0.07,
  'down_payment': 4300.0,
  'emi': 262}]

We can see that each loan now has a new key emi, which provides the EMI for the loan. We can extract this logic into a function so that we can use it for other files too.

In [73]:
def compute_emis(loans):
    """Add an 'emi' entry to every loan dictionary in `loans`, in place.

    Each loan must provide 'amount', 'duration', 'rate' and 'down_payment';
    the rate is divided by 12 before being passed to loan_emi.
    """
    for record in loans:
        principal = record['amount']
        months = record['duration']
        monthly_rate = record['rate'] / 12  # the CSV contains yearly rates
        upfront = record['down_payment']
        record['emi'] = loan_emi(principal, months, monthly_rate, upfront)


# Writing to files

In [74]:
loans2 = read_csv('./data/loans2.txt')

In [75]:
compute_emis(loans2)

KeyError: 'amount'

In [None]:
loans2

In [78]:
# Write one comma-separated row per loan: the four input columns, then the EMI
with open('./data/emis2.txt', 'w') as f:
    for loan in loans2:
        f.write('{},{},{},{},{}\n'.format(
            *(loan[key] for key in ('amount', 'duration', 'rate', 'down_payment', 'emi'))))

KeyError: 'amount'

In [79]:
os.listdir('data')

['emis1.txt',
 'emis2.txt',
 'emis3.txt',
 'loans1.txt',
 'loans2.txt',
 'loans3.txt']

In [65]:
with open('./data/emis2.txt', 'r') as f:
    print(f.read())

828400.0, 120.0, 0.11, 100000.0, 10034
4633400.0, 240.0, 0.06, 0.0, 33196
42900.0, 90.0, 0.08, 8900.0, 504
983000.0, 16.0, 0.14, 0.0, 67707
15230.0, 48.0, 0.07, 4300.0, 262



Defining a generic function write_csv which takes a list of dictionaries and writes it to a file in CSV format. We will also include the column headers in the first line.

In [66]:
def write_csv(items, path):
    """Write a list of dictionaries to `path` as comma-separated text.

    The keys of the first item define the columns and are written as the
    header line. Items missing a column get an empty field for it. The file is
    opened for writing before the emptiness check, so an empty `items` still
    leaves an empty file behind.
    """
    with open(path, 'w') as out:
        # Nothing to write: stop right after the file has been opened
        if len(items) == 0:
            return

        # Header line comes from the first item's keys
        columns = list(items[0].keys())
        out.write(','.join(columns) + '\n')

        # One line per item, fields in header order
        for record in items:
            row = ','.join(str(record.get(column, "")) for column in columns)
            out.write(row + "\n")

In [67]:
loans3 = read_csv('./data/loans3.txt')

In [68]:
compute_emis(loans3)

In [69]:
write_csv(loans3, './data/emis3.txt')

In [70]:
with open('./data/emis3.txt', 'r') as f:
    print(f.read())

amount,duration,rate,down_payment,emi
45230.0,48.0,0.07,4300.0,981
883000.0,16.0,0.14,0.0,60819
100000.0,12.0,0.1,0.0,8792
728400.0,120.0,0.12,100000.0,9016
3637400.0,240.0,0.06,0.0,26060
82900.0,90.0,0.07,8900.0,1060
316000.0,16.0,0.13,0.0,21618
15230.0,48.0,0.08,4300.0,267
991360.0,99.0,0.08,0.0,13712
323000.0,27.0,0.09,4720010000.0,-193751447
528400.0,120.0,0.11,100000.0,5902
8633400.0,240.0,0.06,0.0,61853
12900.0,90.0,0.08,8900.0,60



With just four lines of code, we can now read each downloaded file, calculate the EMIs, and write the results back to new files:

In [72]:
# For each of loans1.txt .. loans3.txt: read it, add EMIs, and write emis<i>.txt
for i in range(1,4):
    loans = read_csv('./data/loans{}.txt'.format(i))
    compute_emis(loans)
    write_csv(loans, './data/emis{}.txt'.format(i))

KeyError: 'amount'

In [80]:
os.listdir('./data')

['emis1.txt',
 'emis2.txt',
 'emis3.txt',
 'loans1.txt',
 'loans2.txt',
 'loans3.txt']