In [30]:
import os.path
import glob

In [31]:
# the general pattern for calling a function that lives in a library:
#     output = library_name.function_name(input)

# os.path.join glues the folder names together using the path separator
# of the operating system this notebook is running on
outfile_directory = os.path.join(
    'data',
    'outfiles',
)

# a bare variable name on the last line of a cell displays its value,
# so this shows what the directory looks like on the user's OS
outfile_directory

'data/outfiles'

In [44]:
# build the search pattern for the files we want to read
# new here: * is a wildcard, so '*.out' means "every file whose name ends in .out"
# ('*.ou' would match nothing because our files end in .out, while '*.ou*' would match them)
# os.path.join puts \ or / between the pieces, depending on your OS
file_location = os.path.join(
    outfile_directory,
    '*.out',
)

In [45]:
# show the search pattern we just built
print(file_location)

# this prints the pattern itself ('data/outfiles/*.out' here; the separator
# depends on your OS) -- the * has not been expanded into real file paths yet

data/outfiles/*.out


In [48]:
# glob.glob (the library and the function happen to share a name) looks on
# disk for every path that matches the pattern it is given and hands the
# matches back as a list of strings

filenames = glob.glob(pathname=file_location)
print(filenames)

# the list holds only the paths that match file_location, so .out files
# sitting in other directories are not picked up
# the order is whatever the file system reports -- it is not alphabetical

['data/outfiles/propanol.out', 'data/outfiles/pentanol.out', 'data/outfiles/decanol.out', 'data/outfiles/methanol.out', 'data/outfiles/octanol.out', 'data/outfiles/ethanol.out', 'data/outfiles/hexanol.out', 'data/outfiles/heptanol.out', 'data/outfiles/butanol.out', 'data/outfiles/nonanol.out']


In [9]:
# parse every file: read its lines and find the info you want to capture

for f in filenames:
    # `with` closes the file as soon as the indented block ends,
    # even if reading it raises an error
    with open(f, 'r') as outfile:
        data = outfile.readlines()

    for line in data:
        if 'Final Energy' in line:
            words = line.split()
            # assumes the energy is the 4th whitespace-separated item on
            # the line -- TODO confirm against the .out file format
            energy = float(words[3])
            print(energy)



-193.12836249728798
-271.20138119895074
-466.3836241400086
-115.04800861868374
-388.3110864554743
-154.09130176573018
-310.2385332251633
-349.27397687072676
-232.1655798347283
-427.3465180082815


In [10]:
# but what molecule does each number correspond to?
# we have the file paths saved in a list called filenames
# let's simplify and capture just 1 file path by using its index number (0 for the 1st file name)
# note: filenames[0] raises an IndexError if glob found no matching files

first_file = filenames[0]
print(first_file)

data/outfiles/propanol.out


In [25]:
# now we have the file path saved as a string, but we don't want the whole path
# there is a function in the os.path library to help us with that
# we could split on /, count how many items are in our split, and select the last element
# (but that would break on an OS that separates directories with \ instead of /)
# instead, we can do all of that in one handy function called os.path.basename

file_name = os.path.basename(first_file)
print(file_name)

propanol.out


In [26]:
# how do we use the functions we already know to pull out just the molecule name?
# .split('.') cuts the string at every '.' and returns the pieces as a list;
# the molecule name is the first piece (index 0)
# note: a file name with extra dots (e.g. 'a.b.out') would be cut at the first one

split_filename = file_name.split('.')
print(split_filename)
molecule_name = split_filename[0]
print(molecule_name)

['propanol', 'out']
propanol


In [29]:
# we want to pull out the molecule name from each file and keep it linked to the correct value
# let's start by copying our previous nested loop and modifying it here
# it's getting complicated, so let's start commenting our code

for f in filenames:
    # get the molecule name from each file name
    # (basename drops the directories, split('.') separates the name from the extension)
    file_name = os.path.basename(f)
    split_filename = file_name.split('.')
    molecule_name = split_filename[0]

    # open and read in each file; `with` closes it automatically,
    # even if something goes wrong while reading
    with open(f, 'r') as outfile:
        data = outfile.readlines()

    # for each file, search each line for the Final Energy value
    # and pull out the value we want
    for line in data:
        if 'Final Energy' in line:
            words = line.split()
            # assumes the energy is the 4th whitespace-separated item on
            # the line -- TODO confirm against the .out file format
            energy = float(words[3])
            # the molecule name is printed next to its energy value
            print(molecule_name, energy)
            

propanol -193.12836249728798
pentanol -271.20138119895074
decanol -466.3836241400086
methanol -115.04800861868374
octanol -388.3110864554743
ethanol -154.09130176573018
hexanol -310.2385332251633
heptanol -349.27397687072676
butanol -232.1655798347283
nonanol -427.3465180082815


## Printing/writing information to a file

In [49]:
# general syntax for writing to a new file
# with open('file_name.txt', 'w') as filehandle:
# again, filehandle is a pointer for a file
# w  opens the file for writing; it is created if it doesn't exist and emptied if it does
# w+ is the same, but the file can also be read from
# a  appends text to the end of the file; it is created if it doesn't exist
# a+ is the same, but the file can also be read from

# opening the output file with `with` guarantees it gets closed when the
# indented block ends, even if an error occurs part-way through the loop
with open('energies.txt', 'w') as datafile:
    for f in filenames:
        # get the molecule name
        file_name = os.path.basename(f)
        file_name_split = file_name.split('.')
        molecule_name = file_name_split[0]

        # read the data; this file is closed again as soon as it has been read
        with open(f, 'r') as outfile:
            data = outfile.readlines()

        # loop through the data line by line, saving the number
        for line in data:
            if 'Final Energy' in line:
                words = line.split()
                # assumes the energy is the 4th whitespace-separated item on
                # the line -- TODO confirm against the .out file format
                energy = float(words[3])
                print(molecule_name, energy)
                # .write is a function to write to the file you are pointing to with the datafile variable
                # python only writes strings to files, so use an f-string to insert the contents of a {variable} into the string
                # \t inserts a tab, which looks like an arrow in jupyter notebook
                # \n inserts a line break
                # {energy: .3f} means to use three positions after the decimal point
                # (the space leaves room for a sign: '-' for negative numbers, a blank for positive ones)
                datafile.write(f'{molecule_name} \t {energy: .3f} \n')

# writes are buffered, so the text is only guaranteed to be in the file once
# it has been closed -- leaving the `with` block above does that for us

propanol -193.12836249728798
pentanol -271.20138119895074
decanol -466.3836241400086
methanol -115.04800861868374
octanol -388.3110864554743
ethanol -154.09130176573018
hexanol -310.2385332251633
heptanol -349.27397687072676
butanol -232.1655798347283
nonanol -427.3465180082815


In [None]:
# alternative way to write the result as a more readable sentence
# the file was closed at the end of the previous cell, and writing to a closed
# file raises a ValueError, so it is re-opened here first
# mode 'a' appends to the end of the file instead of overwriting it
# molecule_name and energy still hold the values from the last file in the loop;
# to get one sentence per molecule, use this write line in place of the
# datafile.write(...) call inside the loop above
# NOTE(review): confirm that the unit in the sentence matches what the .out files report

with open('energies.txt', 'a') as datafile:
    datafile.write(f'For the file {molecule_name}, the energy is {energy: .3f} kcal/mole.')