# MolSSI Workshop - Python Data and Scripting



## Introduction

This lesson uses the Jupyter notebook to cover Python basics: creating and assigning variables, data types, lists, `for` loops, and `if` statements.

In [None]:
# A Python interpreter can behave like a calculator.
# The value of the last expression in a cell is shown as the cell's output.
3 + 7

In [None]:
# Store each quantity under its own name, then combine the names in a formula.
deltaH = -541.5  # kJ/mole
deltaS = 10.4    # kJ/(mole K)
temp = 298       # Kelvin
deltaG = deltaH - (temp * deltaS)

In [None]:
# Display the value currently stored in deltaG
print(deltaG)

In [None]:
# Using a variable in an expression does not change the variable.
# The product on the middle line is computed and then discarded because it is
# not assigned to anything, so both prints show the same value.
print(deltaG)
deltaG*1000
print(deltaG)

In [None]:
# If we want to change the value of a variable, we have to overwrite it.
# Note: this cell is not idempotent - every time it is run, deltaG is
# multiplied by 1000 again.
print(deltaG)
deltaG = deltaG*1000
print(deltaG)

In [None]:
# It is usually a better idea to make a new variable.
# Recompute deltaG in kJ/mole first: the previous cell overwrote it with the
# value multiplied by 1000, so scaling that again would make deltaG_joules
# 1000 times too large.
deltaG = deltaH - temp*deltaS
print(deltaG)
deltaG_joules = deltaG*1000
# deltaG is unchanged by the line above; only the new variable holds the scaled value
print(deltaG)
print(deltaG_joules)

In [None]:
# Several names can be bound in a single statement: the values on the right
# are matched, in order, to the names on the left.
(deltaH, deltaS, temp) = (-541.5, 10.4, 298)
deltaG = deltaH - (temp * deltaS)
print(deltaG)

In [None]:
# Data types
# `type` reports the data type of the value a variable refers to.
type(deltaG)

In [None]:
# You can convert a value to a different data type.
# `str` builds a new string from deltaG; deltaG itself is left unchanged.
deltaG_string = str(deltaG)
type(deltaG_string)

In [None]:
# Show the string version of deltaG
print(deltaG_string)

In [None]:
# A list groups several values (or variables) together under one name.

# Create a list holding four energies
energy_kcal = [-13.4, -2.7, 5.4, 42.1]

# `len` reports how many elements the list contains
energy_length = len(energy_kcal)

# Report the length
print('The length of this list is', energy_length)

In [None]:
# We can access specific elements of a list using an integer index in
# square brackets.
# Counting starts at 0, so index 0 is the first element.

# Print the first element of the list
print(energy_kcal[0])

In [None]:
# You can use an element of a list as a variable in a calculation

# Calculate the second list element (index 1) in kilojoules.
# 4.184 is the conversion factor applied to the kcal value.
energy_kilojoules = energy_kcal[1]*4.184
print(energy_kilojoules)

# Note that this does not change the list: the result was stored in a new
# variable, not written back into energy_kcal.
print(energy_kcal)

In [None]:
# Slicing a list

# Make a new list from elements 0 to 2. Note that it starts with the first
# number, but does not include the last number, so [0:2] selects the
# elements at index 0 and index 1.
short_list = energy_kcal[0:2]

In [None]:
# Show the two-element slice created above
print(short_list)

In [None]:
# Check your understanding exercise
# Leaving out the end index runs the slice to the end of the list;
# leaving out the start index begins the slice at index 0.
slice1 = energy_kcal[1:]
slice2 = energy_kcal[:3]
print('slice1 is', slice1)
print('slice2 is', slice2)

In [None]:
# A `for` loop runs the indented block once for every element of the list.
# On each pass `number` holds the current element; it is multiplied by 4.184
# and the result is printed.
for number in energy_kcal:
    kJ = number*4.184
    print(kJ)

In [None]:
# We can record these values in a new list using `append`.

# We can only append to existing lists, so we make an empty one.
# Re-running this cell starts again from an empty list, so values are not
# duplicated.
energy_kJ = []

for number in energy_kcal:
    kJ = number*4.184
    # Add the converted value to the end of the new list
    energy_kJ.append(kJ)

print(energy_kJ)

In [None]:
# We can use `if` statements to make choices in our code.

# What if we wanted to find all the negative numbers?
negative_energy_kJ = []

for number in energy_kJ:
    # Only values strictly below zero are kept
    if number<0:
        negative_energy_kJ.append(number)

print(negative_energy_kJ)

In [None]:
# You can also use `and`, `or` to check more than one condition.
# With `or`, the block runs when at least one of the conditions is true.
# Note: despite its name, this list also collects values equal to zero.
negative_numbers = []
for number in energy_kJ:
    if number<0 or number==0:
        negative_numbers.append(number)

print(negative_numbers)

## File Parsing

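This lesson covers file parsing: building a file path, reading a text file into a list of lines, searching those lines for a keyword, and splitting a line to extract a numeric value.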

In [None]:
ls data

In [None]:
pwd

In [None]:
import os

# Build the path from its components; os.path.join inserts the path
# separator used by the current operating system.
# The result is a relative path, so it is resolved against the working directory.
ethanol_file = os.path.join('data', 'outfiles', 'ethanol.out')
print(ethanol_file)

In [None]:
# Open the file for reading. The `with` block closes the file automatically
# when the block ends, even if reading raises an error.
with open(ethanol_file, 'r') as outfile:
    # Read the whole file into a list with one element per line
    data = outfile.readlines()

In [None]:
# The readlines function puts the file into a list where each element is a line
# (each element is a string that still ends with its newline character).
print(type(data))

In [None]:
# Print every line of the file.
# Each line keeps its own trailing newline and print adds another, so the
# output appears double-spaced.
for line in data:
    print(line)

In [None]:
# Search each line for the text 'Final Energy' and print every line that
# contains it.
# If several lines match, energy_line ends up holding the last match; if none
# match, energy_line is never assigned.
for line in data:
    if 'Final Energy' in line:
        energy_line = line
        print(energy_line)

In [None]:
# We can use the `split` function to split a line based on a delimiter.

# It will split on whitespace by default.
# The result is a list of strings; energy_line itself is not modified.
energy_line.split()

In [None]:
# We can specify other delimiters, like a colon.
# Unlike the default split, surrounding whitespace (including the trailing
# newline) is kept in the pieces.
energy_line.split(':')

In [None]:
# Store the whitespace-separated pieces of the line so they can be indexed
words = energy_line.split()
print(words)

In [None]:
# Take the fourth item (index 3) of the split line.
# Presumably this is the numeric energy value - verify against the printed
# `words` list if the file format differs.
energy = words[3]
print(energy)

In [None]:
# However, this is a string, so adding a number to it fails with a TypeError.
# The error is the point of this cell; it is caught and printed so that
# "Restart & Run All" can continue past the demonstration.
try:
    print(energy + 50)
except TypeError as error:
    print('TypeError:', error)

In [None]:
# We can change it to a number by casting it to float
# From here on, `energy` refers to a float instead of a string.
energy = float(energy)

In [None]:
# We can use enumerate with a for loop to get a counter.
# enumerate yields (index, item) pairs, counting from 0.
# Here the loop runs over `words`, so `line` holds one word at a time and
# `linenum` holds that word's index.
for linenum, line in enumerate(words):
    print(linenum, line)

In [None]:
# We can use this to find the line number of the Center of mass in the file.
# The search text is 'Center', so every line containing that word is reported.
# enumerate counts from 0, so linenum is the zero-based index into `data`
# (one less than the line number shown in a text editor).
for linenum, line in enumerate(data):
    if 'Center' in line:
        print(linenum)
        print(line)