# Plotting and Programming in Python (Continued)

## Lists

In [3]:
# Example: create a list, then inspect its contents, its length, and one element.
pressures = [0.273, 0.275, 0.277, 0.275, 0.276]
n_pressures = len(pressures)
print('pressures:', pressures)
print('length:', n_pressures)
# Indexing starts at 0, so index 4 selects the last of the five values.
print('fourth item of pressures:', pressures[4])

pressures: [0.273, 0.275, 0.277, 0.275, 0.276]
length: 5
fourth item of pressures: 0.276


In [5]:
# Append items to a list
# list.append adds a single value to the end of the list, changing it in place.
primes = [2, 3, 5]
print('primes is initially:', primes)
for extra in (7, 9):  # note: 9 is not actually prime
    primes.append(extra)
print('primes has become:', primes)

primes is initially: [2, 3, 5]
primes has become: [2, 3, 5, 7, 9]


In [7]:
# Combine lists
# Start from a known value so this cell prints the same thing every time it is
# run: extend() and append() both modify `primes` in place, so re-running the
# cell on an already-modified list would keep growing it.
primes = [2, 3, 5, 7, 9]
teen_primes = [11, 13, 17, 19]
middle_aged_primes = [37, 41, 43, 47]
print('primes is currently:', primes)
# extend() adds each element of its argument to the end of the list.
primes.extend(teen_primes)
print('primes has now become:', primes)
# append() adds its argument as ONE element, so the whole list ends up nested.
primes.append(middle_aged_primes)
print('primes has finally become:', primes)

primes is currently: [2, 3, 5, 7, 9]
primes has now become: [2, 3, 5, 7, 9, 11, 13, 17, 19]
primes has finally become: [2, 3, 5, 7, 9, 11, 13, 17, 19, [37, 41, 43, 47]]


In [8]:
# Delete items from a list using indexing
primes = [2, 3, 5, 7, 9]
print('primes before removing last item:', primes)
last_position = len(primes) - 1  # index 4 for this five-element list
del primes[last_position]
print('primes after removing last item:', primes)

primes before removing last item: [2, 3, 5, 7, 9]
primes after removing last item: [2, 3, 5, 7]


In [10]:
# Note: character strings are immutable
element = 'carbon'
try:
    element[0] = 'C'
except TypeError as error:
    # Assigning to a position in a str raises TypeError. Catching it here
    # shows the message without stopping a "Run All" of the notebook.
    print('TypeError:', error)

TypeError: 'str' object does not support item assignment

In [12]:
# From strings to lists and back
letters_of_tin = list('tin')  # one single-character string per letter
print('string to list:', letters_of_tin)
gold = ''.join(['g', 'o', 'l', 'd'])  # join concatenates, with '' between the pieces
print('list to string:', gold)

string to list: ['t', 'i', 'n']
list to string: gold


In [13]:
# Stepping through a string with slices of the form low:high:stride (step)
element = 'fluorine'
every_other = element[::2]  # stride 2: every second character
backwards = element[::-1]   # stride -1: steps backward by 1
print(every_other)
print(backwards)

furn
eniroulf


In [19]:
element = 'lithium'
# A high bound past the end of the string is clipped, so this prints all of it.
print(element[:20])
# note slice bounds: -1 is the last position (index 6), which is already
# beyond the high bound of 3, so this slice is empty
print(element[-1:3])

lithium



In [15]:
# Sort and Sorted
letters = ['g', 'o', 'l', 'd']

# sorted(letters) returns a sorted copy and leaves letters itself unchanged
result = sorted(letters)
print('letters is', letters, 'and result is', result)

# letters.sort() sorts letters in place and returns None,
# so result no longer holds a list after this assignment
result = letters.sort()
print('letters is', letters, 'and result is', result)

letters is ['g', 'o', 'l', 'd'] and result is ['d', 'g', 'l', 'o']
letters is ['d', 'g', 'l', 'o'] and result is None


## For Loops

In [20]:
# Simple example: the indented body runs once for each item in the list
primes = [2, 3, 5]
for p in primes:
    squared = p * p
    cubed = squared * p
    print(p, squared, cubed)

2 4 8
3 9 27
5 25 125


In [22]:
# Accumulator pattern
# Sum the first 10 integers.
total = 0  # running sum, updated on every pass through the loop
for number in range(10):
    # range(10) yields 0-9, so shift each value by 1 to add up 1-10 instead
    total += number + 1
print(total)

55


In [23]:
# Reversing a String
original = "tin"
result = ""
for char in original:
    # Put each new character in FRONT of what has been built so far.
    result = f"{char}{result}"
print(result)

nit


In [33]:
# Create acronym: ["red", "green", "blue"] => "RGB"
colors = ["red", "green", "blue"]
acronym = ""
for color in colors:
    initial = color[0].upper()  # first letter of the word, capitalized
    acronym += initial
print(acronym)

RGB


## Conditionals

In [34]:
# Example: label each mass by comparing it with a threshold of 3.0
masses = [3.54, 2.07, 9.22, 1.86, 1.71]
for m in masses:
    size = 'is large' if m > 3.0 else 'is small'
    print(m, size)

3.54 is large
2.07 is small
9.22 is large
1.86 is small
1.71 is small


In [36]:
# Compound Relations using 'and', 'or', and Parentheses
mass     = [ 3.54,  2.07,  9.22,  1.86,  1.71]
velocity = [10.00, 20.00, 30.00, 25.00, 20.00]

# Walk the two lists in step: zip() pairs each mass with the velocity at the
# same position, so the loop does not depend on a hard-coded list length.
for m, v in zip(mass, velocity):
    if m > 5 and v > 20:
        print("Fast heavy object.  Duck!")
    elif 2 < m <= 5 and v <= 20:
        print("Normal traffic")
    elif m <= 2 and v <= 20:
        print("Slow light object.  Ignore it")
    else:
        # No rule above matched this combination of mass and velocity.
        print("Whoa!  Something is up with the data.  Check it")

Normal traffic
Normal traffic
Fast heavy object.  Duck!
Whoa!  Something is up with the data.  Check it
Slow light object.  Ignore it


## Using Functions with Conditionals in Pandas

In [38]:
def calculate_life_quartile(exp, thresholds=(58.41, 67.05, 71.70)):
    """Return the quartile (1-4) that a life-expectancy value falls into.

    Parameters
    ----------
    exp : float
        The value to classify.
    thresholds : sequence of three numbers, optional
        Ascending lower bounds of the second, third and fourth quartiles.
        The defaults are the cut-offs this function was originally written
        with, so calling it with a single argument behaves as before.

    Returns
    -------
    int or None
        1, 2, 3 or 4, or None when ``exp`` cannot be placed in any quartile
        (for example NaN, for which every comparison below is False).
    """
    second_start, third_start, fourth_start = thresholds
    if exp < second_start:
        # This observation is in the first quartile
        return 1
    elif exp < third_start:
        # This observation is in the second quartile
        # (values below second_start were already handled above)
        return 2
    elif exp < fourth_start:
        # This observation is in the third quartile
        return 3
    elif exp >= fourth_start:
        # This observation is in the fourth quartile
        return 4
    else:
        # This observation has bad data: none of the comparisons held
        return None

calculate_life_quartile(62.5)

2

In [45]:
import pandas as pd
# Apply a function to a DataFrame or portion of a DataFrame
# data = pd.read_csv('./gapminder_all.csv')
# data['life_qrtl'] = data['lifeExp'].apply(calculate_life_quartile)

## Looping Over Data Sets

In [50]:
# Example:
import pandas as pd
# Read each file in turn and show the minimum of every column.
gdp_files = ['./gapminder_gdp_africa.csv', './gapminder_gdp_asia.csv']
for filename in gdp_files:
    data = pd.read_csv(filename, index_col='country')
    column_minimums = data.min()
    print(filename, column_minimums)

./gapminder_gdp_africa.csv gdpPercap_1952    298.846212
gdpPercap_1957    335.997115
gdpPercap_1962    355.203227
gdpPercap_1967    412.977514
gdpPercap_1972    464.099504
gdpPercap_1977    502.319733
gdpPercap_1982    462.211415
gdpPercap_1987    389.876185
gdpPercap_1992    410.896824
gdpPercap_1997    312.188423
gdpPercap_2002    241.165877
gdpPercap_2007    277.551859
dtype: float64
./gapminder_gdp_asia.csv gdpPercap_1952    331.0
gdpPercap_1957    350.0
gdpPercap_1962    388.0
gdpPercap_1967    349.0
gdpPercap_1972    357.0
gdpPercap_1977    371.0
gdpPercap_1982    424.0
gdpPercap_1987    385.0
gdpPercap_1992    347.0
gdpPercap_1997    415.0
gdpPercap_2002    611.0
gdpPercap_2007    944.0
dtype: float64


In [51]:
# Use glob.glob to find sets of filenames that match a pattern
import glob
csv_files = glob.glob('./*.csv')  # '*' stands for any run of characters in a name
print('all csv files in current directory:', csv_files)

all csv files in current directory: ['./gapminder_gdp_oceania.csv', './gapminder_gdp_africa.csv', './gapminder_gdp_americas.csv', './gapminder_gdp_europe.csv', './gapminder_gdp_asia.csv', './gapminder_all.csv']


In [52]:
# A pattern that matches nothing is not an error: the result is an empty list.
pdb_files = glob.glob('*.pdb')
print('all PDB files:', pdb_files)

all PDB files: []


In [58]:
# Using both glob and for to process batches of files
for filename in glob.glob('./gapminder_*.csv'):
    data = pd.read_csv(filename)
    # print file name and the min GDP in 1952
    min_gdp_1952 = data['gdpPercap_1952'].min()
    print(filename, '\nMin 1952 GDP: ' + str(min_gdp_1952))

./gapminder_gdp_oceania.csv 
Min 1925 GDP: 10039.595640000001
./gapminder_gdp_africa.csv 
Min 1925 GDP: 298.8462121
./gapminder_gdp_americas.csv 
Min 1925 GDP: 1397.7171369999999
./gapminder_gdp_europe.csv 
Min 1925 GDP: 973.5331947999999
./gapminder_gdp_asia.csv 
Min 1925 GDP: 331.0
./gapminder_all.csv 
Min 1925 GDP: 298.8462121


In [61]:
# Minimum File Size
import glob
import pandas as pd
# Start above any possible row count so the first file read always replaces it.
fewest = float('inf')
for filename in glob.glob('./*.csv'):
    dataframe = pd.read_csv(filename)
    n_records = dataframe.shape[0]  # number of rows read from this file
    if n_records < fewest:
        fewest = n_records
print('smallest file has', fewest, 'records')

smallest file has 2 records
