# First things first

It is a programming tradition to start with a "Hello world!" program.

In [6]:
print('Hello world!')

Hello world!


In [7]:
# These lines are comments. The Python interpreter ignores them.
# Comments are just notes for humans to read to help understand the code.
# Best practice: add a comment for every couple of lines of code to explain what's going on and why.
# You'd be amazed at how quickly you forget your code's logic.

# Basic math

In [8]:
# addition
5 + 2

7

In [9]:
# subtraction
5 - 2

3

In [10]:
# multiplication
5 * 2

10

In [11]:
# division
5 / 2

# Note that this is one of the major differences between Python 2 and 3!
# In Python 3, this performs as we'd expect, and returns 2.5.
# But in Python 2, the / operator performs integer division and only returns 
# the whole number portion of the result when you divide two integers.
# (In Python 3, integer division uses the // operator.)

2

In [12]:
# exponentiation: raising 5 to the 2nd power
5 ** 2

25

In [13]:
# exponentiation part 2: the square root of 5 (5 to the 0.5th power)
5 ** 0.5

# Note that spaces don't matter here: 5**0.5 == 5 ** 0.5

2.23606797749979

In [14]:
# the modulus operator: what is the remainder when you divide 5 by 2?
5 % 2 

1

In [15]:
# test for equality
5 == 2

False

# Variables and print commands

In [16]:
# variables, such as x here, contain values and their values can vary
x = 5

In [17]:
# what is the value of x?
x

5

In [18]:
# you can perform operations on variables, just like you can on two numbers
x + 3

8

In [19]:
# what is the value of x now?
x

5

In [20]:
# to update the value of a variable, you need to do an assignment again
x = x + 3

In [21]:
# and now what is the value of x?
x

8

In [22]:
# create a new variable y from an operation on x
x = 5
y = x * 2
y

10

In [23]:
# outputting values only displays the last thing output
x
y

10

In [24]:
# use print() to write some value to the console
print(x)
print(y)

5
10


In [25]:
# you can comma-separate values to print multiple to the console on one line
# (Python 2 treats the parentheses as a tuple and displays (5, 10); Python 3 displays 5 10)
print(x, y)

(5, 10)


In [26]:
# you can also print the result of an expression
print (x * y)

50


In [28]:
# Naming conventions: you can't have spaces or dashes in variable names
# Also, variable names must begin with a letter or an underscore (but after that, numbers are fine)
# Underscores _ and Mixed Case are fine
# Variable names (like the rest of Python) are case sensitive
# Stylistically, variable names should begin with a lowercase letter
# names_with_underscores are one good approach
# namesWithCapitals are another

my_var = 1
my_var2 = 2
anotherVar = 3
this-aint-a-var = 4

SyntaxError: can't assign to operator (<ipython-input-28-8155ea7f613a>, line 12)

# Data types

In [29]:
# integers are whole numbers
type(125)

int

In [30]:
# every variable has a data type, and they can be of any type
x = 125
type(x)

int

In [31]:
# float is a floating point (aka decimal) number
some_rate = 4.3
type(some_rate)

float

In [32]:
# strings are strings of characters
my_string = 'abc'
type(my_string)

str

In [33]:
# a list is a collection of elements denoted by square brackets
my_list = [1, 2, 3, 4]
print(my_list)
type(my_list)

[1, 2, 3, 4]


list

In [34]:
# the elements of a list can be of different types
new_list = [1, 'Q', 8, 'four']
type(new_list)

list

In [35]:
# a dictionary is a collection of key:value pairs, denoted by curly braces
person = {'first_name':'Geoff', 'last_name':'Boeing'}
print(person)
type(person)

{'first_name': 'Geoff', 'last_name': 'Boeing'}


dict

In [36]:
# Some data types support indexing: 
# you can get an element from a sequence (like a string or a list) with [n] indexing notation
# In Python, the index starts with zero, not one, so [0] is the first element
print(my_string[0])
print(my_list[0])


a
1


In [None]:
# A tuple is also a collection of elements, but you can't change individual elements after creation
# You will generally want to use lists instead of tuples.
my_list = [1, 2, 3]
my_tuple = (1, 2, 3)

my_list[1] = 6
print(my_list)
my_tuple[1] = 4

# Python "Control Flow" or "Programming Flow" tools
## i.e. "if" statements, "for" loops, and functions

Now we're going to learn some simple tools to help you give Python some more complicated directions about how to run different commands.

### Loops

This is how to tell Python to run something over and over. There are different types of loops but we'll focus on "for" loops. A simple structure you can use is:

1. For every element...
2. In a certain "iterable" (e.g. list)...
3. Do a certain thing
4. Repeat until you've gone through every item in the iterable

In [38]:
# Example: we're going to run through this list below.

bart = ["Berkeley","Ashby","MacArthur","19th Street","12th Street","West Oakland"]

In [45]:
# Here's the simple structure above in python syntax

# Steps 1 and 2
# syntax is:  for "x" in "y"
# x is an arbitrary name that will refer to the current iteration
# y is the list you're iterating on 

for station in bart:
    
    # Step 3: what you do for each item in the list
    # Here we will just print
    print(station)
    
    # Step 4: this structure will automatically repeat! 

Berkeley
Ashby
MacArthur
19th Street
12th Street
West Oakland


Let's break this down. What happened here is:

First iteration: 
* The variable "station" is set to the first item in the list "bart", which happens to be a string, "Berkeley"
* Printing "station" therefore gives you an output of "Berkeley"

Second iteration:
* The variable "station" now equals the second item in the list, the string "Ashby", so printing "station" now outputs "Ashby"

And this continues until you hit West Oakland, at which point the loop sees no more items and ends.

In [44]:
# the variable "station" above is arbitrary; you can change it to whatever you want as long as it is consistent within the loop
# you'll often see people use "i" as convention

for i in bart:
    print(i)

Berkeley
Ashby
MacArthur
19th Street
12th Street
West Oakland


In [46]:
# You can set more complicated commands as well.

for i in bart:
    print("Now I am at " + i + " Station")

Now I am at Berkeley Station
Now I am at Ashby Station
Now I am at MacArthur Station
Now I am at 19th Street Station
Now I am at 12th Street Station
Now I am at West Oakland Station


In [47]:
# You can also set multiple variables to change within loops

for i in bart:
    before = "Now I am at "
    after = " Station"
    print(before + i + after)

Now I am at Berkeley Station
Now I am at Ashby Station
Now I am at MacArthur Station
Now I am at 19th Street Station
Now I am at 12th Street Station
Now I am at West Oakland Station


### if statements

You can also have Python run commands based on conditions: do x if y. 

In [64]:
# Simple structure:

warriors_wins = 70
bulls_record = 72

# The indented line only runs when the condition after "if" is True.
# print() is written with parentheses so it runs in both Python 2 and Python 3.
if warriors_wins < bulls_record:
    print("Warriors did not break the record")

Warriors did not break the record


In [65]:
# What happens if the condition is not met here?

warriors_wins = 73

# bulls_record is still 72 from the previous cell, so this condition is False
# and the indented print() is skipped: the cell produces no output.
if warriors_wins < bulls_record:
    print("Warriors did not break the record")

In [66]:
# you can add an "else" statement for when the condition is not met

# Exactly one of the two branches runs: the "if" branch when the condition
# is True, otherwise the "else" branch.
if warriors_wins < bulls_record:
    print("Warriors did not break the record")
else:
    print("Warriors tied or broke the record!")

Warriors tied or broke the record!


In [69]:
# you can set more than 2 conditions using "elif" which is short for "else if"

warriors_wins = 73

# Conditions are checked from top to bottom; only the first branch whose
# condition is True runs, and "else" catches everything that is left.
if warriors_wins < bulls_record:
    print("Warriors did not break the record")
elif warriors_wins == bulls_record:
    print("Warriors tied the record!")
else:
    print("Warriors broke the record!")

Warriors broke the record!


### Functions

So you've written a useful piece of code, and you're using it multiple times but finding it tedious to keep copying and pasting it. This is where **functions** come in. A function is basically a chunk of code that is saved under a name, and takes one or more inputs that are used in the chunk of code. Here's an example.

In [70]:
# Create a function to save the warriors code above


# "def" creates a function
# next is the function name: I call mine "record_check"
# in parentheses are the "arguments" - what will the function take in and use? I have one argument, which will be a number of wins.

def record_check(number_wins):
    """Print whether number_wins falls short of, ties, or breaks the record.

    number_wins: the number of wins to compare against the record of 72.
    Nothing is returned; the result is printed to the console.
    """

    bulls_record = 72

    # here I copy my code from above, but I changed "warriors_wins" to
    # "number_wins" to match the name of the function's argument
    # (print() is written with parentheses so it runs in Python 2 and 3)
    if number_wins < bulls_record:
        print("Warriors did not break the record")
    elif number_wins == bulls_record:
        print("Warriors tied the record!")
    else:
        print("Warriors broke the record!")
        
# when I run this cell, the code above will be "saved" as the function "record_check"

In [71]:
# now we can run the function with various numbers 

record_check(70)
record_check(72)
record_check(74)

Warriors did not break the record
Warriors tied the record!
Warriors broke the record!


In [76]:
# run the function in a loop

win_list = [69,70,71,72,73,74,75]

for i in win_list:
    record_check(i)

Warriors did not break the record
Warriors did not break the record
Warriors did not break the record
Warriors tied the record!
Warriors broke the record!
Warriors broke the record!
Warriors broke the record!


## Mini-Assignment

This should take five minutes or so:

* Create a function
* It should check whether the number it takes as an argument equals 30
* If it does not equal 30, it should print "This is not Steph Curry's number"
* If it does equal 30, it should print "This is Steph Curry's number"
* Create a list of the numbers 23, 11, 30, 12, and 40
* Create a for loop with this list. Each iteration should run your new function using a number in this new list.


In [None]:
# Write your function here (change names as desired!):

def function_name(argument):
    # content of function here
    # ("pass" is a do-nothing placeholder so this cell runs before you have
    # written anything; replace it with your own code)
    pass
    
    
# Create your list here:


# Write your loop here:


# Idea

Incorporate some string methods earlier in the session!

Make sure to highlight how Python's syntax is sensitive to indentation

# Let's build a geocoder!

Now that we've covered the basics of programming in Python, we're going to jump into a more complex, real-world example. Step by step, we will assemble a program that will:
1. read in a list of addresses from a CSV file,
2. clean up the data formatting,
3. geocode the addresses, and
4. export the geocoded data to a new CSV.

A script like this, which improves on ArcGIS's temperamental geocoding operations, is exactly the kind of tool that a planner might create as part of a typical workflow. In other words, we've reached the realm of the actually useful.

In [None]:
# We import the packages we need for this task. If you followed the instructions 
# on installing Python and making sure everything is set up correctly, these commands
# should look familiar. If not, the syntax is simple:

import pandas as pd
from geopy.geocoders import GoogleV3

In [None]:
# For our geocoding demo, we'll use a table of Oakland libraries I found
# through Code for Oakland: http://codeforoakland.org/data-sets/

# Reading a CSV into a pandas DataFrame is easy:
libraries = pd.read_csv('LibraryBranches.csv')

# Let's look at the table
libraries

In [None]:
# When you have a large DataFrame, it can be more convenient to see only the first few rows:
libraries.head() # defaults to 5 rows, but you can specify the number of rows

In [None]:
# Unlike Excel, you can't directly look at and modify cells in a table.
# Instead, you access data by specifying its *location.* There are two basic forms
# of location syntax, .loc[] and .iloc[] - note the square brackets, not parentheses.

# .loc[x, y]: look up the data at the location with row index named x and column named y
print(libraries.loc[0,'PHONE'])

# .iloc[m, n]: look up the data in the mth row and nth column (first column == 0)
print(libraries.iloc[2, 2])

## Important concept: _objects_

It is common in Python and other programming languages to store data in flexible relational structures known as **objects**. Objects have **attributes**: for example, a `Person` object might have `height` and `age` numeric attributes, plus a `name` string attribute.

If an object has a function as an attribute, that function is called a **method** of the object. So our `Person` object can have a `get_older()` method that increases the Person's `age` attribute. (Note that like other, stand-alone functions, this method includes a set of parentheses.) In the cell above, `head()` is a method of the `libraries` object.

An object's methods and other attributes are accessed via **dot notation**: if `drew` is a `Person` object, `drew.height` will return `drew`'s `height` attribute. `drew.get_older()` will increase `drew`'s `age`.

We're not going to look at how to define our own types of objects (known as *classes*) today, but it's a good skill to pick up, and tutorials are out there. https://jeffknupp.com/blog/2014/06/18/improve-your-python-python-classes-and-object-oriented-programming/

Right now, we're about to interact with three useful objects that are already defined in the modules we've loaded: `DataFrame`, from `pandas`, and `Geocoder` and `Location` from `geopy.geocoders`. Let's see how we can combine some `DataFrame` attributes and methods to quickly modify our `libraries` object.

In [None]:
# These ALL-CAPS column names are irritating. Let's change them to lowercase:

libraries.columns = libraries.columns.str.lower()
libraries.head()

# What happened here?
# libraries.columns is an attribute of libraries (a pandas DataFrame object).
# The columns object is itself an Index object, containing the names of each column.
# libraries.columns.str allows us to access and modify each string in libraries.columns -
# in this case, we use the lower() method to convert each string to lowercase.
# So we are telling Python to overwrite libraries.columns (the set of column names)
# with a version of libraries.columns converted to lowercase.

In [None]:
# Let's look at just one column

libraries['name']

# libraries.name (dot notation) is equivalent to libraries['name'] (bracket notation).
# However, when you want to create a new column, you must use bracket notation.

In [None]:
# We need to clean up our data in preparation for geocoding.
# There are some extra dashes in the address field, and the city and state aren't specified.
# Pandas provides some powerful methods for quickly modifying many pieces of data:

libraries['full_address'] = libraries.address.str.replace('- ', '') + ', Oakland, CA'
libraries.head()

# What happened here?
# We looked at libraries.address (one column), used str to access that column's
# contents as strings, then used the replace() string method to remove dashes.
# Then we used the + operator to concatenate city and state to each address.
# Finally, we used bracket notation to create a new full_address column to store
# the results of this operation.

In [None]:
# Now that we've properly formatted our addresses, it's time to set up the Geocoder.

g = GoogleV3()

# g is a GoogleV3 Geocoder object. This object has multiple methods, or built-in functions. 
# We'll be using the geocode() method, which takes a street address and returns a lat/long pair
# stored as a Location object. (Objects are just organized groups of data.)
# There is also a reverse() method that takes a lat/long pair and returns a street address.
# More info: https://geopy.readthedocs.org/en/1.11.0/#geopy.geocoders.GoogleV3
# and https://developers.google.com/maps/documentation/geocoding/intro
# The Google Maps Geocoding API has a limit of 2500 addresses per 24-hour period.

In [None]:
# Let's see the Geocoder in action. 
home = g.geocode('1001 Chanin Brkley')
home

# Notice that the Google Maps Geocoding API is quite flexible - it was able
# to identify Channing Way from "Chanin" and Berkeley, CA from "Brkley"!

In [None]:
# So, when we pass an address string to the Geocoder's geocode() method, the Geocoder
# looks up that address via the Google Maps Geocoding API and returns the result as a Location object.
# Now we work our way through the various components of this Location object.
print(home[0]) # matched address - a string
print(home[1]) # lat/long pair - a tuple
print(home[1][0]) # latitude - a float
print(home[1][1]) # longitude - a float

In [None]:
# OK! So we know how to get what we need out of the Location object that the Geocoder returns.
# We're now going to use a powerful feature of pandas - "apply," which applies a function to each
# item in a Series. 

# First, we need to define a function that takes an address string as a parameter,
# passes it to our Geocoder object, and returns the lat/long pair.

def getLatLong(address):
    # Pass the address string to the Geocoder g, then pull element [1]
    # (the lat/long pair) out of the object that comes back.
    location = g.geocode(address)
    latlong = location[1]
    return latlong

In [None]:
# Now we can *apply* the function we defined to each address in the full_address column.
# As before, we use bracket notation to store the results of the apply operation into
# a new latlong column.

libraries['latlong'] = libraries.full_address.apply(getLatLong)
libraries.head()

In [None]:
# It is a good idea to keep the lat/long pair itself. This is a common format
# that is used in web mapping (e.g. Leaflet). However, for some applications
# (e.g. ArcGIS, CartoDB), we will need separate latitude and longitude fields.
# Let's create those now, using a for loop.

for i in libraries.index:
    libraries.loc[i, 'latitude'] = libraries.loc[i, 'latlong'][0]
    libraries.loc[i, 'longitude'] = libraries.loc[i, 'latlong'][1]

libraries.head()

# What happened here?
# libraries.index, an attribute of libraries, stores the set of index values associated with
# each row in the libraries DataFrame. 'for i in libraries.index' tells the variable i to
# iterate over each value in libraries.index, allowing us to access and modify each row in libraries.
# So, for each row in libraries, this code assigns the 0th element of the tuple in latlong
# to a new latitude column, and the 1st element of the latlong tuple to a new longitude column.
# Note, as before, how we use indentation to indicate which statements are part of the for loop.

In [None]:
# Almost there now. Before we export our data to a CSV file, let's remove
# some columns we don't need.

libraries.drop(['objectid', 'address'], inplace = True, axis = 1)
libraries.head()

# What happened here?
# The drop() method removes data from a DataFrame. Its first argument is a list
# of entries to remove. The inplace keyword specifies that the drop operation
# should modify libraries directly. (By default, inplace = False - you can always
# use libraries_new = libraries.drop(xyz, inplace = False) to save the modified
# DataFrame to libraries_new.) Axis is either 0 for rows or 1 for columns.
# In this case, we set it = 1 to drop columns, not rows.

In [None]:
# Time to save our data to disk. pandas DataFrames have a very convenient
# to_csv() method. The index = False argument tells pandas not to write the index
# as its own column, again eliminating unnecessary data.

libraries.to_csv('LibraryBranchesGeocoded.csv', index = False)

In [None]:
# Finally, let's collect all the pieces of the puzzle into a single cell.
# I have added some try/except syntax to handle situations in which an address string
# is totally invalid. Google will try hard to match each address, but if it can't, 
# the Geocoder will return None instead of a Location object.
# I also added three print() statements to inform the user about what the program
# has done. See if you can figure out how each statement uses attributes and methods!

# import the needed packages
import pandas as pd
from geopy.geocoders import GoogleV3

# read the CSV into a pandas DataFrame
libraries = pd.read_csv('LibraryBranches.csv')
print('Read', len(libraries), 'records')

# clean up the column names and address field
libraries.columns = libraries.columns.str.lower()
libraries['full_address'] = libraries.address.str.replace('- ', '') + ', Oakland, CA'

# in case you want to see how the program handles junk data, uncomment the next line
# libraries.loc[0, 'full_address'] = 'dkjfbga;knga;sd'

# create a geopy Geocoder
g = GoogleV3()

# define helper function
def getLatLong(address):
    # Return the lat/long pair for an address string, or None when the
    # Geocoder could not match the address.
    result = g.geocode(address)
    try:
        return result[1]
    except TypeError:
        # geocode() returned None instead of a Location object, and None
        # cannot be indexed. Only this error is caught, so other problems
        # are not silently hidden.
        return None

# geocode the addresses
libraries['latlong'] = libraries.full_address.apply(getLatLong)
print('Geocoded', sum(libraries.latlong.notnull()), 'addresses')

# extract latitude and longitude
for i in libraries.index:
    try:
        libraries.loc[i, 'latitude'] = libraries.loc[i, 'latlong'][0]
        libraries.loc[i, 'longitude'] = libraries.loc[i, 'latlong'][1]
    except TypeError:
        # latlong is None for an address that could not be geocoded, and
        # indexing None raises TypeError; skip that row and leave its
        # latitude/longitude unset. Any other error is allowed to surface.
        pass

# drop unnecessary columns
libraries.drop(['objectid', 'address'], inplace = True, axis = 1)

# export the DataFrame to a new CSV file
libraries.to_csv('LibraryBranchesGeocoded.csv', index = False)
print('Exported', len(libraries), 'records')

# Not bad for 25 lines of code.