# Chapter 3

In [10]:
# conda install pymysql

# Import create_engine function
from sqlalchemy import create_engine

# Create an engine to the census database
engine = create_engine('mysql+pymysql://' +
                       'student:datacamp' +
                       '@courses.csrrinzqubik.us-east-1.rds.amazonaws.com:3306/' +
                       'census')

# Print the table names
print(engine.table_names())

['census', 'state_fact']


In [11]:
# Data prep - the remote mysql works!
from sqlalchemy import select, Table, MetaData, desc, func
metadata = MetaData()
census = Table('census', metadata, autoload=True, autoload_with=engine)
connection = engine.connect()

In [12]:
# Build query to return state names by population difference from 2008 to 2000: stmt
stmt = select([census.columns.state,
               (census.columns.pop2008 - census.columns.pop2000).label('pop_change')])

# Append group by for the state: stmt
stmt = stmt.group_by(census.columns.state)

# Append order by for pop_change descendingly: stmt
stmt = stmt.order_by(desc('pop_change'))

# Return only 5 results: stmt
stmt = stmt.limit(5)

# Use connection to execute the statement and fetch all results
results = connection.execute(stmt).fetchall()

# Print the state and population change for each record
for result in results:
    print('{}:{}'.format(result.state, result.pop_change))


Texas:40137
California:35406
Florida:21954
Arizona:14377
Georgia:13357


In [13]:
# import case, cast and Float from sqlalchemy
from sqlalchemy import case, cast, Float

# Build an expression to calculate female population in 2000
female_pop2000 = func.sum(
    case([
        (census.columns.sex == 'F', census.columns.pop2000)
    ], else_=0))

# Cast an expression to calculate total population in 2000 to Float
total_pop2000 = cast(func.sum(census.columns.pop2000), Float)

# Build a query to calculate the percentage of females in 2000: stmt
stmt = select([female_pop2000 / total_pop2000 * 100])

# Execute the query and store the scalar result: percent_female
percent_female = connection.execute(stmt).scalar()

# Print the percentage
print(percent_female)


50.7455


  self.dialect.type_compiler.process(cast.typeclause.type))


## Relationships

In [14]:
# data prep
state_fact = Table('state_fact', metadata, autoload=True, autoload_with=engine)

In [16]:
# Build a statement to join census and state_fact tables: stmt
stmt = select([census.columns.pop2000,
               state_fact.columns.abbreviation])

# Execute the statement and get the first result: result
result = connection.execute(stmt).first()

# Loop over the keys in the result object and print the key and value
for key in result.keys():
    print(key, getattr(result, key))

pop2000 89600
abbreviation IL


In [None]:
# Build a statement to select the census and state_fact tables: stmt
stmt = select([census, state_fact])

# Add a select_from clause that wraps a join for the census and state_fact
# tables where the census state column and state_fact name column match
stmt = stmt.select_from(
    ____(____, census.columns.state == state_fact.columns.name))

# Execute the statement and get the first result: result
result = connection.execute(stmt).first()

# Loop over the keys in the result object and print the key and value
for key in result.keys():
    print(key, getattr(result, key))
