# SQL Part 3

## Import pandas and SQLAlchemy (MySQLdb is the underlying MySQL driver)

In [None]:
from sqlalchemy import create_engine
import pandas as pd

## Create the connection to MySQL database

- Define the connection to the MySQL database (mysql://user:password@host/database). More detail: http://zetcode.com/db/sqlalchemy/

In [None]:
# Open the database engine and one connection for the rest of the notebook.
# The URL can be overridden with the PHC7065_DB_URL environment variable so
# credentials do not have to be edited into (or committed with) the notebook;
# when the variable is unset the original course default is used.
import os

db_url = os.environ.get("PHC7065_DB_URL", "mysql://root:spr18@localhost/PHC7065")
eng = create_engine(db_url)  # engine: passed to DataFrame.to_sql further down
con = eng.connect()          # connection: passed to pd.read_sql / used for execute

## Create some tables

In [None]:
# Show the full contents of the Subject table as a DataFrame.
pd.read_sql("SELECT * FROM Subject", con=con)

In [None]:
# Multi-row INSERT for the County table: seven (id, state_id, name, income)
# rows spread over state_id 1, 2 and 3. County ids restart at 1 within each
# state, so a county is identified by the (id, state_id) pair.
# This cell only builds the SQL string; nothing is sent to the database here.
sqlq = '''
        INSERT INTO County(id,state_id,name,income)
        VALUES  (1,1,'Alachua',78987),
                (2,1,'Orange',87689),
                (3,1,'Marion',65908),
                (1,2,'Newton',56765),
                (2,2,'Burke',67890),
                (1,3,'Glenn',98678),
                (2,3,'Kings',87908);
'''

In [None]:
# Execute the statement currently held in `sqlq` on the open connection.
# The string is wrapped in text() because SQLAlchemy 2.x no longer accepts a
# plain str in Connection.execute(); text() is accepted by 1.x as well.
from sqlalchemy import text

result = con.execute(text(sqlq))
# Connections that expose commit() (SQLAlchemy 2.x style) do not autocommit,
# so persist the change explicitly; older connections without commit()
# autocommit data-changing statements themselves.
if hasattr(con, "commit"):
    con.commit()
result

In [None]:
# Show the full contents of the County table as a DataFrame.
pd.read_sql("SELECT * FROM County", con=con)

In [None]:
# Multi-row INSERT for the State table (name, policy, start_date, end_date).
# start_date is written as 'YYYY-MM-DD' (NULL for Georgia), while end_date is
# written as 'MM/DD/YYYY' text; a later query parses end_date with
# STR_TO_DATE.
# This cell only builds the SQL string.
# NOTE(review): no con.execute(sqlq) for this statement is visible in this part
# of the notebook - confirm it is executed before State is queried.
sqlq = '''
        INSERT INTO State(name,policy,start_date,end_date)
        VALUES  ('Florida',1,'2002-11-23','12/23/2006'),
                ('Georgia',0,NULL,'12/25/2007'),
                ('California',1,'2004-12-23','11/03/2009'),
                ('Washington',1,'2003-11-20','08/23/2007');
'''

In [None]:
# Show the full contents of the State table as a DataFrame.
pd.read_sql("SELECT * FROM State", con=con)

## INNER JOIN

In [None]:
# INNER JOIN: only subjects whose state_id matches a State.id are returned,
# each with the matching state's name (as state_name) and policy value.
sqlq = '''
        SELECT Subject.name, State.name as state_name, State.policy
        FROM Subject
        INNER JOIN State ON Subject.state_id=State.id
        ;
'''
pd.read_sql(sqlq, con=con)

## LEFT JOIN

In [None]:
# LEFT JOIN: every Subject row is returned; state_name and policy are NULL
# for subjects whose state_id has no matching State.id.
sqlq = '''
        SELECT Subject.name, State.name as state_name, State.policy
        FROM Subject
        LEFT JOIN State ON Subject.state_id=State.id
        ;
'''
pd.read_sql(sqlq, con=con)

## RIGHT JOIN

In [None]:
# RIGHT JOIN: every State row is returned; Subject.name is NULL for states
# that no subject refers to.
sqlq = '''
        SELECT Subject.name, State.name as state_name, State.policy
        FROM Subject
        RIGHT JOIN State ON Subject.state_id=State.id
        ;
'''
pd.read_sql(sqlq, con=con)

## FULL JOIN

In [None]:
# FULL (outer) JOIN emulation - MySQL has no FULL JOIN keyword.
# First branch: all subjects, with their state when one matches (LEFT JOIN).
# Second branch: only the states that no subject refers to (RIGHT JOIN
# filtered to rows where the Subject side is missing).
# UNION ALL is used instead of UNION because UNION removes duplicate rows,
# which would silently merge distinct subjects that happen to share the same
# (name, state_name, policy); the IS NULL filter keeps the matched rows from
# being listed twice.
sqlq = '''
        SELECT Subject.name, State.name as state_name, State.policy
        FROM Subject
        LEFT JOIN State ON Subject.state_id=State.id
        UNION ALL
        SELECT Subject.name, State.name as state_name, State.policy
        FROM Subject
        RIGHT JOIN State ON Subject.state_id=State.id
        WHERE Subject.id IS NULL
        ;
'''
pd.read_sql(sql=sqlq, con=con)

## CROSS JOIN

In [None]:
# CROSS JOIN: every Subject row paired with every State row (no ON clause).
# State.name is aliased to state_name, as in the other join queries, so the
# resulting DataFrame does not end up with two columns both labelled "name".
sqlq = '''
        SELECT Subject.name, State.name as state_name
        FROM Subject
        CROSS JOIN State
        ;
'''
pd.read_sql(sql=sqlq, con=con)

## Theta JOIN

In [None]:
# Theta join: the join condition is an inequality, so each subject is paired
# with every state whose id is less than or equal to the subject's state_id.
# Both ids are selected so the condition can be checked in the output.
sqlq = '''
        SELECT Subject.name, State.name as state_name, State.policy,Subject.state_id,State.id
        FROM Subject
        JOIN State ON Subject.state_id>=State.id
        ;
'''
pd.read_sql(sqlq, con=con)

## Some Useful String Functions

In [None]:
# For each state: the first two characters of its name in upper case
# (new_name) and the length of the name (length_name).
sqlq = '''
        SELECT UPPER(LEFT(name,2)) as new_name,LENGTH(name) as length_name
        FROM State
        ;
'''
pd.read_sql(sqlq, con=con)

In [None]:
# Strip any leading 'F' from each state name and return it as new_name.
sqlq = '''
        SELECT TRIM(LEADING 'F' FROM name) as new_name
        FROM State
        ;
'''
pd.read_sql(sqlq, con=con)

In [None]:
# Join State to County on the state id and build a combined label of the
# form "<county> County, <state>" with CONCAT, next to the two source names.
sqlq = '''
        SELECT State.name as name_State, County.name as name_County, CONCAT(County.name,' County, ',State.name) as name
        FROM State
        INNER JOIN County ON State.id=County.state_id
        ;
'''
pd.read_sql(sqlq, con=con)

## Useful Functions for Dates and Times

In [None]:
# Show the raw end_date text next to the same value parsed as a date with
# STR_TO_DATE using a month/day/4-digit-year pattern.
# The percent signs are doubled in the Python string - presumably because the
# database driver treats a single % as a parameter marker; confirm.
sqlq = '''
        SELECT end_date, STR_TO_DATE(end_date,'%%m/%%d/%%Y') AS cleaned_end_date
        FROM State
        ;
'''
pd.read_sql(sqlq, con=con)

In [None]:
# Subquery in FROM: the inner SELECT (aliased as test) parses end_date into
# cleaned_end_date; the outer SELECT extracts the month from that parsed date.
sqlq = '''
        SELECT EXTRACT(MONTH FROM test.cleaned_end_date) AS end_month  
        FROM
        (SELECT end_date, STR_TO_DATE(end_date,'%%m/%%d/%%Y') AS cleaned_end_date
        FROM State) AS test
        ;
'''
pd.read_sql(sqlq, con=con)

## COALESCE

In [None]:
# Return every State column plus recode_start_date, which is start_date with
# NULL values replaced by '2001-01-01' via COALESCE.
sqlq = '''
        SELECT *, COALESCE(start_date,'2001-01-01') as recode_start_date  
        FROM State
        ;
'''
pd.read_sql(sqlq, con=con)

# NHIS Data

## Import the 2015 Family File and Household File (https://www.cdc.gov/nchs/nhis/nhis_2015_data_release.htm)

In [None]:
# Load the two NHIS CSV files into DataFrames (family file first, then the
# household file); both live in the WK3 data directory.
family = pd.read_csv(filepath_or_buffer='../WK3/dat/familyxx.csv')
household = pd.read_csv(filepath_or_buffer='../WK3/dat/househld.csv')

In [None]:
# Preview the first rows of the family file to check that it loaded.
family.head(n=5)

In [None]:
# Preview the first rows of the household file to check that it loaded.
household.head(n=5)

## Export the data to MySQL database

In [None]:
# Write both DataFrames to MySQL through the engine. Any existing table with
# the same name is replaced, and rows are written in batches of 100.
export_opts = dict(con=eng, if_exists='replace', chunksize=100)
family.to_sql(name='Family', **export_opts)
household.to_sql(name='Household', **export_opts)

## Check the exported data

In [None]:
# Read back the first five rows of the Family table.
sqlq = '''
        SELECT *
        FROM Family
        LIMIT 5
        ;
'''
pd.read_sql(sqlq, con=con)

In [None]:
# Read back the first five rows of the Household table.
sqlq = '''
        SELECT *
        FROM Household
        LIMIT 5
        ;
'''
pd.read_sql(sqlq, con=con)

## Subqueries

In [None]:
# DDL for the Subject table: drops any existing Subject table, then recreates
# it with an auto-incrementing integer primary key (id), a name of up to 20
# characters, and NOT NULL integer columns for gender, age, race, county_id
# and state_id.
# This cell only builds the SQL string.
# NOTE(review): the string holds two statements (DROP + CREATE) and no execute
# call for it is visible in this part of the notebook - confirm how it is run
# and that the driver accepts multiple statements in one call.
sqlq = '''
        DROP TABLE IF EXISTS Subject;
        CREATE TABLE Subject(
        id INT NOT NULL AUTO_INCREMENT,
        name VARCHAR(20) NOT NULL,
        gender INT NOT NULL,
        age INT NOT NULL,
        race INT NOT NULL,
        county_id INT NOT NULL,
        state_id INT NOT NULL,
        PRIMARY KEY (id)
        );
'''

In [None]:
# DDL for the County table: drops any existing County table, then recreates it
# with NOT NULL columns id, state_id, name (up to 20 characters) and income.
# The primary key is the composite (id, state_id), so the same county id can
# appear under different states.
# This cell only builds the SQL string.
# NOTE(review): the string holds two statements (DROP + CREATE) and no execute
# call for it is visible in this part of the notebook - confirm how it is run
# and that the driver accepts multiple statements in one call.
sqlq = '''
        DROP TABLE IF EXISTS County;
        CREATE TABLE County(
        id INT NOT NULL,
        state_id INT NOT NULL,
        name VARCHAR(20) NOT NULL,
        income INT NOT NULL,
        PRIMARY KEY (id,state_id)
        );
'''

In [None]:
# DDL for the State table: drops any existing State table, then recreates it
# with an auto-incrementing integer primary key (id), a NOT NULL name and
# policy, a nullable DATE start_date, and end_date stored as text
# (VARCHAR(20)) rather than as a DATE.
# This cell only builds the SQL string.
# NOTE(review): the string holds two statements (DROP + CREATE) and no execute
# call for it is visible in this part of the notebook - confirm how it is run
# and that the driver accepts multiple statements in one call.
sqlq = '''
        DROP TABLE IF EXISTS State;
        CREATE TABLE State(
        id INT NOT NULL AUTO_INCREMENT,
        name VARCHAR(20) NOT NULL,
        policy INT NOT NULL,
        start_date DATE,
        end_date VARCHAR(20),
        PRIMARY KEY (id)
        );
'''

## Insert some data