# Aula 3: SQL

## Introdução

Nesta aula veremos as principais queries de consulta em SQL.

## Conexão com o db4free via SQLAlchemy

In [None]:
import yaml
import sqlalchemy
import pandas as pd
# from pandasql import sqldf

In [None]:
# Load the SQL credentials from the local YAML file.
# safe_load only builds plain Python types (dict, list, str, int, ...), which
# is all a credentials mapping needs; unlike yaml.load with FullLoader it never
# constructs arbitrary Python objects from the file's contents.
# The explicit encoding keeps non-ASCII passwords from depending on the
# platform's default locale encoding.
with open('credentials.yml', 'r', encoding='utf-8') as file:
    credentials = yaml.safe_load(file)

- A URL de conexão da engine deve estar no formato:
- mysql+mysqlconnector://`user`:`password`@`host`:`port`/`database`

In [None]:
# Assemble the SQLAlchemy connection URL piece by piece from the loaded
# credentials (driver, login, server address, database), then create the
# engine used by every query below.
connection_url = (
    'mysql+mysqlconnector://'
    f'{credentials["user"]}:{credentials["password"]}'
    f'@{credentials["host"]}:{credentials["port"]}'
    f'/{credentials["database"]}'
)
engine = sqlalchemy.create_engine(connection_url)

### UPLOAD TABLE

In [None]:
# Sample table "A": five people whose subject_id runs from '1' to '5'
# (ids are stored as strings, values as integers).
raw_data = dict(
    subject_id=['1', '2', '3', '4', '5'],
    first_name=['Alex', 'Amy', 'Allen', 'Alice', 'Ayoung'],
    last_name=['Anderson', 'Ackerman', 'Ali', 'Aoni', 'Atiches'],
    value=[20, 30, 50, 80, 100],
)
# The dict's insertion order is already the desired column order.
df_a = pd.DataFrame(raw_data, columns=list(raw_data))
df_a

In [None]:
# Sample table "B": five people whose subject_id runs from '4' to '8', so only
# ids '4' and '5' also appear in table "A".
raw_data = dict(
    subject_id=['4', '5', '6', '7', '8'],
    first_name=['Billy', 'Brian', 'Bran', 'Bryce', 'Betty'],
    last_name=['Bonder', 'Black', 'Balwner', 'Brice', 'Btisan'],
    value=[15, 5, 70, 35, 10],
)
# The dict's insertion order is already the desired column order.
df_b = pd.DataFrame(raw_data, columns=list(raw_data))
df_b

In [None]:
# Upload both DataFrames as tables named after their variables. An existing
# table with the same name is dropped and recreated (if_exists='replace').
df_a.to_sql(name='df_a', con=engine, if_exists='replace')
df_b.to_sql(name='df_b', con=engine, if_exists='replace')

In [19]:
# Read back every row and column of the df_a table.
query = '''
    SELECT *
    FROM df_a
'''
pd.read_sql_query(sql=query, con=engine)

Unnamed: 0,index,subject_id,first_name,last_name,value
0,0,1,Alex,Anderson,20
1,1,2,Amy,Ackerman,30
2,2,3,Allen,Ali,50
3,3,4,Alice,Aoni,80
4,4,5,Ayoung,Atiches,100


In [18]:
# Read back every row and column of the df_b table.
query = '''
    SELECT *
    FROM df_b
'''
pd.read_sql_query(sql=query, con=engine)

Unnamed: 0,index,subject_id,first_name,last_name,value
0,0,4,Billy,Bonder,15
1,1,5,Brian,Black,5
2,2,6,Bran,Balwner,70
3,3,7,Bryce,Brice,35
4,4,8,Betty,Btisan,10


## JOIN

![](https://www.dofactory.com/img/sql/sql-joins.png)

In [14]:
# Plain JOIN on subject_id: only rows whose subject_id is present in both
# tables are returned. SELECT * brings the columns of both tables, so every
# column name appears twice in the result.
query = '''
    SELECT *
    FROM df_a AS a
    JOIN df_b AS b
    ON a.subject_id = b.subject_id
'''
pd.read_sql_query(sql=query, con=engine)

Unnamed: 0,index,subject_id,first_name,last_name,value,index.1,subject_id.1,first_name.1,last_name.1,value.1
0,3,4,Alice,Aoni,80,0,4,Billy,Bonder,15
1,4,5,Ayoung,Atiches,100,1,5,Brian,Black,5


In [None]:
# query = '''
#     SELECT *
#     FROM df_a AS a
#     JOIN df_b AS b
#     ON a.subject_id = b.subject_id
# '''

# sqldf(query)

In [15]:
# Explicit INNER JOIN on subject_id: keeps only the rows whose subject_id
# exists in both df_a and df_b.
query = '''
    SELECT *
    FROM df_a AS a
    INNER JOIN df_b AS b
    ON a.subject_id = b.subject_id
'''
pd.read_sql_query(sql=query, con=engine)

Unnamed: 0,index,subject_id,first_name,last_name,value,index.1,subject_id.1,first_name.1,last_name.1,value.1
0,3,4,Alice,Aoni,80,0,4,Billy,Bonder,15
1,4,5,Ayoung,Atiches,100,1,5,Brian,Black,5


### LEFT JOIN

In [16]:
# Show df_a on its own for reference; it is the left-hand table of the
# LEFT JOIN in this section.
query = '''
    SELECT *
    FROM df_a
'''
pd.read_sql_query(sql=query, con=engine)

Unnamed: 0,index,subject_id,first_name,last_name,value
0,0,1,Alex,Anderson,20
1,1,2,Amy,Ackerman,30
2,2,3,Allen,Ali,50
3,3,4,Alice,Aoni,80
4,4,5,Ayoung,Atiches,100


In [20]:
# LEFT JOIN on subject_id: every row of df_a (left table) is kept; the df_b
# columns are NULL for rows of df_a with no matching subject_id in df_b.
query = '''
    SELECT *
    FROM df_a AS a
    LEFT JOIN df_b AS b
    ON a.subject_id = b.subject_id
'''
pd.read_sql_query(sql=query, con=engine)

Unnamed: 0,index,subject_id,first_name,last_name,value,index.1,subject_id.1,first_name.1,last_name.1,value.1
0,0,1,Alex,Anderson,20,,,,,
1,1,2,Amy,Ackerman,30,,,,,
2,2,3,Allen,Ali,50,,,,,
3,3,4,Alice,Aoni,80,0.0,4.0,Billy,Bonder,15.0
4,4,5,Ayoung,Atiches,100,1.0,5.0,Brian,Black,5.0


### RIGHT JOIN

In [21]:
# RIGHT JOIN on subject_id: every row of df_b (right table) is kept; the df_a
# columns are NULL for rows of df_b with no matching subject_id in df_a.
query = '''
    SELECT *
    FROM df_a AS a
    RIGHT JOIN df_b AS b
    ON a.subject_id = b.subject_id
'''
pd.read_sql_query(sql=query, con=engine)

Unnamed: 0,index,subject_id,first_name,last_name,value,index.1,subject_id.1,first_name.1,last_name.1,value.1
0,3.0,4.0,Alice,Aoni,80.0,0,4,Billy,Bonder,15
1,4.0,5.0,Ayoung,Atiches,100.0,1,5,Brian,Black,5
2,,,,,,2,6,Bran,Balwner,70
3,,,,,,3,7,Bryce,Brice,35
4,,,,,,4,8,Betty,Btisan,10


## UNION

In [22]:
# UNION stacks the rows of both SELECTs into one result and drops rows that
# are exact duplicates. Both tables expose the same columns in the same order.
query = '''
    SELECT * FROM df_a    
    UNION
    SELECT * FROM df_b
'''
pd.read_sql_query(sql=query, con=engine)

Unnamed: 0,index,subject_id,first_name,last_name,value
0,0,1,Alex,Anderson,20
1,1,2,Amy,Ackerman,30
2,2,3,Allen,Ali,50
3,3,4,Alice,Aoni,80
4,4,5,Ayoung,Atiches,100
5,0,4,Billy,Bonder,15
6,1,5,Brian,Black,5
7,2,6,Bran,Balwner,70
8,3,7,Bryce,Brice,35
9,4,8,Betty,Btisan,10


In [None]:
# query = '''
#     SELECT * FROM df_a    
#     UNION
#     SELECT * FROM df_b
# '''

# sqldf(query)

## CASE

O comando `CASE` se assemelha a uma condicional `se..., então...`, e sua sintaxe é:

```sql
CASE
    WHEN condition1 THEN result1
    WHEN condition2 THEN result2
    WHEN conditionN THEN resultN
    ELSE result
END;
```

In [23]:
# Show df_a unchanged as the baseline before a CASE column is added to it.
query = '''
    SELECT * FROM df_a    

'''
pd.read_sql_query(sql=query, con=engine)

Unnamed: 0,index,subject_id,first_name,last_name,value
0,0,1,Alex,Anderson,20
1,1,2,Amy,Ackerman,30
2,2,3,Allen,Ali,50
3,3,4,Alice,Aoni,80
4,4,5,Ayoung,Atiches,100


In [26]:
# Add a `value_category` column that buckets each row of df_a by `value`.
# CASE yields the result of the first WHEN whose condition holds, so the
# stricter test (> 50) must come before the looser one (>= 30).
# NOTE: the 'maior_que_30' bucket also includes value == 30 (condition is >=).
# String literals use single quotes (standard SQL); double-quoted text is
# parsed as an identifier when MySQL runs with the ANSI_QUOTES mode enabled.
query = '''
    SELECT *,
        CASE
            WHEN value > 50 THEN 'maior_que_50'
            WHEN value >= 30 THEN 'maior_que_30'
            ELSE 'menor_que_30'
        END AS value_category
    FROM df_a
'''
pd.read_sql_query(query, engine)

Unnamed: 0,index,subject_id,first_name,last_name,value,value_category
0,0,1,Alex,Anderson,20,menor_que_30
1,1,2,Amy,Ackerman,30,maior_que_30
2,2,3,Allen,Ali,50,maior_que_30
3,3,4,Alice,Aoni,80,maior_que_50
4,4,5,Ayoung,Atiches,100,maior_que_50


In [None]:
# query = '''
#     SELECT *, 
#         CASE 
#             WHEN value > 50 THEN "maior_que_50"
#             WHEN value >= 30 THEN "maior_que_30"
#             ELSE "menor_que_30"
#         END AS value_category
            
#     FROM df_a    

# '''

# sqldf(query)