# 쿼리 필수 요소

- 빅쿼리 특징
    1. 구조화, 준구조화( semi-structured )된 데이터를 위한 영구 스토리지를 제공하는 데이터 웨어하우스
    2. 데이터 분석을 위한 도구

- 기본 작업
    - Create : 
        1. 새 레코드 추가
        2. 로드 작업, SQL INSERT 문, 스트리밍 삽입 API를 통해 구현
        3. SQL 데이터 정의 언어 ( DDL : Data Definition Language )를 통해 테이블, 뷰, 머신러닝 모델 같은 데이터베이스 객체 생성
    - Read : 
        1. 레코드 검색 
        2. SQL SELECT 문과 벌크 read API로 구현
    - Update :
        1. 기존 레코드 수정
        2. 빅쿼리의 데이터 조작 언어 ( DML : Data Manipulation Language )의 일부인 SQL UPDATE 및 MERGE 문으로 구현
    - Delete :
        1. 기존 레코드 제거
        2. DML 작업 중 하나인 SQL DELETE로 구현

# 간단 쿼리

## select로 행 검색

In [None]:
import pandas as pd
import numpy as np
import warnings
# Install a blanket 'ignore' filter so no warnings are shown by later cells.
# NOTE(review): this also hides deprecation notices from pandas / BigQuery clients.
warnings.filterwarnings('ignore')

In [12]:
# Basic SELECT: pull two columns (gender, tripduration) from the public
# Citi Bike trips table and keep only five rows.
query = """
SELECT gender, tripduration
FROM `bigquery-public-data.new_york_citibike.citibike_trips` 
LIMIT 5
"""

# Run the query with standard SQL and load the result into a DataFrame.
df = pd.read_gbq(
    query,
    project_id="project-bigquery-319110",
    dialect="standard",
    auth_local_webserver=True,
)

Downloading: 100%|█████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 14.54rows/s]


In [13]:
# Show the five-row SELECT result as the cell output.
df

Unnamed: 0,gender,tripduration
0,male,432
1,female,1186
2,male,799
3,female,238
4,male,668


## AS로 컬럼 이름에 별칭 지정

In [14]:
# AS gives a computed column a name: tripduration / 60 is returned
# under the alias duration_minutes.
query = """
SELECT gender, tripduration/60 as duration_minutes
FROM `bigquery-public-data.new_york_citibike.citibike_trips` 
LIMIT 5
"""

# Run the query with standard SQL and keep the result in its own DataFrame.
df1 = pd.read_gbq(
    query,
    project_id="project-bigquery-319110",
    dialect="standard",
    auth_local_webserver=True,
)

Downloading: 100%|█████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 16.08rows/s]


In [15]:
# Show the aliased-column result as the cell output.
df1

Unnamed: 0,gender,duration_minutes
0,male,7.2
1,female,19.766667
2,male,13.316667
3,female,3.966667
4,male,11.133333


## WHERE로 필터링

In [24]:
# WHERE filters rows before they are returned: keep trips whose
# tripduration is in [300, 600) and whose gender is 'female'.
query = """
SELECT gender, tripduration
FROM `bigquery-public-data.new_york_citibike.citibike_trips`
WHERE tripduration >= 300 AND tripduration < 600 AND gender = 'female'
LIMIT 5
"""

# Run the query with standard SQL and load the result into a DataFrame.
df = pd.read_gbq(
    query,
    project_id="project-bigquery-319110",
    dialect="standard",
    auth_local_webserver=True,
)

Downloading: 100%|█████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 14.32rows/s]


In [25]:
# Show the filtered rows as the cell output.
df

Unnamed: 0,gender,tripduration
0,female,309
1,female,304
2,female,476
3,female,406
4,female,365


In [26]:
# Filter on a computed value. The WHERE clause cannot refer to an alias
# defined in the SELECT list, so the expression tripduration / 60 is
# written out again instead of using the alias `minutes`.
query = """
SELECT gender, tripduration / 60 AS minutes
FROM `bigquery-public-data.new_york_citibike.citibike_trips`
WHERE (tripduration / 60 ) < 10 
LIMIT 5
"""

# Run the query with standard SQL and load the result into a DataFrame.
df = pd.read_gbq(
    query,
    project_id="project-bigquery-319110",
    dialect="standard",
    auth_local_webserver=True,
)

Downloading: 100%|█████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 14.55rows/s]


In [27]:
# Show the rows whose computed minutes value is below 10 as the cell output.
df

Unnamed: 0,gender,minutes
0,male,6.183333
1,male,9.25
2,male,5.466667
3,male,3.766667
4,male,1.766667


## SELECT *, EXCEPT, REPLACE

In [29]:
# SELECT * returns every column of the trips table; LIMIT caps the
# result at 1000 rows.
# NOTE(review): on BigQuery, LIMIT generally does not reduce the bytes
# scanned (and billed) by a SELECT * — confirm the cost before reusing
# this pattern on large tables.
query = """
SELECT *
FROM `bigquery-public-data.new_york_citibike.citibike_trips`
LIMIT 1000
"""

# Run the query with standard SQL and load the result into a DataFrame.
df = pd.read_gbq(
    query,
    project_id="project-bigquery-319110",
    dialect="standard",
    auth_local_webserver=True,
)

Downloading: 100%|█████████████████████████████████████████████████████████████| 1000/1000 [00:00<00:00, 1348.72rows/s]


In [30]:
# Show the full-width SELECT * result as the cell output.
df

Unnamed: 0,tripduration,starttime,stoptime,start_station_id,start_station_name,start_station_latitude,start_station_longitude,end_station_id,end_station_name,end_station_latitude,end_station_longitude,bikeid,usertype,birth_year,gender,customer_plan
0,432,2013-09-16 19:22:43.000,2013-09-16 19:29:55.000,509,9 Ave & W 22 St,40.745497,-74.001971,442,W 27 St & 7 Ave,40.746647,-73.993915,18447,Subscriber,1951,male,
1,1186,2015-12-30 13:02:38.000,2015-12-30 13:22:25.000,280,E 10 St & 5 Ave,40.733320,-73.995101,254,W 11 St & 6 Ave,40.735324,-73.998004,22598,Subscriber,1945,female,
2,799,2017-09-02 16:27:37.000,2017-09-02 16:40:57.000,335,Washington Pl & Broadway,40.729039,-73.994046,540,Lexington Ave & E 29 St,40.743116,-73.982154,28833,Subscriber,1997,male,
3,238,2017-11-15 06:57:09.000,2017-11-15 07:01:08.000,146,Hudson St & Reade St,40.716250,-74.009106,387,Centre St & Chambers St,40.712733,-74.004607,21338,Subscriber,1998,female,
4,668,2013-11-07 15:12:07.000,2013-11-07 15:23:15.000,529,W 42 St & 8 Ave,40.757570,-73.990985,352,W 56 St & 6 Ave,40.763406,-73.977225,19888,Subscriber,1949,male,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,1132,2018-02-04 09:52:10.349,2018-02-04 10:11:02.478,119,Park Ave & St Edwards St,40.696089,-73.978034,412,Forsyth St & Canal St,40.715815,-73.994224,18867,Subscriber,1947,female,
996,293,2014-01-27 17:13:15.000,2014-01-27 17:18:08.000,509,9 Ave & W 22 St,40.745497,-74.001971,446,W 24 St & 7 Ave,40.744876,-73.995299,16126,Subscriber,1942,male,
997,734,2017-07-25 12:07:43.000,2017-07-25 12:19:58.000,455,1 Ave & E 44 St,40.750020,-73.969053,3466,W 45 St & 6 Ave,40.756687,-73.982577,19261,Customer,1996,male,
998,266,2013-08-04 19:57:02.000,2013-08-04 20:01:28.000,294,Washington Square E,40.730494,-73.995721,345,W 13 St & 6 Ave,40.736494,-73.997044,14873,Subscriber,1941,male,


In [33]:
# SELECT * EXCEPT(...) returns every column apart from the listed ones.
# Here: all station columns except short_name and last_reported, for
# stations whose name contains 'Riverside'.
query = """
SELECT * EXCEPT(short_name,last_reported)
FROM `bigquery-public-data.new_york_citibike.citibike_stations`
WHERE name LIKE '%Riverside%'
"""

# Run the query with standard SQL, then display the DataFrame.
df = pd.read_gbq(
    query,
    project_id="project-bigquery-319110",
    dialect="standard",
    auth_local_webserver=True,
)
df

Downloading: 100%|███████████████████████████████████████████████████████████████████| 13/13 [00:00<00:00, 37.67rows/s]


Unnamed: 0,station_id,name,latitude,longitude,region_id,rental_methods,capacity,eightd_has_key_dispenser,num_bikes_available,num_bikes_disabled,num_docks_available,num_docks_disabled,is_installed,is_renting,is_returning,eightd_has_available_keys
0,4193,W 181 St & Riverside Dr,-73.94186,40.85168,71,"CREDITCARD,KEY",21,False,1,2,18,0,True,True,True,False
1,4054,W 163 St & Riverside Dr,-73.945446,40.839278,71,"CREDITCARD,KEY",19,False,2,2,15,0,True,True,True,False
2,4009,Riverside Dr & W 145 St,-73.951989,40.827305,71,"CREDITCARD,KEY",35,False,2,1,32,0,True,True,True,False
3,4014,Riverside Dr & W 148 St,-73.950426,40.829114,71,"CREDITCARD,KEY",22,False,3,0,19,0,True,True,True,False
4,3984,W 140 St & Riverside Dr,-73.954743,40.824013,71,"CREDITCARD,KEY",23,False,5,1,17,0,True,True,True,False
5,4007,Riverside Dr & W 138 St,-73.955857,40.823168,71,"CREDITCARD,KEY",30,False,8,0,22,0,True,True,True,False
6,3166,Riverside Dr & W 72 St,-73.985624,40.780578,71,"CREDITCARD,KEY",41,False,8,1,32,0,True,True,True,False
7,3331,Riverside Dr & W 104 St,-73.971146,40.801343,71,"CREDITCARD,KEY",58,False,11,7,40,0,True,True,True,False
8,4247,Riverside Dr & W 153 St,-73.949702,40.832164,71,"CREDITCARD,KEY",21,False,12,6,3,0,True,True,True,False
9,3437,Riverside Dr & W 91 St,-73.977004,40.793135,71,"CREDITCARD,KEY",59,False,17,1,41,0,True,True,True,False
