## bq_insert_overwrite 모듈 예제

In [9]:
# BigQuery project that holds the source table; also passed as `project=` when the sample is written back.
PROJECT = "emart-datafabric"
# Dataset that contains the source table.
DATASET = "common_dev"
# Source table name; later cells reuse it as the destination table name as well.
TABLE = "dfm_sample_st_log"
# Upper bound on the number of rows fetched by the sample query.
LIMIT = 30

!nslookup ontap-prd.jupyterhub.svc.cluster.local
import socket,os
print("===========================================================")
print(socket.gethostname())
MYIP = socket.gethostbyname(socket.gethostname())
MYIP
print("===========================================================")

import uuid
from pyspark.sql import SparkSession

# Application name has the form "emart-<USER or 'default'>-<random uuid4>".
tmp_uuid = str(uuid.uuid4())
app_name = f"emart-{os.environ.get('USER', 'default')}-{tmp_uuid}"

# BigQuery connector jar handed to Spark through `spark.jars`.
spark_jars = "hdfs:///jars/spark-bigquery-with-dependencies_2.12-latest.jar"

# Spark settings, applied to the builder one by one in the order listed here.
spark_settings = {
    "spark.app.name": app_name,
    "spark.driver.memory": "6g",
    "spark.executor.memory": "8g",
    "spark.shuffle.service.enabled": "true",
    "spark.dynamicAllocation.enabled": "true",
    "spark.dynamicAllocation.maxExecutors": "200",
    "spark.driver.maxResultSize": "6g",
    "spark.rpc.message.maxSize": "1024",
    "spark.yarn.queue": "default",
    "spark.ui.enabled": "false",
    # The driver advertises and binds to the IP resolved earlier in this cell.
    "spark.driver.host": MYIP,
    "spark.driver.bindAddress": MYIP,
    "spark.port.maxRetries": "128",
    "spark.jars": spark_jars,
}

builder = SparkSession.builder
for conf_key, conf_value in spark_settings.items():
    builder = builder.config(conf_key, conf_value)

# Hive support is enabled before the session is created (or an existing one is reused).
spark = builder.enableHiveSupport().getOrCreate()


In [2]:
# Helpers used throughout this notebook; one statement imports each name exactly once.
from pydatafabric.gcp import bq_to_df, df_to_bq_table, bq_insert_overwrite, get_spark

# The session built in the first cell is used as-is. The disabled line below presumably
# obtains a session from pydatafabric instead (not verified here).
#spark = get_spark()
print(spark)

Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
22/10/04 14:47:35 INFO org.apache.spark.SparkEnv: Registering MapOutputTracker
22/10/04 14:47:35 INFO org.apache.spark.SparkEnv: Registering BlockManagerMaster
22/10/04 14:47:35 INFO org.apache.spark.SparkEnv: Registering BlockManagerMasterHeartbeat
22/10/04 14:47:35 INFO org.apache.spark.SparkEnv: Registering OutputCommitCoordinator


### bq를 df로 받아서 df를 다시 다른 데이터셋으로 저장하기

In [3]:
# Sample query: at most LIMIT rows from the source table.
sample_query = f"""
    select *
    from  `{PROJECT}.{DATASET}.{TABLE}`
    limit {LIMIT}
"""

# Run the query through the existing Spark session and keep the result as `df`.
df = bq_to_df(sample_query, spark_session=spark)

# Store the sample as "temp_1d".<TABLE> in the same project.
df_to_bq_table(df, "temp_1d", TABLE, project=PROJECT)

                                                                                

### 기존 테이블 project 바꾸어서 insert overwrite하기

In [4]:
# Select everything from the table staged in temp_1d by the previous step.
overwrite_query = f"""
    select * 
    from `{PROJECT}.temp_1d.{TABLE}`
"""

# Fully qualified destination: same dataset/table name, but in another project.
destination_table = f"smart-ruler-304409.temp_1d.{TABLE}"

bq_insert_overwrite(overwrite_query, destination_table, project="smart-ruler-304409")

destination: smart-ruler-304409.temp_1d.dfm_sample_st_log
total_rows: 30
slot_secs: 0.64



### 데이터 확인하기

In [5]:
from pydatafabric.gcp import load_bigquery_ipython_magic

# Presumably registers the `%%bq` cell magic used by the following cell — TODO confirm in pydatafabric.
load_bigquery_ipython_magic()

In [6]:
%%bq
    -- NOTE(review): this previews the source table, not the destination written by bq_insert_overwrite above; confirm that is intended.
    select * from emart-datafabric.common_dev.dfm_sample_st_log LIMIT 10

Query complete after 0.01s: 100%|██████████| 2/2 [00:00<00:00, 1693.30query/s]                        
Downloading: 100%|██████████| 10/10 [00:00<00:00, 12.05rows/s]

BigQuery execution took 1 seconds.





Unnamed: 0,seq,site_code,url,guid1,guid2,insert_dt,device_type,os_type,os_name,browser_name,cust_id,rgst_dt,rgst_tm,ip_address,refer_url,tracking_code,dt
0,9999999999.0,40,/main/main.do,E5777BF4238F6E6EE0531F1D64AE2FDD,E5777BF423906E6EE0531F1D64AE2FDD,2022-08-06 01:36:38,E,E,OS X,Chrome,ad4f6a9d00a905a58e3b19d3b2443717a11cd539244787...,20220806,13638,39.115.53.89,https://deva-eapp.emart.com/,,2022-08-06
1,9999999999.0,40,/login/login.do,E5777BF4238F6E6EE0531F1D64AE2FDD,E5777BF423906E6EE0531F1D64AE2FDD,2022-08-06 01:36:40,E,E,OS X,Chrome,ad4f6a9d00a905a58e3b19d3b2443717a11cd539244787...,20220806,13640,39.115.53.89,https://deva-eapp.emart.com/main/main.do,,2022-08-06
2,9999999999.0,40,/common/EncryptData.do?generateKeypair=true,E5777BF4238F6E6EE0531F1D64AE2FDD,E5777BF423906E6EE0531F1D64AE2FDD,2022-08-06 01:36:44,E,E,OS X,Chrome,ad4f6a9d00a905a58e3b19d3b2443717a11cd539244787...,20220806,13644,39.115.53.89,https://deva-eapp.emart.com/login/login.do,,2022-08-06
3,9999999999.0,40,/login/processLogin.do,E5777BF4238F6E6EE0531F1D64AE2FDD,E5777BF423906E6EE0531F1D64AE2FDD,2022-08-06 01:36:44,E,E,OS X,Chrome,ad4f6a9d00a905a58e3b19d3b2443717a11cd539244787...,20220806,13644,39.115.53.89,https://deva-eapp.emart.com/login/login.do,,2022-08-06
4,9999999999.0,40,/login/processLoginResult.do?result=00,E5777BF4238F6E6EE0531F1D64AE2FDD,E5777BF423906E6EE0531F1D64AE2FDD,2022-08-06 01:36:45,E,E,OS X,Chrome,ad4f6a9d00a905a58e3b19d3b2443717a11cd539244787...,20220806,13645,39.115.53.89,https://deva-eapp.emart.com/login/login.do,,2022-08-06
5,9999999999.0,40,/main/main.do,E5777BF4238F6E6EE0531F1D64AE2FDD,E5777BF423906E6EE0531F1D64AE2FDD,2022-08-06 01:36:45,E,E,OS X,Chrome,ad4f6a9d00a905a58e3b19d3b2443717a11cd539244787...,20220806,13645,39.115.53.89,https://deva-eapp.emart.com/login/login.do,,2022-08-06
6,9999999999.0,40,/app/favoriteInfo.do?storeTp=E,E5777BF4238F6E6EE0531F1D64AE2FDD,E5777BF423906E6EE0531F1D64AE2FDD,2022-08-06 01:36:46,E,E,OS X,Chrome,ad4f6a9d00a905a58e3b19d3b2443717a11cd539244787...,20220806,13646,39.115.53.89,https://deva-eapp.emart.com/main/main.do,,2022-08-06
7,9999999999.0,40,/customersurvey/checkCustomerSurveyCustomList....,E5777BF4238F6E6EE0531F1D64AE2FDD,E5777BF423906E6EE0531F1D64AE2FDD,2022-08-06 01:36:46,E,E,OS X,Chrome,ad4f6a9d00a905a58e3b19d3b2443717a11cd539244787...,20220806,13646,39.115.53.89,https://deva-eapp.emart.com/main/main.do,,2022-08-06
8,9999999999.0,40,/main/main.do,E5777BF4238F6E6EE0531F1D64AE2FDD,E58C4FB2988C042BE0531F1D64AEBDBA,2022-08-06 13:49:56,E,E,OS X,Chrome,,20220806,134956,39.115.53.89,https://deva-eapp.emart.com/,,2022-08-06
9,9999999999.0,40,/login/login.do,E5777BF4238F6E6EE0531F1D64AE2FDD,E58C4FB2988C042BE0531F1D64AEBDBA,2022-08-06 13:50:00,E,E,OS X,Chrome,,20220806,135000,39.115.53.89,https://deva-eapp.emart.com/main/main.do,,2022-08-06


In [7]:
# Shut down the Spark session created in the first cell.
spark.stop()