In [1]:
# Date interpolated into the dt='...' filter of the BigQuery verification query.
# NOTE(review): despite the "NODASH" name, the value is dash-separated (YYYY-MM-DD).
DT_NODASH = "2022-09-23"
# GCP project passed to the BigQuery helpers and used to qualify the destination table.
GCP_PROJECT = "emart-datafabric"

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
from pydatafabric.vault_utils import get_secrets

# Look up the Oracle connection settings through the project's secrets helper.
# The returned mapping is read below for its 'jdbc_url', 'user' and 'password' entries.
oracle_info = get_secrets(
    mount_point="datafabric",
    path="oracle/datafabric/oracle-db",
)

In [31]:
from pydatafabric.ye import get_spark

# Obtain a Spark session from the project helper, passing the Oracle JDBC
# driver jar (ojdbc8) as an extra jar so the JDBC read below can load the driver.
spark = get_spark(
    extra_jars="gs://emart-datafabric-resources/jars/ojdbc8.jar",
)

In [32]:
# Raise Spark's cap on how many fields are rendered when plans/schemas are
# converted to strings for debug output, so wide tables are not truncated.
spark.conf.set("spark.sql.debug.maxToStringFields", 2000)

In [33]:
# Read the whole C##DPC.GTPSD_EP_PAY_RESULT_A table from Oracle over JDBC.
# The subquery alias ("a") is part of the dbtable expression passed to the reader.
df = (
    spark.read.format("jdbc")
    .options(
        url=oracle_info['jdbc_url'],
        user=oracle_info['user'],
        password=oracle_info['password'],
        driver="oracle.jdbc.driver.OracleDriver",
        dbtable="(SELECT * FROM C##DPC.GTPSD_EP_PAY_RESULT_A) a",
    )
    .load()
)

In [34]:
# Preview the rows read from Oracle.
df.show()

+------------+--------+------+-------+----------+--------------+--------------------+--------------+--------------------+----------+---------------+-------------------+---------+-------------------+---------+
|POS_STR_CODE|BIZ_DATE|POS_NO|TRAN_NO|TRAN_TCODE|TRAN_TYPE_CODE|            BAR_CODE|    TIME_STAMP|        MCH_ORDER_NO|PAY_RESULT|            MSG|            CRTN_DT|  CRTN_ID|             CHG_DT|   CHG_ID|
+------------+--------+------+-------+----------+--------------+--------------------+--------------+--------------------+----------+---------------+-------------------+---------+-------------------+---------+
|        4700|20220310|  1032|   8563|        00|            00|27182371100613712...|20220310140522| 2022031014050904185|     00000|             OK|2022-03-10 23:43:07|TRNROUTER|2022-03-10 23:43:07|TRNROUTER|
|        4700|20220310|  1032|   8564|        00|            00|27182400652754379...|20220310140729| 2022031014071904186|     00000|             OK|2022-03-10 23:43

In [35]:
# Import only the function this cell needs. A wildcard import from
# pyspark.sql.functions shadows Python builtins (sum, min, max, round, ...)
# for the rest of the notebook.
from pyspark.sql.functions import to_date

# Derive a DATE column from the yyyyMMdd string in BIZ_DATE; it is used as the
# partition column ("dt") when the table is written to BigQuery.
df = df.withColumn("DT", to_date(df.BIZ_DATE, "yyyyMMdd"))

# Lower-case every column name in a single pass (equivalent to renaming each
# column one by one, without a loop variable that shadows pyspark's `col`).
df = df.toDF(*[name.lower() for name in df.columns])

In [36]:
# Preview again to confirm the lower-cased column names and the new dt column.
df.show()

+------------+--------+------+-------+----------+--------------+--------------------+--------------+--------------------+----------+---------------+-------------------+---------+-------------------+---------+----------+
|pos_str_code|biz_date|pos_no|tran_no|tran_tcode|tran_type_code|            bar_code|    time_stamp|        mch_order_no|pay_result|            msg|            crtn_dt|  crtn_id|             chg_dt|   chg_id|        dt|
+------------+--------+------+-------+----------+--------------+--------------------+--------------+--------------------+----------+---------------+-------------------+---------+-------------------+---------+----------+
|        4700|20220310|  1032|   8563|        00|            00|27182371100613712...|20220310140522| 2022031014050904185|     00000|             OK|2022-03-10 23:43:07|TRNROUTER|2022-03-10 23:43:07|TRNROUTER|2022-03-10|
|        4700|20220310|  1032|   8564|        00|            00|27182400652754379...|20220310140729| 2022031014071904186

In [42]:
from pydatafabric.gcp import df_to_bq_table, bq_insert_overwrite

# Dataset and table that receive the Oracle data.
target_dataset = "temp_1d"
target_table = "temp_gtpsd_ep_pay_result_a"

# 1. Create the BigQuery table from the Spark DataFrame.
#    Always pass the project explicitly.
df_to_bq_table(df, target_dataset, target_table, project=GCP_PROJECT)

# 2. Store the data in the destination table, partitioned by "dt".
# NOTE(review): the source table in the SELECT is not project-qualified, unlike
# the destination; confirm both resolve to the same project.
bq_insert_overwrite(
    f"SELECT * from {target_dataset}.{target_table}",
    f"{GCP_PROJECT}.{target_dataset}.{target_table}",
    partition="dt",
)

                                                                                

destination: smart-ruler-304409.temp_1d.temp_gtpsd_ep_pay_result_a
total_rows: 6972
slot_secs: 0.644



In [49]:
from pydatafabric.gcp import get_bigquery_client

# Build the BigQuery client for the configured project. Always pass the project
# explicitly. (The original referenced an undefined name `PROJECT`, which raises
# NameError on a fresh kernel; the notebook's constant is GCP_PROJECT.)
bq = get_bigquery_client(project=GCP_PROJECT)

# Fetch a small sample from the partition for DT_NODASH to verify the load.
r = bq.query(f"SELECT * FROM temp_1d.temp_gtpsd_ep_pay_result_a WHERE dt='{DT_NODASH}' LIMIT 10")

In [50]:
# Number of rows in the query result (the query is capped with LIMIT 10).
r.result().total_rows

10

In [51]:
# Materialize the query result as a local DataFrame for display.
# It is bound to its own name rather than `df`, so `df` keeps referring to the
# Spark DataFrame and the earlier cells stay re-runnable.
sample_pdf = r.result().to_dataframe()
sample_pdf

Unnamed: 0,pos_str_code,biz_date,pos_no,tran_no,tran_tcode,tran_type_code,bar_code,time_stamp,mch_order_no,pay_result,msg,crtn_dt,crtn_id,chg_dt,chg_id,dt
0,4700,20220923,1023,3530,0,0,2718212794170774913267,20220923131326,2022092313131710033,0,OK,2022-09-23 13:13:43+00:00,TRNROUTER,2022-09-23 13:13:43+00:00,TRNROUTER,2022-09-23
1,4700,20220923,6101,8469,0,0,2718215187531159959266,20220923174330,2022092317432310094,0,OK,2022-09-23 17:43:54+00:00,TRNROUTER,2022-09-23 17:43:54+00:00,TRNROUTER,2022-09-23
2,4700,20220923,6101,8450,0,0,2718230852480052195013,20220923145259,2022092314524310061,0,OK,2022-09-23 14:53:40+00:00,TRNROUTER,2022-09-23 14:53:40+00:00,TRNROUTER,2022-09-23
3,4700,20220923,6101,8449,0,0,2718279751262360599679,20220923145030,2022092314501210060,0,OK,2022-09-23 14:50:54+00:00,TRNROUTER,2022-09-23 14:50:54+00:00,TRNROUTER,2022-09-23
4,4700,20220923,6101,8457,0,0,2718226187847471389552,20220923154853,2022092315484510074,0,OK,2022-09-23 15:50:58+00:00,TRNROUTER,2022-09-23 15:50:58+00:00,TRNROUTER,2022-09-23
5,4700,20220923,6101,8451,0,0,2718265603007704476600,20220923150109,2022092315010110064,0,OK,2022-09-23 15:02:27+00:00,TRNROUTER,2022-09-23 15:02:27+00:00,TRNROUTER,2022-09-23
6,4700,20220923,6101,8456,0,0,2718229613166149434046,20220923154603,2022092315455210073,0,OK,2022-09-23 15:46:16+00:00,TRNROUTER,2022-09-23 15:46:16+00:00,TRNROUTER,2022-09-23
7,4700,20220923,7606,9862,0,0,2718242648723910035396,20220923161418,2022092316141010084,0,OK,2022-09-23 16:14:56+00:00,TRNROUTER,2022-09-23 16:14:56+00:00,TRNROUTER,2022-09-23
8,4700,20220923,7606,9859,0,0,2718242406488959134139,20220923160924,2022092316090810078,0,OK,2022-09-23 16:09:44+00:00,TRNROUTER,2022-09-23 16:09:44+00:00,TRNROUTER,2022-09-23
9,4700,20220923,7606,9860,0,0,2718288407311040904542,20220923161057,2022092316104910080,0,OK,2022-09-23 16:11:20+00:00,TRNROUTER,2022-09-23 16:11:20+00:00,TRNROUTER,2022-09-23


In [52]:
# Shut down the Spark session now that the load has been verified.
spark.stop()