In [1]:
# from __future__ import print_function

import sys

import numpy as np
from pyspark.sql import SparkSession
from datetime import datetime, timedelta
import pyspark.sql.functions as F

import pytz
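# Debug leftover: `print(__name__)` — the trailing "builtins" is presumably the value
# it printed in this kernel. NOTE(review): if so, the `__main__` guard below never fires here; confirm.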
# Loan-application statuses that the report treats as "not yet CURRENT / PAID_OFF".
# For customers without a RE_PAYDAY loan they yield the mark 'new_authorization'.
PENDING_STATUSES = ("AUTHORIZATION", "BIND_CARD", "ISSUING", "READY_TO_ISSUE")

# (mark value, report column alias) pairs, listed in output column order.
NEW_CUSTOMER_MARKS = (
    ("new_not_apply", "新户-注册未申请"),
    ("new_authorization", "新户-获额未交易"),
    ("new_other", "新户-其他"),
)
OLD_CUSTOMER_MARKS = (
    ("old_current", "老户-在贷"),
    ("old_active", "老户-活跃"),
    ("old_loss", "老户-流失"),
    ("old_silence", "老户-沉默"),
    ("old_other", "老户-其他"),
)

# Name fragments matched against `applist` with LIKE '%<fragment>%'.
# NOTE: `_` is a single-character wildcard in LIKE, so 'Kredit_Pintar' also
# matches e.g. 'Kredit Pintar'.
COMPETITOR_APPS = (
    "AdaKami", "Easycash", "Kredit_Pintar", "Kta Kilat", "UangMe", "360Kredi",
    "Rupiah Cepat", "UKU", "Cairin", "Indodana", "Kredito", "FinPlus",
    "Pinjam Yuk", "Solusikita", "DanaRupiah", "Tunaikita", "Cashwagon",
)

# Lower bound compared (as a string) against partition_0-partition_1-partition_2.
TRACK_START_DATE = "2020-09-01"

# CTEs shared by every row of the report:
#   customer_mark - one segment label ("mark") per customer:
#       * customers with at least one RE_PAYDAY application are labelled old_*
#         from the status/creation time of their latest application (max id);
#       * customers with applications but no RE_PAYDAY one are labelled
#         new_authorization / new_other;
#       * customers with no application at all are labelled new_not_apply.
#   track       - distinct (partition, user_uuid, applist) combinations since
#                 TRACK_START_DATE.
#   total_log   - users that have any `track` row.
#   competitor  - users whose applist matches one of COMPETITOR_APPS.
# The `+ interval '7' hour` shifts are kept from the original query
# (NOTE(review): presumably a UTC -> UTC+7 conversion; `date(now())` in the
# report rows is NOT shifted the same way - confirm which is intended).
# The `customer_id is not null` filters guard the NOT IN subqueries: a single
# NULL in the subquery result would otherwise make NOT IN match no row at all.
REPORT_CTES_TEMPLATE = """
with
customer_mark as
(
    select aa.customer_id,
        case
            when app.status = 'CURRENT' then 'old_current'
            when app.status in ({pending})
                and date(app.create_time + interval '7' hour + interval '7' day) >= date(now() + interval '7' hour)
                then 'old_active'
            when app.status in ({pending})
                and date(app.create_time + interval '7' hour + interval '30' day) >= date(now() + interval '7' hour)
                then 'old_silence'
            when app.status in ({pending}, 'PAID_OFF') then 'old_loss'
            else 'old_other'
        end as mark
    from
    (
        select customer_id, max(id) as last_loan_id
        from `banda-etl-s3`.`t_loan_app`
        where customer_id in (select customer_id from `banda-etl-s3`.`t_loan_app` where loan_type = 'RE_PAYDAY')
        group by customer_id
    ) aa
    left join `banda-etl-s3`.`t_loan_app` app
    on aa.last_loan_id = app.id

    union all

    select aa.customer_id,
        case when app.status in ({pending}) then 'new_authorization' else 'new_other' end as mark
    from
    (
        select customer_id, max(id) as last_loan_id
        from `banda-etl-s3`.`t_loan_app`
        where customer_id not in (
            select customer_id from `banda-etl-s3`.`t_loan_app`
            where loan_type = 'RE_PAYDAY' and customer_id is not null
        )
        group by customer_id
    ) aa
    left join `banda-etl-s3`.`t_loan_app` app
    on aa.last_loan_id = app.id

    union all

    select id as customer_id, 'new_not_apply' as mark
    from `banda-etl-s3`.`t_customer`
    where id not in (
        select customer_id from `banda-etl-s3`.`t_loan_app` where customer_id is not null
    )
),
track as
(
    select partition_0, partition_1, partition_2, user_uuid, applist, count(*) as event_count
    from `ddb_event_track_s3`.`id_adapundi_event_track`
    where concat(partition_0,'-',partition_1,'-',partition_2) >= '{start_date}'
    group by partition_0, partition_1, partition_2, user_uuid, applist
),
total_log as
(
    select user_uuid, count(*) as count
    from track
    group by user_uuid
),
competitor as
(
    select user_uuid, count(*) as count
    from track
    where {competitor_filter}
    group by user_uuid
)
"""


def sql_quote_list(values):
    """Render `values` as a comma-separated list of single-quoted SQL literals."""
    return ", ".join("'{}'".format(value) for value in values)


def count_mark_columns():
    """Return the ten `count(...)` select expressions shared by every report row.

    Column order: the `新户` total, its three new_* segments, the `老户` total,
    then its five old_* segments. Each total counts only the marks of its own
    group; counting `mark.customer_id` unconditionally (as the query used to)
    makes both totals equal to the number of all customers.
    """
    def count_marks(marks, alias):
        return "count(if(mark.mark in ({}), mark.customer_id, null)) as `{}`".format(
            sql_quote_list(marks), alias)

    columns = []
    for total_alias, group in (("新户", NEW_CUSTOMER_MARKS), ("老户", OLD_CUSTOMER_MARKS)):
        columns.append(count_marks([mark for mark, _alias in group], total_alias))
        columns.extend(count_marks([mark], alias) for mark, alias in group)
    return ",\n    ".join(columns)


def report_row_sql(dimension, activity_cte=None):
    """Build one `select` (one output row) of the report.

    dimension    -- label written to the `统计纬度` column.
    activity_cte -- optional CTE name (`total_log` or `competitor`). When given,
                    only customers whose `t_customer.uid` equals a `user_uuid`
                    in that CTE are counted. Inner joins are used; this is
                    equivalent to the former left joins followed by
                    `where <cte>.user_uuid is not null`.
    """
    lines = [
        "select date(now()) as `日期`, '{}' as `统计纬度`,".format(dimension),
        "    " + count_mark_columns(),
        "from customer_mark mark",
    ]
    if activity_cte is not None:
        lines.append("join `banda-etl-s3`.`t_customer` cu on mark.customer_id = cu.id")
        lines.append("join {0} on cu.uid = {0}.user_uuid".format(activity_cte))
    return "\n".join(lines)


def build_report_sql():
    """Assemble the full Spark SQL text: shared CTEs plus three unioned report rows."""
    ctes = REPORT_CTES_TEMPLATE.format(
        pending=sql_quote_list(PENDING_STATUSES),
        start_date=TRACK_START_DATE,
        competitor_filter="\n        or ".join(
            "applist like '%{}%'".format(app) for app in COMPETITOR_APPS),
    )
    rows = [
        report_row_sql("全量"),
        report_row_sql("获取到applist用户", "total_log"),
        report_row_sql("applist包含竞品的用户", "competitor"),
    ]
    return ctes + "\n" + "\n\nunion all\n\n".join(rows) + "\n"


# NOTE(review): when this cell is executed through a remote kernel, `__name__`
# may not be "__main__", in which case nothing below runs - confirm whether the
# guard is wanted in the notebook.
if __name__ == "__main__":
    spark = (
        SparkSession.builder
        .appName("Python Demo")
        .config("hive.metastore.client.factory.class",
                "com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory")
        .config("spark.driver.maxResultSize", "4g")
        .enableHiveSupport()
        .getOrCreate()
    )
    spark.conf.set("hive.exec.dynamic.partition.mode", "nonstrict")
    spark.conf.set("spark.sql.sources.partitionOverwriteMode", "dynamic")
    # Kept as a module-level name in case later cells use it.
    databases = spark.sql("show databases").collect()
    # One row per statistics dimension; columns are the per-segment customer counts.
    df1 = spark.sql(build_report_sql())
#     df1.write.mode("overwrite").orc("s3://rupiahplus-data-warehouse/etl/banda/market/id_adapundi_real-time_competitor")



VBox()

Starting Spark application


ID,YARN Application ID,Kind,State,Spark UI,Driver log,Current session?
0,application_1611310116108_0001,pyspark,idle,Link,Link,✔


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

SparkSession available as 'spark'.


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

An error was encountered:
Invalid status code '400' from https://10.3.0.54:18888/sessions/0/statements/1 with error payload: {"msg":"requirement failed: Session isn't active."}
