In [12]:
import numpy as np
from scipy import stats
import pandas as pd
from getpass import getpass
import snowflake.connector
# Prompt for the password interactively (input is not echoed) so that it is
# never written into the notebook; it is passed to snowflake.connector.connect.
password = getpass('Password: ')

In [None]:
# Connect to Snowflake using the password collected above.
# NOTE(review): `user` is an empty string here — presumably a placeholder that
# has to be filled in before this cell can authenticate; confirm.
cn = snowflake.connector.connect(
    account='ox58300.eu-west-1',
    user='',
    password=password,
    warehouse='COMPUTE_WH_S',
    database='REPORTING',
    schema='DWH',
)

In [None]:
# Total: first-response time before vs. after intent adoption

In [None]:
# SQL text for the per-project "before vs. after intent adoption" first-response
# aggregates. Outline of the CTEs, as written in the query below:
#   project_details              - projects with TEST_PROJECT=0 and PROJECT_DELETED_IND=0.
#   join_to_convo_from_intents   - distinct (date, project_id, event_type) rows for
#                                  'Conversation: Join to the conversation' events whose
#                                  source is 'intents', 'shippingPolicy', 'orderIssues'
#                                  or 'products', dated 2022-12-01 or later.
#   intent_adoption              - per project: earliest such date (adoption_date) and the
#                                  number of days from adoption_date to current_date-1
#                                  (diff_since_adoption).
#   ml_convos                    - classifier predictions restricted to the listed
#                                  order/product/refund/return/shipping intent labels.
#   conversation_thread          - distinct threads of adopting projects that match an
#                                  ml_convos row, started on/after
#                                  adoption_date - diff_since_adoption and on/after
#                                  2022-09-01; tipping_point is 'before' when the thread
#                                  started before adoption_date, otherwise 'after'.
#   conversation_threads_joined  - threads left-joined to their operator rows.
# The final SELECT keeps rows with has_messages_from_visitor=1 and
# is_first_response_in_thread=1 (which also drops threads with no operator row, since
# the left join leaves that flag NULL) and, per (project_id, tipping_point), returns the
# summed response_time, its standard deviation, the count of non-null response times,
# and their ratio as avg_frt.
# NOTE(review): response_time is presumably in seconds (a later cell divides by 60) —
# confirm against the source table.
query_total = '''
with project_details as(
    select PROJECT_ID
          ,PROJECT_PUBLIC_KEY

    from REPORTING.PROJECTS.PROJECT_DETAILS
    where TEST_PROJECT=0 and PROJECT_DELETED_IND=0
 ),

 join_to_convo_from_intents as (
     select distinct
     event_time::date                                    as period_date,
     event_data:"user_properties":"project_id"::integer  as project_id,
      event_type

      from staging.amplitude.stg_amplitude__raw_events
      where event_type ='Conversation: Join to the conversation'
      and
      (event_data:"event_properties":"source"='intents'
       or
       event_data:"event_properties":"source"
       in('shippingPolicy','orderIssues','products'))
       and period_date>='2022-12-01'
 ),
  intent_adoption as (
    select project_id
        , min(period_date) as adoption_date
        , datediff(day,adoption_date,current_date-1) as diff_since_adoption
    from join_to_convo_from_intents
    group by 1
) ,
  ml_convos as (
    select PUBLIC_KEY,
        THREAD_ID,
        CONV_ID
    from staging.NLP.STG_NLP__INTENT_CLASSIFIER_PREDICTIONS
    where INTENT_LABEL in ('order cancelation','order change','order damaged/wrong','order status','order support',
    'product','product exchange','refund','return','shipping change','shipping delivery issue','shipping policy')
),


 conversation_thread as (
    select distinct ct.project_id
                   , ia.adoption_date
                   , ct.started_at::DATE
                   , case when ct.started_at::DATE <adoption_date then 'before' else 'after' end tipping_point
                   , ct.conversation_id
                   , ct.id conversation_thread_id
                   , HAS_MESSAGES_FROM_VISITOR

    from intent_adoption ia
    join project_details pd
    on pd.PROJECT_ID=ia.project_id
    join reporting.core.conversation_thread ct
    on ct.PROJECT_ID=ia.project_id
    join ml_convos mc
     on mc.PUBLIC_KEY=pd.PROJECT_PUBLIC_KEY and  ct.id=mc.THREAD_ID and ct.started_at::DATE  >= dateadd(day,-diff_since_adoption,adoption_date)
     where ia.adoption_date is not null and ct.started_at::DATE>='2022-09-01'
 ),

 conversation_threads_operators as (
    select conversation_thread_id
          , operator_id
          , response_time
          , is_first_response_in_thread

    from reporting.core.conversation_threads_operators
 ),

conversation_threads_joined as (
     select * from conversation_thread
      left join conversation_threads_operators using(conversation_thread_id)
 )

select   project_id,tipping_point
       , sum(response_time) as total_first_response_time
       , round(stddev(RESPONSE_TIME),2) as standard_dev
   -- response_time is null when the convo was started by operator so we want to exclude these threads
       ,  sum(iff(response_time is not null,1,0)) as afrt_conversations_count
       , round(total_first_response_time/afrt_conversations_count,2) avg_frt

from conversation_threads_joined
where has_messages_from_visitor=1 and is_first_response_in_thread=1
group by 1,2
'''

# Connect to Snowflake.
# NOTE(review): this rebinds `cn`; the connection opened by the earlier connect
# cell is not closed anywhere in the visible notebook. The user name is also
# hard-coded here, unlike the blank one in that earlier cell.
cn = snowflake.connector.connect(
    account='ox58300.eu-west-1',
    user='mkulczyk',
    password=password,
    warehouse='COMPUTE_WH_S',
    database='REPORTING',
    schema='DWH',
)

# Run the query and pull the result set into a DataFrame.
data_total = pd.read_sql(sql=query_total, con=cn)

In [None]:
# Display the raw query result: one row per (PROJECT_ID, TIPPING_POINT) group.
data_total

In [25]:
# Keep only the (project, period) groups that have at least two conversations
# with a measured first response time.
data_filtered_convo_total = data_total.loc[data_total['AFRT_CONVERSATIONS_COUNT'].ge(2)]

In [None]:
# Display the groups that passed the AFRT_CONVERSATIONS_COUNT >= 2 filter.
data_filtered_convo_total

In [None]:
# Reshape to one row per project with the average first response time of each
# period ('before' / 'after') in its own column.
pivot_data_total = pd.pivot(
    data_filtered_convo_total,
    index='PROJECT_ID',
    columns='TIPPING_POINT',
    values='AVG_FRT',
)
pivot_data_total

In [None]:
# A paired comparison needs both periods, so drop projects that are missing
# either the 'after' or the 'before' average.
filtered_pivot_data_total = pivot_data_total.dropna(subset=['after', 'before'])
filtered_pivot_data_total

In [None]:
# Paired (related-samples) t-test of each project's average first response
# time before vs. after adoption.
stats.ttest_rel(
    a=filtered_pivot_data_total['before'],
    b=filtered_pivot_data_total['after'],
)

In [None]:
# Per-period totals across projects: number of conversations with a measured
# first response, and the summed first response time.
# NOTE(review): these totals are taken over data_filtered_convo_total, which
# still contains projects that have only one of the two periods, whereas the
# paired t-test uses filtered_pivot_data_total (both periods present) —
# confirm the differing populations are intended.
sum_convo_total, sum_time_total = (
    data_filtered_convo_total.pivot(
        index='PROJECT_ID',
        columns='TIPPING_POINT',
        values=metric,
    ).sum()
    for metric in ('AFRT_CONVERSATIONS_COUNT', 'TOTAL_FIRST_RESPONSE_TIME')
)

# Conversation-weighted average first response time per period, divided by 60
# (presumably seconds -> minutes; confirm the unit of response_time).
result_total = sum_time_total / sum_convo_total / 60
result_total

In [49]:
# Change in the overall average first response time: 'after' minus 'before'
# (a negative value means responses were faster after adoption).
diff_order_total = result_total['after'] - result_total['before']
diff_order_total

-2.4973253669751827