In [None]:
# Import python packages
import streamlit as st
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# We can also use Snowpark for our analyses!
from snowflake.snowpark.context import get_active_session
# Keep a handle on the active Snowpark session returned by get_active_session().
session = get_active_session()
import warnings
# Suppress every Python warning for the rest of the notebook run
# (this also hides pandas warnings raised inside the validation helpers).
warnings.filterwarnings("ignore")


In [None]:
def remove_double_quotes_from_object_columns(df):
    """Remove every double-quote character from the text held in object columns.

    Only elements that are real ``str`` instances are rewritten. Any other
    value stored in an object-dtype column (``None``, NaN, numbers,
    ``Decimal`` ...) is passed through untouched. Going through the ``.str``
    accessor instead replaces such values with NaN in a mixed column and
    raises ``AttributeError`` on an object column that holds no strings.

    Args:
        df: pandas DataFrame to clean. It is modified in place.

    Returns:
        The same DataFrame object, so the result can be re-assigned by callers.
    """
    def _strip_quotes(value):
        # Literal (non-regex) removal, applied to strings only.
        return value.replace('"', '') if isinstance(value, str) else value

    for column in df.select_dtypes(include='object').columns:
        # Rebuild the column with an explicit object dtype so pandas does not
        # re-infer a numeric dtype for columns that contain no strings.
        df[column] = pd.Series(
            [_strip_quotes(value) for value in df[column]],
            index=df.index,
            dtype=object,
        )
    return df

In [None]:
def video_duration_validation(data):
    """Print a report about the VIDEO_DURATION column of ``data``.

    When at least one row has a duration of exactly 0, the report shows how
    many such rows exist, their share of all rows, and the distinct VIDEO_ID
    values they belong to. The maximum duration is always printed.

    Args:
        data: DataFrame with VIDEO_DURATION and VIDEO_ID columns.

    Returns:
        None. The report is written to stdout.
    """
    print('##### Starting Video Duration Validation ######')
    total_records = len(data)
    zero_rows = data.loc[data['VIDEO_DURATION'] == 0]
    count_0 = len(zero_rows)
    if count_0 > 0:
        # Distinct videos behind the zero-duration rows.
        zero_video_ids = zero_rows['VIDEO_ID'].unique()
        unique_videos = len(zero_video_ids)
        print(f"   Number of records with video_duration == 0: {count_0} represents the {round((count_0/total_records)*100,2)}%")
        print(f'   Number of videos {unique_videos}')
        print(zero_video_ids)
    print(f"   Max Video Duration: {data['VIDEO_DURATION'].max()}")
    print('##### Ending Video Duration Validation ######')


In [None]:
def total_ad_capacity_validation(data, ad_interval_seconds=120):
    """Print a report about the TOTAL_ADS_CAPACITY column of ``data``.

    Three checks are run, and a line is printed for each one that finds rows:
      * TOTAL_ADS_CAPACITY equal to 0;
      * TOTAL_ADS_CAPACITY different from the expected capacity, computed as
        ``1 + VIDEO_DURATION // ad_interval_seconds``;
      * TOTAL_ADS_VIEWED greater than TOTAL_ADS_CAPACITY.

    The expected capacity is kept in a local Series; ``data`` is not modified
    (no scratch ``ADS_CAPACITY`` column is written to the caller's frame).

    Args:
        data: DataFrame with VIDEO_DURATION, TOTAL_ADS_CAPACITY and
            TOTAL_ADS_VIEWED columns.
        ad_interval_seconds: length of video, in the unit of VIDEO_DURATION,
            that adds one more expected ad slot. Defaults to 120.

    Returns:
        None. The report is written to stdout.

    Raises:
        ValueError: if ``ad_interval_seconds`` is not positive.
    """
    if ad_interval_seconds <= 0:
        raise ValueError("ad_interval_seconds must be a positive number")

    print('##### Starting total ad capacity Validation ######')
    total_records = data.shape[0]
    reported_capacity = data['TOTAL_ADS_CAPACITY']
    # One slot for every video plus one per complete interval of duration.
    expected_capacity = 1 + (data['VIDEO_DURATION'] // ad_interval_seconds)

    total_0 = int((reported_capacity == 0).sum())
    ads_dif = int((reported_capacity != expected_capacity).sum())
    ads_view = int((data['TOTAL_ADS_VIEWED'] > reported_capacity).sum())

    # A non-zero count implies total_records > 0, so the divisions are safe.
    if total_0 > 0:
        print(f"   Number of records with total_ads_capacity == 0: {total_0} represents the {round((total_0/total_records)*100,2)}%")
    if ads_dif > 0:
        print(f"   Number of records with incorrect Ads capacity: {ads_dif} represents the {round((ads_dif/total_records)*100,2)}%")
    if ads_view > 0:
        print(f"   Number of records ads_viewed > ads_capacity: {ads_view} represents the {round((ads_view/total_records)*100,2)}%")

    print('##### Ending total ad capacity Validation ######')

In [None]:
def max_playhead_validation(data):
    """Print a playhead-position report for the rows whose VIDEO_SOURCE is 'webiny'.

    Only rows with ``VIDEO_SOURCE == 'webiny'`` are inspected, and every
    percentage is relative to that subset. A line is printed for each check
    that finds at least one row:
      1. MAX_PLAYHEAD_POSITION below 0;
      2. MAX_PLAYHEAD_POSITION above VIDEO_DURATION;
      3. VIDEO_COMPLETED equal to the string 'true' with MAX_PLAYHEAD_POSITION 0;
      4. VIDEO_COMPLETED equal to 'true' with MAX_PLAYHEAD_POSITION above
         VIDEO_DURATION;
      5. PLAYHEAD_POSITION above VIDEO_DURATION.

    Args:
        data: DataFrame with VIDEO_SOURCE, MAX_PLAYHEAD_POSITION,
            PLAYHEAD_POSITION, VIDEO_DURATION and VIDEO_COMPLETED columns.

    Returns:
        None. The report is written to stdout.
    """
    print('##### Starting max playhead Validation ######')
    webiny = data[data['VIDEO_SOURCE'] == 'webiny']
    total_records = len(webiny)
    print(f'  Webiny Records {total_records}')

    # Reusable pieces of the row conditions.
    max_playhead = webiny['MAX_PLAYHEAD_POSITION']
    duration = webiny['VIDEO_DURATION']
    completed = webiny['VIDEO_COMPLETED'] == 'true'
    beyond_end = max_playhead > duration

    total_low = int((max_playhead < 0).sum())
    total_high = int(beyond_end.sum())
    vid_compl = int((completed & (max_playhead == 0)).sum())
    vid_compl2 = int((completed & beyond_end).sum())
    playhead_high = int((webiny['PLAYHEAD_POSITION'] > duration).sum())

    if total_low > 0:
        print(f"  1Number of records max_playhead < 0: {total_low} represents the {round((total_low/total_records)*100,2)}%")
    if total_high > 0:
        print(f"  2Number of records max_playhead > video_duration: {total_high} represents the {round((total_high/total_records)*100,2)}%")
    if vid_compl > 0:
        print(f"  3Number of records video_complete = true and max_playhead==0: {vid_compl} represents the {round((vid_compl/total_records)*100,2)}%")
    if vid_compl2 > 0:
        print(f"  4Number of records video_complete = true and max_playhead > video_duration: {vid_compl2} represents the {round((vid_compl2/total_records)*100,2)}%")
    if playhead_high > 0:
        print(f"  5Number of records  playhead_position > video_duration: {playhead_high} represents the {round((playhead_high/total_records)*100,2)}%")
    print('##### Ending max playhead Validation ######')

In [None]:
def ad_time_validation(data):
    """Print a consistency report between TOTAL_ADS_VIEWED and TOTAL_AD_TIME.

    The total row count is printed first. Then a line is printed for each
    check that finds at least one row:
      * ads were viewed (TOTAL_ADS_VIEWED > 0) but TOTAL_AD_TIME is 0;
      * no ads were viewed (TOTAL_ADS_VIEWED == 0) but TOTAL_AD_TIME is above 0;
      * TOTAL_AD_TIME is negative.

    Args:
        data: DataFrame with TOTAL_ADS_VIEWED and TOTAL_AD_TIME columns.

    Returns:
        None. The report is written to stdout.
    """
    print('##### Starting Ad Time Validation ######')
    total_records = len(data)
    print(f'{total_records}')

    ads_viewed = data['TOTAL_ADS_VIEWED']
    ad_time = data['TOTAL_AD_TIME']

    adtime_0 = int(((ads_viewed > 0) & (ad_time == 0)).sum())
    adview_0 = int(((ads_viewed == 0) & (ad_time > 0)).sum())
    adtime_low = int((ad_time < 0).sum())

    if adtime_0 > 0:
        print(f"   Number of records where ads_viewed > 0 and ad_time == 0: {adtime_0} represents the {round((adtime_0/total_records)*100,2)}%")
    if adview_0 > 0:
        print(f"   Number of records where ads_viewed == 0 and ad_time > 0: {adview_0} represents the {round((adview_0/total_records)*100,2)}%")
    if adtime_low > 0:
        print(f"   Total Ad Time < 0: {adtime_low} represents the {round((adtime_low/total_records)*100,2)}%")
    print('##### Ending Ad Time Validation ######')

In [None]:
def watch_time_validation(data):
    """Print a consistency report between TOTAL_WATCH_TIME and TOTAL_SESSION_TIME.

    A line is printed for each check that finds at least one row:
      * TOTAL_WATCH_TIME greater than TOTAL_SESSION_TIME;
      * TOTAL_WATCH_TIME negative;
      * TOTAL_SESSION_TIME negative.

    Args:
        data: DataFrame with TOTAL_WATCH_TIME and TOTAL_SESSION_TIME columns.

    Returns:
        None. The report is written to stdout.
    """
    print('##### Starting Watch time Validation ######')
    total_records = len(data)

    watch_time = data['TOTAL_WATCH_TIME']
    session_time = data['TOTAL_SESSION_TIME']

    watch_high = int((watch_time > session_time).sum())
    watch_lower = int((watch_time < 0).sum())
    sess_lower = int((session_time < 0).sum())

    if watch_high > 0:
        print(f"   Total Watch > Total Session: {watch_high} represents the {round((watch_high/total_records)*100,2)}%")
    if watch_lower > 0:
        print(f"   Total Watch < 0: {watch_lower} represents the {round((watch_lower/total_records)*100,2)}%")
    if sess_lower > 0:
        print(f"   Total Session < 0: {sess_lower} represents the {round((sess_lower/total_records)*100,2)}%")
    print('##### Ending Watch Time Validation ######')

In [None]:
-- Reads the video-metric keys of EVENTATTRIBUTES from
-- MP_VW_EVENT_OTHER_VIDEOVIEWENDED, restricted to Production rows of the
-- NextWeb app whose eventdate is yesterday (CURRENT_DATE - 1).
-- Lines starting with `--` inside the select list are columns that are
-- currently switched off.
-- NOTE(review): the extracted values are not cast here; presumably they come
-- back as quoted text, which would explain the quote stripping and int64
-- casts done in later cells -- confirm.
-- NOTE(review): a later Python cell reads `vve_query`; presumably that is the
-- name of this SQL cell -- confirm.
select 
--HTTPHEADERUSERAGENT, MPARTICLEUSERID, OTHERUSERID, OTHERUSERID3, BATCHID, CLIENTIP, COUNTRYCODE, CITYNAME, POSTALCODE, REGIONCODE, SESSIONID,        
--        SESSIONSTARTTIMESTAMP, EVENTID, EVENTDATE, EVENTHOUR, EVENTNAME, 
--        EVENTATTRIBUTES, USERATTRIBUTES, MPARTICLEDEVICEID,eventtimestamp,
        --EVENTATTRIBUTES:"app_name" as app_name,
        --EVENTATTRIBUTES:"car_data_array" as car_data_array,
        --EVENTATTRIBUTES:"car_make" as car_make,
        --EVENTATTRIBUTES:"car_model" as car_model,
        --EVENTATTRIBUTES:"car_segment" as car_segment,
        --EVENTATTRIBUTES:"car_trim" as car_trim,
        --EVENTATTRIBUTES:"car_year" as car_year,
        --EVENTATTRIBUTES:"content_brand" as content_brand,
        --EVENTATTRIBUTES:"content_id" as content_id,
        --EVENTATTRIBUTES:"content_modification_date" as content_modification_date,
        --EVENTATTRIBUTES:"content_photographer_primary" as content_photographer_primary,
        --EVENTATTRIBUTES:"content_photographer_secondary" as content_photographer_secondary,
        --EVENTATTRIBUTES:"content_publication_date" as content_publication_date,
        --EVENTATTRIBUTES:"content_tags" as content_tags,
        --EVENTATTRIBUTES:"content_title" as content_title,
        --EVENTATTRIBUTES:"content_type" as content_type,
        --EVENTATTRIBUTES:"content_version" as content_version,
        --EVENTATTRIBUTES:"content_writer_primary" as content_writer_primary,
        --EVENTATTRIBUTES:"content_writer_secondary" as content_writer_secondary,
        --EVENTATTRIBUTES:"device_family" as device_family,
        --EVENTATTRIBUTES:"device_screen_area" as device_screen_area,
        --EVENTATTRIBUTES:"device_screen_height" as device_screen_height,
        --EVENTATTRIBUTES:"device_screen_width" as device_screen_width,
        --EVENTATTRIBUTES:"device_user_agent" as device_user_agent,
        --EVENTATTRIBUTES:"device_user_agent_bot" as device_user_agent_bot,
        --EVENTATTRIBUTES:"dq_modifications" as dq_modifications,
        --EVENTATTRIBUTES:"event_name" as event_name,
        --EVENTATTRIBUTES:"inventory_widget_present" as inventory_widget_present,
        EVENTATTRIBUTES:"max_playhead_position" as max_playhead_position,
        --EVENTATTRIBUTES:"nitrous_version" as nitrous_version,
        --EVENTATTRIBUTES:"page_id" as page_id,
        --EVENTATTRIBUTES:"platform_name" as platform_name,
        EVENTATTRIBUTES:"playhead_position" as playhead_position,
        --EVENTATTRIBUTES:"session_id" as session_id,
        EVENTATTRIBUTES:"total_ad_time" as total_ad_time,
        EVENTATTRIBUTES:"total_ads_capacity" as total_ads_capacity,
        EVENTATTRIBUTES:"total_ads_viewed" as total_ads_viewed,
        EVENTATTRIBUTES:"total_session_time" as total_session_time,
        EVENTATTRIBUTES:"total_watch_time" as total_watch_time,
        --EVENTATTRIBUTES:"true_view" as true_view,
        --EVENTATTRIBUTES:"video_casted" as video_casted,
        EVENTATTRIBUTES:"video_completed" as video_completed,
        EVENTATTRIBUTES:"video_duration" as video_duration,
        EVENTATTRIBUTES:"video_id" as video_id,
        --EVENTATTRIBUTES:"video_publication_date" as video_publication_date,
        --EVENTATTRIBUTES:"video_session_id" as video_session_id,
        EVENTATTRIBUTES:"video_source" as video_source,
        --EVENTATTRIBUTES:"video_tags" as video_tags,
        --EVENTATTRIBUTES:"video_title" as video_title,
        --EVENTATTRIBUTES:"video_type" as video_type,
        --EVENTATTRIBUTES:"view_canonical_path" as view_canonical_path,
        --EVENTATTRIBUTES:"view_canonical_url" as view_canonical_url,
        --EVENTATTRIBUTES:"view_count" as view_count,
        --EVENTATTRIBUTES:"view_domain" as view_domain,
        --EVENTATTRIBUTES:"view_group" as view_group,
        --EVENTATTRIBUTES:"view_name" as view_name,
        --EVENTATTRIBUTES:"view_name_previous" as view_name_previous,
        --EVENTATTRIBUTES:"view_path" as view_path,
        --EVENTATTRIBUTES:"view_sponsored" as view_sponsored,
        --EVENTATTRIBUTES:"view_state" as view_state,
        --EVENTATTRIBUTES:"view_subdomain" as view_subdomain,
        --EVENTATTRIBUTES:"view_template" as view_template,
        --EVENTATTRIBUTES:"view_url" as view_url,
        EVENTATTRIBUTES:"window_id" as window_id,
-- NOTE(review): the select list ends with a trailing comma before FROM; this
-- relies on the engine accepting trailing commas -- confirm.
from MP_VW_EVENT_OTHER_VIDEOVIEWENDED
WHERE appenvironment = 'Production'
AND   eventdate = CURRENT_DATE - 1
AND   APPNAME = 'NextWeb'

In [None]:
# NOTE(review): `vve_query` is not assigned in any Python cell shown here;
# presumably it is the result handle of the SQL cell above -- confirm the cell name.
# Convert it into a pandas DataFrame for the validation helpers.
vve = vve_query.to_pandas()
# Shown as the cell output: (row count, column count) of the loaded frame.
vve.shape

In [None]:
# Remove double-quote characters from the object (text) columns so the
# int64 casts in the next cell can parse the values.
vve = remove_double_quotes_from_object_columns(vve)

In [None]:
# Cast every numeric metric column to int64, one column at a time and in a
# fixed order, so the validation helpers can compare them numerically.
for _metric_column in (
    'PLAYHEAD_POSITION',
    'VIDEO_DURATION',
    'MAX_PLAYHEAD_POSITION',
    'TOTAL_AD_TIME',
    'TOTAL_ADS_CAPACITY',
    'TOTAL_ADS_VIEWED',
    'TOTAL_SESSION_TIME',
    'TOTAL_WATCH_TIME',
):
    vve[_metric_column] = vve[_metric_column].astype('int64')

In [None]:
# Print the VIDEO_DURATION report (zero durations and maximum) for the loaded events.
video_duration_validation(vve)

In [None]:
# Print the TOTAL_ADS_CAPACITY report for the loaded events.
total_ad_capacity_validation(vve)

In [None]:
# Print the playhead-position report; the helper only looks at rows whose VIDEO_SOURCE is 'webiny'.
max_playhead_validation(vve)

In [None]:
# Print the TOTAL_ADS_VIEWED / TOTAL_AD_TIME consistency report for the loaded events.
ad_time_validation(vve)

In [None]:
# Print the TOTAL_WATCH_TIME / TOTAL_SESSION_TIME consistency report for the loaded events.
watch_time_validation(vve)