**PLEASE MAKE A COPY BEFORE CHANGING**

**Copyright** 2022 Google LLC

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    https://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.


<b>Important</b>
This content is intended for educational and informational purposes only.

## Instructions

##### 1. Export the Install Report from AppsFlyer.
##### 2. Upload the CSV file to Google Drive.
##### 3. Configure the file location below, then run this Colab.

# Import necessary packages

In [None]:
## Import Packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Mount Google Drive

In [None]:
## Mount Google Drive so the uploaded CSV is reachable from this runtime.
from google.colab import drive
# The mount point '/content/drive' is the prefix used by the default
# file_path in the CSV import cell.
drive.mount('/content/drive')
print("Log: Google Drive mounted on 'Files' tab")

# Import Appsflyer's *Install Report* as csv from Google Drive

In [None]:
file_path = "/content/drive/location/file.csv" # @param {type:"string"}
## Read the exported report into `df`.
## low_memory must be passed to read_csv to take effect; on its own the
## assignment did nothing. With low_memory=False pandas infers each column's
## dtype from the whole file instead of chunk by chunk, which avoids
## mixed-type columns on large exports.
low_memory = False
df = pd.read_csv(file_path, low_memory=low_memory)

# Prepare and check dataframe

In [None]:
# @title Select necessary columns and prepare dataframe { vertical-output: true, display-mode: "form" }
## Keep only install events and the columns used by the cells below.
## Row filter and column selection happen in a single .loc call, and .copy()
## makes the result an independent frame, so the column assignments that
## follow do not write into a slice of the raw report (this is what raised
## SettingWithCopyWarning before).
## NOTE: 'Event Name' is dropped here, so re-running this cell requires
## re-running the CSV import cell first.
df = df.loc[df['Event Name'] == 'install', [
    'Attributed Touch Type',
    'Attributed Touch Time',
    'Install Time',
    'Media Source',
    'Country Code',
    'Contributor 1 Touch Type',
    'Contributor 1 Touch Time',
    'Contributor 1 Media Source',
    'Contributor 2 Touch Type',
    'Contributor 2 Touch Time',
    'Contributor 2 Media Source',
    'Contributor 3 Touch Type',
    'Contributor 3 Touch Time',
    'Contributor 3 Media Source',
]].copy()

## Time elapsed between the attributed touch and the install.
df['Install-Touch Timestamp'] = (
    pd.to_datetime(df['Install Time'])
    - pd.to_datetime(df['Attributed Touch Time'])
)

## The same interval expressed in seconds. The subtraction above already
## yields timedeltas, so no further conversion is needed before .dt.
df['Install-Touch sec'] = df['Install-Touch Timestamp'].dt.total_seconds()

## Rename so the column parallels the 'Contributor N Media Source' columns.
df = df.rename(columns={'Media Source': 'Attributed Media Source'})
df.head(3)

In [None]:
# @title Describe data { vertical-output: true, display-mode: "form" }

grouping = "Attributed Media Source" #@param ["Attributed Media Source", "Contributor 1 Media Source", "Contributor 2 Media Source", "Contributor 3 Media Source"]
column = 'Install-Touch sec' # @param['Install-Touch sec']
min_entries =  500 # @param {type:"number"}

## Summary statistics of `column` per media source.
## The column is selected BEFORE aggregating: running mean/std over every
## column of df fails on the text columns with recent pandas releases, and
## computes statistics that were thrown away anyway.
df_cont = df.groupby(grouping)[column].agg(['count', 'mean', 'min', 'max', 'std'])

## Largest sources first; drop sources with fewer than `min_entries` rows.
df_cont = df_cont.sort_values(by='count', ascending=False)
df_cont = df_cont[df_cont['count'] >= min_entries]

##Affects next card
## `medias` is used as the category order of the violin plot, which plots
## 'Attributed Media Source' regardless of the `grouping` chosen here.
medias = df_cont.index.tolist()

df_cont

# Plots

In [None]:
# @title Use Violin Plots to compare distributions side by side { vertical-output: true, display-mode: "form" }
col_x = 'Attributed Media Source'
col_y = 'Install-Touch sec'

## NOTE(review): the second call appears to reset the settings made by the
## first one (including font_scale) - confirm which styling is intended.
sns.set(rc={'axes.facecolor':'white', 'figure.facecolor':'white'}, font_scale=1.15)
sns.set_theme(style="whitegrid")

sec_min =  0 # @param {type:"number"}
sec_max = 960 # @param {type:"number"}

## One violin per media source, restricted to touch-to-install times inside
## [sec_min, sec_max] (both bounds inclusive). The lower bound was previously
## ignored, so negative intervals leaked into the plot.
## `medias` comes from the "Describe data" cell and fixes the violin order.
f, ax = plt.subplots(figsize=(30, 8))
ax = sns.violinplot(
    x=col_x,
    y=col_y,
    data=df[df[col_y].between(sec_min, sec_max)],
    palette="tab20_r",
    bw=.2,
    cut=1,
    linewidth=1,
    order=medias,
    ax=ax,
)

In [None]:
#@title Plot histogram to compare distributions { vertical-output: true, display-mode: "form" }

max_sec = 960 # @param {type:"number"}
bsize = 10 # @param {type:"number"}
#Change baseline to desired media source
baseline = 'googleadwords_int' # @param["googleadwords_int"] {allow-input: true}
#Change media_source to compare
media_source = 'googleadwords_int' # @param["googleadwords_int"] {allow-input: true}

## Rows whose touch-to-install time lies within [0, max_sec] seconds.
in_window = df['Install-Touch sec'].between(0, max_sec)
df_filtered = df[in_window]

## Split the windowed rows into the baseline source and the compared source.
attributed_source = df_filtered['Attributed Media Source']
df_filtered1 = df_filtered[attributed_source == baseline]
df_filtered2 = df_filtered[attributed_source == media_source]

sns.set(rc={'axes.facecolor':'white', 'figure.facecolor':'white'})
f, ax = plt.subplots(figsize=(20, 10))

## Both histograms share the same bin edges (width `bsize`) and are drawn as
## densities so sources with different install volumes stay comparable.
bin_edges = range(0, max_sec + bsize, bsize)
for subset, colour, legend_label in (
    (df_filtered1, "slategray", baseline),
    (df_filtered2, "deeppink", media_source),
):
    sns.histplot(subset['Install-Touch sec'], stat='density', kde=False,
                 color=colour, label=legend_label, bins=bin_edges)
plt.legend()
plt.show()

# Contribution Ratio

In [None]:
#@title Evaluate contribution/attribution ratio { vertical-output: true, display-mode: "form" }

attributed_col = 'Attributed Media Source'
contributor_cols = [
    'Contributor 1 Media Source',
    'Contributor 2 Media Source',
    'Contributor 3 Media Source',
]

## Count how often each media source appears in each role.
## Every count column is named explicitly after its source column: recent
## pandas releases name the value_counts() result 'count', which made the
## joins collide and the column lookups below fail.
df_contrib = df[attributed_col].value_counts().rename(attributed_col).to_frame()
for col in contributor_cols:
    df_contrib = df_contrib.join(
        df[col].value_counts().rename(col).to_frame(), how='outer')

## A source missing from a role has zero occurrences there; cast back to int
## so the "{:,}" format renders whole numbers rather than floats.
df_contrib = df_contrib.fillna(0).astype(int)

## Contributions = number of times the source appears as contributor 1-3.
df_contrib['Contributions'] = df_contrib[contributor_cols].sum(axis=1)
## Ratio of contributions to attributed installs. A source that only ever
## appears as a contributor has zero attributed installs, so its ratio is inf.
df_contrib['Ratio'] = df_contrib['Contributions'] / df_contrib[attributed_col]

df_contrib = df_contrib.sort_values(by=[attributed_col], ascending=False)
df_contrib.style.format({'Attributed Media Source': "{:,}",
                         'Contributor 1 Media Source': "{:,}",
                         'Contributor 2 Media Source': "{:,}",
                         'Contributor 3 Media Source': "{:,}",
                         'Contributions': "{:,}",
                         'Ratio': "{:.2%}"})