## 1. Import the required libraries

In [None]:
import pygsheets
import pandas as pd
from oauth2client.service_account import ServiceAccountCredentials
import gspread
import json

## 2. Pull the Google Analytics raw data from your data source

In [None]:
# Load the Google service-account key from a JSON file (relative path) into a dict
with open("google_credentials.json", "r") as f:
    google_credentials = json.load(f)

# Build service-account credentials for the requested scopes and authorize a gspread client
scope = ['https://www.googleapis.com/auth/cloud-platform', 'https://spreadsheets.google.com/feeds']
credentials_gs = ServiceAccountCredentials.from_json_keyfile_dict(google_credentials, scope)
gc = gspread.authorize(credentials_gs)

# Open the spreadsheet by key, read every cell of the "GA raw data" worksheet and
# load it into a dataframe: the first row supplies the column names, the
# remaining rows the data.
# 'YOUR_SPREADSHEET_KEY' is a placeholder that has to be replaced with a real key.
spreadsheet_key = 'YOUR_SPREADSHEET_KEY'
book = gc.open_by_key(spreadsheet_key)
worksheet = book.worksheet("GA raw data")
# NOTE(review): table[0] below fails with IndexError if get_all_values() returns
# an empty list — presumably the case for an empty worksheet; confirm.
table = worksheet.get_all_values()
ga_raw_data = pd.DataFrame(table[1:], columns=table[0])

## 3. Apply basic transformation to the raw dataframe

In [None]:
# Basic transformation of the raw dataframe into the format the attribution step expects.
# Keep only the columns used downstream.
ga_raw_data = ga_raw_data.filter(items=['date', 'user_id', 'journey_id', 'utm_medium', 'submitted_applications'])
# Cast the types BEFORE sorting: while 'date' still holds text, sort_values
# orders it lexicographically, which is only chronological for ISO-like formats.
ga_raw_data['submitted_applications'] = ga_raw_data['submitted_applications'].astype(str).astype(int)
ga_raw_data['date'] = pd.to_datetime(ga_raw_data['date'])
# Chronological order per user, then a clean 0..n-1 index.
ga_raw_data = ga_raw_data.sort_values(by=['date', 'user_id'], ascending=True)
ga_raw_data = ga_raw_data.reset_index(drop=True)
ga_raw_data.head()

## 4. Loop over the raw dataframe and create the attributed dataframe

In [None]:
# Linear multi-touch attribution: each converting session's conversions are
# divided by the session's rank in the user's history and that share is credited
# to the utm_medium of every session selected for the user, dated on the
# conversion date.
columns = ['date', 'utm_medium', 'conversions']
# Per-conversion frames are collected here and concatenated once after the loop.
# DataFrame.append was removed in pandas 2.0 and copied the whole frame per call.
attributed_frames = []
# looping over all the rows of the raw GA dataframe
for index, row in ga_raw_data.iterrows():
    # only rows that have at least 1 conversion trigger an attribution
    if row['submitted_applications'] <= 0:
        continue
    # the conversion row plus all sessions of this user dated on or before it
    single_conversion_df = ga_raw_data[(ga_raw_data['date'] <= pd.to_datetime(row["date"])) & (ga_raw_data['user_id'] == row["user_id"])]
    # sorting by date and number of conversions ascending
    single_conversion_df = single_conversion_df.sort_values(by=['date', 'submitted_applications'], ascending=True)
    # rank all the sessions in the user history (from the first one to the conversion)
    single_conversion_df['occurences'] = single_conversion_df.groupby('user_id').cumcount() + 1
    # locate the converting session inside the user's history
    is_conversion_row = (
        (single_conversion_df['submitted_applications'] == row["submitted_applications"])
        & (single_conversion_df['date'] == row["date"])
        & (single_conversion_df['utm_medium'] == row["utm_medium"])
    )
    # rank of the converting session = number of sessions sharing the credit
    occurences = single_conversion_df.loc[is_conversion_row, 'occurences'].iloc[0]
    # split the conversion(s) evenly across those sessions
    conversion_per_row = row["submitted_applications"] / occurences
    # assign the linearly distributed conversion to each row
    single_conversion_df['conversions'] = float(conversion_per_row)
    # stamp every touchpoint with the conversion date (the original wrote the
    # session rank into this column by mistake)
    single_conversion_df['date'] = row["date"]
    # filter the relevant columns for the attributed dataframe
    single_conversion_df = single_conversion_df.filter(items=columns)
    attributed_frames.append(single_conversion_df)

# pd.concat raises on an empty list, so fall back to an empty frame with the
# expected columns when no row had a conversion
if attributed_frames:
    attributed_conversion_df = pd.concat(attributed_frames)
else:
    attributed_conversion_df = pd.DataFrame(columns=columns)

attributed_conversion_df.head()

## 5. Send the attributed dataframe to the data source

In [None]:
# Authorize a pygsheets client with the service-account file.
# NOTE: this rebinds `gc`, which held the gspread client in the earlier cell.
gc = pygsheets.authorize(service_file='google_credentials.json')

# Open the destination Google spreadsheet by its title
sh = gc.open("Multi-touchpoint attribution model – Talent Acqusition")

# Select the worksheet at index 1.
# NOTE(review): pygsheets worksheet indexing is presumably zero-based, which would
# make this the SECOND tab rather than the first — confirm it is the intended target.
wks = sh[1]

# Write the attributed dataframe to the selected worksheet, starting at cell (1,1)
wks.set_dataframe(attributed_conversion_df,(1,1))