In [28]:
%%capture
!pip3 install pandas

In [29]:
import pandas as pd
from pprint import pprint

# Known UTM combinations; each inner list is [utm_source, utm_campaign]
known_utm_combinations = [
    ["linkedin", "apply_directly"],  # Apply for a job directly through LinkedIn, RemoteOK or another platform
    ["linkedin", "my_profile_page"],
    ["linkedin", "msg_linkedin"], # Find someone in a company that is hiring

    ["email", "msg_email"], # Message someone directly using email

    ["pdf", "backend-developer-v1"],
    ["pdf", "product-manager-v1"]
]

# One row per known combination, with columns utm_source and utm_campaign
known_utm_combinations_df = pd.DataFrame(known_utm_combinations, columns=['utm_source', 'utm_campaign'])
# NOTE: pprint() is given a str here, so the table is shown as a quoted,
# line-split string literal (with "\n" escapes) rather than as a plain table.
pprint(known_utm_combinations_df.to_string())

# Distinct sources and campaigns; the stats cell uses them to mock and to filter the UTM stats
unique_utm_sources = known_utm_combinations_df['utm_source'].unique()
unique_utm_campaigns = known_utm_combinations_df['utm_campaign'].unique()

# Length, in days, of the period the analytics should cover
duration_days = 30

# Set to True to overwrite the fetched numbers with random mock data
# (e.g. when there is no real data available yet)
mock = True

# Timezone sent as the `timezone` query parameter of the stats request
timezone = "Etc/UTC"

('  utm_source          utm_campaign\n'
 '0   linkedin        apply_directly\n'
 '1   linkedin       my_profile_page\n'
 '2   linkedin          msg_linkedin\n'
 '3      email             msg_email\n'
 '4        pdf  backend-developer-v1\n'
 '5        pdf    product-manager-v1')


In [30]:
import os, requests
from random import randrange


# Fields requested from the stats endpoint; get_simple_analytics_stats joins
# them with commas into the `fields` query parameter.
fields_stats_for_simple_analytics = [
    "pageviews", # the total amount of page views in the specified period
    "visitors", # the total amount of visitors (unique page views) in the specified period
    "histogram", # an array with page views and visitors per day
    "countries", # a list of country codes
    "utm_sources",
    "utm_campaigns",
    "seconds_on_page" # the median of seconds a visitor spent on the page
]


def _mock_utm_entries(values):
    """Build one random stats record per UTM value.

    Each record has the keys 'pageviews', 'seconds_on_page', 'value' and
    'visitors'. Visitors are unique page views, so the visitor count is drawn
    from 1..pageviews and can never exceed the page views of the same record.
    """
    entries = []
    for value in values:
        pageviews = randrange(1, 35)
        entries.append({
            'pageviews': pageviews,
            'seconds_on_page': randrange(1, 30),
            'value': value,
            'visitors': randrange(1, pageviews + 1),
        })
    return entries


def mock_simple_analytics_stats(stats, utm_sources=None, utm_campaigns=None):
    """Overwrite the numbers in a stats dict with random mock data.

    The dict is modified in place and also returned. The existing entries of
    stats['histogram'] are kept (so their other keys, such as the date, are
    preserved); only their 'pageviews' and 'visitors' values are replaced.

    Args:
        stats: dict with a 'histogram' list of per-day dicts.
        utm_sources: iterable of utm_source values to generate records for;
            defaults to the notebook-level `unique_utm_sources`.
        utm_campaigns: iterable of utm_campaign values to generate records
            for; defaults to the notebook-level `unique_utm_campaigns`.

    Returns:
        The same `stats` dict with 'pageviews', 'visitors', 'histogram',
        'seconds_on_page', 'utm_sources' and 'utm_campaigns' mocked.

    Raises:
        KeyError: if `stats` has no 'histogram' key.
    """
    if utm_sources is None:
        utm_sources = unique_utm_sources
    if utm_campaigns is None:
        utm_campaigns = unique_utm_campaigns

    # Reset the overall counters; they are rebuilt from the daily values below
    stats['pageviews'] = 0
    stats['visitors'] = 0

    for histogram in stats['histogram']:
        page_views = randrange(0, 100)
        # Roughly every third or fourth page view comes from a new visitor
        visitors = page_views // randrange(3, 5)

        histogram['pageviews'] = page_views
        histogram['visitors'] = visitors

        # Add to overall stats so the totals match the histogram
        stats['pageviews'] += page_views
        stats['visitors'] += visitors

    stats['seconds_on_page'] = randrange(1, 35)

    # It would also be possible to populate these from the average number of
    # visitors, distributed over the known UTM tags and non-UTM visitors
    stats['utm_campaigns'] = _mock_utm_entries(utm_campaigns)
    stats['utm_sources'] = _mock_utm_entries(utm_sources)

    return stats
    


def convert_and_filter_utm_params(stats_utm, known_utm_params):
    """Turn UTM stats records into a DataFrame restricted to known values.

    Args:
        stats_utm: records (e.g. a list of dicts) that each carry a 'value'
            key naming the UTM source or campaign.
        known_utm_params: collection of UTM values to keep.

    Returns:
        A DataFrame with the rows whose 'value' is in `known_utm_params`
        (original row index preserved), or None when there are no such rows,
        including when `stats_utm` is empty.

    Raises:
        KeyError: if the records are non-empty but have no 'value' key.
    """
    df = pd.DataFrame(stats_utm)
    # An empty payload produces a frame without a 'value' column, so looking
    # it up would raise KeyError; treat it like "nothing matched" instead.
    if df.empty:
        return None
    df = df[df['value'].isin(known_utm_params)]
    return df if not df.empty else None



def get_simple_analytics_stats(start = None, end = None, timeout = 30):
    """Fetch the site stats from Simple Analytics and prepare them for analysis.

    Requests the fields listed in `fields_stats_for_simple_analytics` for the
    configured `timezone`. When the notebook-level `mock` flag is set, the
    fetched numbers are replaced with random mock data. The 'countries' entry
    is dropped, 'histogram' is converted to a DataFrame, and 'utm_sources' /
    'utm_campaigns' are converted to DataFrames filtered to the known values
    (or None when nothing matches).

    Args:
        start: start of the period; only sent when `end` is given as well.
        end: end of the period; only sent when `start` is given as well.
        timeout: seconds to wait for the HTTP request before giving up.

    Returns:
        The stats dict returned by the API with the conversions above applied.

    Raises:
        ValueError: if the response is not a dict with a truthy 'ok' entry.
    """
    fields_stats_for_simple_analytics_str = ','.join(fields_stats_for_simple_analytics)
    url = f"https://simpleanalytics.com/artbred.io.json?info=false&version=5&fields={fields_stats_for_simple_analytics_str}&timezone={timezone}"

    # By default the API returns data for the last 30 days
    if start and end:
        url += f"&start={start}&end={end}"

    # Without a timeout, requests would wait indefinitely on a stalled connection
    response = requests.get(url, headers={
        "Content-Type": "application/json",
    }, timeout=timeout)

    stats = response.json()
    # Report any unexpected payload as ValueError, not only an explicit ok=False
    if not isinstance(stats, dict) or not stats.get('ok'):
        raise ValueError(stats)

    if mock:
        stats = mock_simple_analytics_stats(stats)

    # TODO: country stats are not processed yet; drop them if present
    stats.pop('countries', None)

    stats['histogram'] = pd.DataFrame(stats['histogram'])

    stats['utm_sources'] = convert_and_filter_utm_params(stats['utm_sources'], unique_utm_sources)
    stats['utm_campaigns'] = convert_and_filter_utm_params(stats['utm_campaigns'], unique_utm_campaigns)

    return stats
    
    
# Fetch the stats without an explicit start/end range and show the resulting dict
simple_analytics_stats = get_simple_analytics_stats()
pprint(simple_analytics_stats)

{'docs': 'https://docs.simpleanalytics.com/api',
 'end': '2023-02-14T23:59:59.999Z',
 'generated_in_ms': 461,
 'histogram':           date  pageviews  visitors
0   2023-01-14         83        27
1   2023-01-15         55        18
2   2023-01-16         62        20
3   2023-01-17         63        21
4   2023-01-18         10         3
5   2023-01-19         40        13
6   2023-01-20         39        13
7   2023-01-21         75        18
8   2023-01-22         42        14
9   2023-01-23         68        22
10  2023-01-24         69        23
11  2023-01-25         14         4
12  2023-01-26         70        23
13  2023-01-27         55        13
14  2023-01-28          0         0
15  2023-01-29         94        31
16  2023-01-30         18         6
17  2023-01-31         65        16
18  2023-02-01          9         2
19  2023-02-02         48        12
20  2023-02-03         91        30
21  2023-02-04         97        32
22  2023-02-05         90        30
23  2023-02-