# Import Packages

In [3]:
from pathlib import Path

import pandas as pd

from dscigametrics.compute_metrics import compute_metrics
from dscigametrics.daily_plot import daily_plot
from dscigametrics.find_campaigns import find_campaigns
from dscigametrics.stat_summary import stat_summary

# print(dscigametrics.stat_summary.__version__)

# Read Data
## Read the Google Analytics Data as a Pandas DataFrame

In [7]:
# Locate the Google Analytics sample data. Where `tests/` sits relative to the
# working directory depends on where the notebook kernel was started, so probe
# the known locations in order. If none exists, fall back to the first entry so
# that `pd.read_csv` raises its usual FileNotFoundError for that path.
candidate_paths = [
    'tests/ga_metrics_test_data.csv',     # kernel started in the directory containing `tests/`
    '../tests/ga_metrics_test_data.csv',  # kernel started in a sibling directory of `tests/`
]
data_path = next(
    (path for path in candidate_paths if Path(path).is_file()),
    candidate_paths[0],
)
data = pd.read_csv(data_path)

# Examples

## `stat_summary` - Return Statistic Summary of Specified Campaign
Given a campaign ID and a start and end date, `stat_summary` calculates the mean, median and standard deviation of the 4 metrics. The data points are the values of each metric grouped by date.
- The 4 metrics are `return_rate`, `conversion_rate`, `ttl_revenue` and `avg_revenue`.
- The output is a pandas DataFrame whose index holds the mean, median and standard deviation, and whose columns are the 4 metrics.

### Assign the campaign ID and dates.

In [8]:
# Campaign to summarise; the ID must be an int.
campaign_id = 219011657
# Start and end date of the specified campaign; both must be ints as well.
start_date, end_date = 20220810, 20220811

In [9]:
# Summarise the campaign selected above: `stat_summary` receives the loaded
# data frame plus the campaign ID and the start/end dates (all ints here).
summary = stat_summary(data, campaign_id, start_date, end_date)
# Keep the result as the cell's last expression so the notebook renders it.
summary

Unnamed: 0,return_rate,conversion_rate,ttl_revenue,avg_revenue
Mean,0.85,0.05,389.5,38.95
Median,0.85,0.05,389.5,38.95
Standard Deviation,0.05,0.05,389.5,38.95


## Load Test Data 

In [6]:
# Locate the Google Analytics sample data. Where `tests/` sits relative to the
# working directory depends on where the notebook kernel was started, so probe
# the known locations in order. If none exists, fall back to the first entry so
# that `pd.read_csv` raises its usual FileNotFoundError for that path.
candidate_paths = [
    '../tests/ga_metrics_test_data.csv',  # kernel started in a sibling directory of `tests/`
    'tests/ga_metrics_test_data.csv',     # kernel started in the directory containing `tests/`
]
data_path = next(
    (path for path in candidate_paths if Path(path).is_file()),
    candidate_paths[0],
)
data = pd.read_csv(data_path)

In [8]:
# Preview the first rows of the loaded frame as a quick check of the read.
data.head()

Unnamed: 0,visitorId,visitNumber,visitId,visitStartTime,date,fullVisitorId,userId,clientId,channelGrouping,socialEngagementType,...,transaction.localTransactionShipping,transaction.transactionCoupon,item.transactionId,item.productName,item.productCategory,item.productSku,item.itemQuantity,item.itemRevenue,item.currencyCode,item.localItemRevenue
0,,1,1501591568,1501591568,20220801,3.41833e+18,,,Organic Search,Not Socially Engaged,...,,,,,,,,,,
1,,2,1501589647,1501589647,20220802,2.4744e+18,,,Referral,Not Socially Engaged,...,,,,,,,,,,
2,,1,1501616621,1501616621,20220803,5.87046e+18,,,Referral,Not Socially Engaged,...,,,,,,,,,,
3,,1,1501601200,1501601200,20220804,9.39781e+18,,,Referral,Not Socially Engaged,...,,,,,,,,,,
4,,1,1501615525,1501615525,20220805,6.0899e+18,,,Referral,Not Socially Engaged,...,,,,,,,,,,


## Example of `find_campaigns` 
#### Function Description:
The `find_campaigns` function is part of a toolkit designed to analyze marketing campaign performance over a specified period using data from Google Analytics. By inputting a dataframe containing campaign information, alongside the desired date range, campaign IDs and metric, users can swiftly identify the most and least effective campaigns. It returns a dictionary describing the best and worst campaigns for further use.

#### Steps:
- Define the analysis period (in int or Timestamp data format)
- Prepare the list of campaign IDs to analyze (in int format)
- Decide the metric to evaluate campaign performance (in str format)
- Call the function to find the best and worst campaigns based on the chosen metric (the conversion rate in this example)

In [12]:

# Analysis window: first and last date, written as ints.
start_date, end_date = 20220801, 20220825

# Campaigns to compare against each other (int IDs).
campaign_ids = [
    219011657,
    140569061,
    215934049,
    123851219,
]

# Name of the metric used to rank the campaigns.
metric = 'conversion_rate'

# Ask `find_campaigns` for the best and worst campaign under that metric.
output_dict = find_campaigns(
    data=data, start_date=start_date, end_date=end_date,
    campaign_ids=campaign_ids, metric=metric,
)



In [13]:
# Show the raw dictionary returned by `find_campaigns`.
print(output_dict)

{'best_campaign': {'id': 123851219, 'value': 0.116}, 'worst_campaign': {'id': 219011657, 'value': 0.056}}


In [15]:

# Report the best and the worst campaign, in that order, one line each.
for label, key in (("Best", "best_campaign"), ("Worst", "worst_campaign")):
    campaign = output_dict[key]
    print(f"{label} Campaign: ID {campaign['id']} with a {metric} of {campaign['value']}")


Best Campaign: ID 123851219 with a conversion_rate of 0.116
Worst Campaign: ID 219011657 with a conversion_rate of 0.056
