# Problematic Reports Regarding Performance

---

>By the end of this notebook you will know how to answer the following question.
>- [x] What is a problematic report regarding performance?

In [78]:
import pandas as pd

### 1.  Loading data

- **Active Reports**

In [79]:
# Catalogue of active reports. low_memory=False makes pandas parse the file in one pass,
# which avoids mixed-dtype inference between chunks.
active_reports = pd.read_csv(
    "../../datasets/active_reports.csv",
    low_memory=False,
)

In [80]:
# Plain Python list of the active report Ids, used later for membership filtering.
active_ids = active_reports['Id'].tolist()

In [81]:
# ApiTotalUsage event log. low_memory=False makes pandas parse the file in one pass,
# which avoids mixed-dtype inference between chunks.
API_TOTAL_USAGE = pd.read_csv(
    "../../data/ApiTotalUsage/ApiTotalUsage_logs.csv",
    low_memory=False,
)

In [82]:
# Column names of the API usage log as a NumPy array.
API_TOTAL_USAGE.columns.to_numpy()

array(['EVENT_TYPE', 'TIMESTAMP', 'REQUEST_ID', 'ORGANIZATION_ID',
       'USER_ID', 'API_FAMILY', 'API_VERSION', 'API_RESOURCE',
       'CLIENT_NAME', 'HTTP_METHOD', 'CLIENT_IP',
       'COUNTS_AGAINST_API_LIMIT', 'CONNECTED_APP_ID', 'ENTITY_NAME',
       'STATUS_CODE', 'TIMESTAMP_DERIVED'], dtype=object)

In [83]:
# Preview of the raw API usage log (the notebook renders a truncated view: first and last rows).
API_TOTAL_USAGE

Unnamed: 0,EVENT_TYPE,TIMESTAMP,REQUEST_ID,ORGANIZATION_ID,USER_ID,API_FAMILY,API_VERSION,API_RESOURCE,CLIENT_NAME,HTTP_METHOD,CLIENT_IP,COUNTS_AGAINST_API_LIMIT,CONNECTED_APP_ID,ENTITY_NAME,STATUS_CODE,TIMESTAMP_DERIVED
0,ApiTotalUsage,2.022070e+13,SLB:ad62558fd073cd197c73f39c7e1db1a5,00D0b000000GaMp,0052R000009jUQ4,REST,45.0,/v45.0/composite,,POST,143.166.230.64,1.0,0H42R000000L9rS,,200.0,2022-07-04T04:00:00.106Z
1,ApiTotalUsage,2.022070e+13,SLB:881fabba268569e1da1a7243ed7a5c6a,00D0b000000GaMp,0050b000004KISt,SOAP,41.0,query,Boomi/,,143.166.255.113,1.0,,Asset,200.0,2022-07-04T04:00:00.147Z
2,ApiTotalUsage,2.022070e+13,SLB:f0dc014b63d2c5a603a09ca6bce6ce25,00D0b000000GaMp,0051P000003kdtk,REST,45.0,/v45.0/sobjects/Case/5006P000006I1WyQAK,,PATCH,54.210.111.111,1.0,88890000000CjPo,Case,204.0,2022-07-04T04:00:00.249Z
3,ApiTotalUsage,2.022070e+13,SLB:66deab1363488a21b24605a4dee76dd6,00D0b000000GaMp,0056P000001A4hE,SOAP,49.0,query,Boomi/,,54.164.68.107,1.0,,Case,200.0,2022-07-04T04:00:00.259Z
4,ApiTotalUsage,2.022070e+13,SLB:15f9e6e0ca7c1783eee623ae79c65f2f,00D0b000000GaMp,0050b000004KISt,SOAP,41.0,query,Boomi/,,143.166.255.113,1.0,,Contact,200.0,2022-07-04T04:00:00.294Z
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7492610,ApiTotalUsage,2.022073e+13,SLB:43449459fc7a61d7b0a8922128401c09,00D0b000000GaMp,0056P000001A5Q6,REST,46.0,/v46.0/composite,,POST,143.166.86.67,1.0,0H46P000000CaSP,,200.0,2022-07-27T14:10:16.001Z
7492611,ApiTotalUsage,2.022073e+13,SLB:73b0a9de8d6138edfdccace385df19c1,00D0b000000GaMp,0052R000009zx7v,REST,45.0,/v45.0/composite,,POST,143.166.86.64,1.0,0H42R000000PF7i,,200.0,2022-07-27T14:10:16.038Z
7492612,ApiTotalUsage,2.022073e+13,SLB:35d2fba7ebafdab9bb1e476a0b7f92a7,00D0b000000GaMp,0050b000004KISt,SOAP,41.0,query,Boomi/,,143.166.226.115,1.0,,Account,200.0,2022-07-27T14:10:16.106Z
7492613,ApiTotalUsage,2.022073e+13,SLB:894c6bd829921b1d5a186699a90b8838,00D0b000000GaMp,0050b000004KISt,SOAP,41.0,query,Boomi/,,143.166.226.119,1.0,,Account,200.0,2022-07-27T14:10:16.123Z


### 1.1 COUNTS_AGAINST_API_LIMIT

According to the [documentation](https://developer.salesforce.com/docs/atlas.en-us.238.0.object_reference.meta/object_reference/sforce_api_objects_eventlogfile_apitotalusage.htm?q=API%20TOTAL), this field indicates whether the request counted against the API limit (true) or not (false).

In [84]:
# How many requests did (1.0) and did not (0.0) count against the API limit.
API_TOTAL_USAGE['COUNTS_AGAINST_API_LIMIT'].value_counts()

1.0    7492501
0.0        114
Name: COUNTS_AGAINST_API_LIMIT, dtype: int64

In [85]:
# Keep only the requests that counted against the API limit (flag equal to 1).
API_TOTAL_USAGE_FILTERED = API_TOTAL_USAGE.loc[
    API_TOTAL_USAGE['COUNTS_AGAINST_API_LIMIT'].eq(1)
]

In [86]:
# Preview of the rows that counted against the API limit (rendered truncated by the notebook).
API_TOTAL_USAGE_FILTERED

Unnamed: 0,EVENT_TYPE,TIMESTAMP,REQUEST_ID,ORGANIZATION_ID,USER_ID,API_FAMILY,API_VERSION,API_RESOURCE,CLIENT_NAME,HTTP_METHOD,CLIENT_IP,COUNTS_AGAINST_API_LIMIT,CONNECTED_APP_ID,ENTITY_NAME,STATUS_CODE,TIMESTAMP_DERIVED
0,ApiTotalUsage,2.022070e+13,SLB:ad62558fd073cd197c73f39c7e1db1a5,00D0b000000GaMp,0052R000009jUQ4,REST,45.0,/v45.0/composite,,POST,143.166.230.64,1.0,0H42R000000L9rS,,200.0,2022-07-04T04:00:00.106Z
1,ApiTotalUsage,2.022070e+13,SLB:881fabba268569e1da1a7243ed7a5c6a,00D0b000000GaMp,0050b000004KISt,SOAP,41.0,query,Boomi/,,143.166.255.113,1.0,,Asset,200.0,2022-07-04T04:00:00.147Z
2,ApiTotalUsage,2.022070e+13,SLB:f0dc014b63d2c5a603a09ca6bce6ce25,00D0b000000GaMp,0051P000003kdtk,REST,45.0,/v45.0/sobjects/Case/5006P000006I1WyQAK,,PATCH,54.210.111.111,1.0,88890000000CjPo,Case,204.0,2022-07-04T04:00:00.249Z
3,ApiTotalUsage,2.022070e+13,SLB:66deab1363488a21b24605a4dee76dd6,00D0b000000GaMp,0056P000001A4hE,SOAP,49.0,query,Boomi/,,54.164.68.107,1.0,,Case,200.0,2022-07-04T04:00:00.259Z
4,ApiTotalUsage,2.022070e+13,SLB:15f9e6e0ca7c1783eee623ae79c65f2f,00D0b000000GaMp,0050b000004KISt,SOAP,41.0,query,Boomi/,,143.166.255.113,1.0,,Contact,200.0,2022-07-04T04:00:00.294Z
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7492610,ApiTotalUsage,2.022073e+13,SLB:43449459fc7a61d7b0a8922128401c09,00D0b000000GaMp,0056P000001A5Q6,REST,46.0,/v46.0/composite,,POST,143.166.86.67,1.0,0H46P000000CaSP,,200.0,2022-07-27T14:10:16.001Z
7492611,ApiTotalUsage,2.022073e+13,SLB:73b0a9de8d6138edfdccace385df19c1,00D0b000000GaMp,0052R000009zx7v,REST,45.0,/v45.0/composite,,POST,143.166.86.64,1.0,0H42R000000PF7i,,200.0,2022-07-27T14:10:16.038Z
7492612,ApiTotalUsage,2.022073e+13,SLB:35d2fba7ebafdab9bb1e476a0b7f92a7,00D0b000000GaMp,0050b000004KISt,SOAP,41.0,query,Boomi/,,143.166.226.115,1.0,,Account,200.0,2022-07-27T14:10:16.106Z
7492613,ApiTotalUsage,2.022073e+13,SLB:894c6bd829921b1d5a186699a90b8838,00D0b000000GaMp,0050b000004KISt,SOAP,41.0,query,Boomi/,,143.166.226.119,1.0,,Account,200.0,2022-07-27T14:10:16.123Z


### 3. Retrieving Report ID from API_RESOURCE


In [87]:
import re

# Matches REST resource paths of the form "/<version>/sobjects/<Type>/<Id>", e.g.
# "/v45.0/sobjects/Case/5006P000006I1WyQAK":
#   - report_type: the sObject name (letters only, at least 4 characters)
#   - report_id:   the 18 alphanumeric characters that follow it
# NOTE(review): despite the group names, this captures any sObject record Id (the outputs in this
# notebook show Case, User and LiveChatTranscript), not only reports — confirm this is intended.
pattern = re.compile(r'\/[0-9a-zA-Z]*[.][0-9]*\/sobjects\/(?P<report_type>[a-zA-Z]{4,})\/(?P<report_id>[0-9a-zA-Z]{18})')

def filter_run_report_endpoints(pattern, field, url):
    """Return the named group ``field`` captured from ``url``, or ``None``.

    Parameters
    ----------
    pattern : str or compiled regular expression
        Pattern applied at the start of ``url`` (``re.match`` semantics).
    field : str
        Name of the capture group to return, e.g. ``'report_id'``.
    url : object
        Value to inspect. Anything that is not ``str``/``bytes`` (for example
        the float ``NaN`` pandas uses for a missing CSV cell, or ``None``) is
        treated as "no match" instead of raising ``TypeError``.

    Returns
    -------
    str or None
        The captured text when the pattern matches, otherwise ``None``.
    """
    # re.match only accepts str/bytes; guard so one missing cell cannot abort a whole .apply().
    if not isinstance(url, (str, bytes)):
        return None
    m = re.match(pattern, url)
    return m.group(field) if m else None

In [88]:
# Work on an independent (deep) copy so the filtered frame itself is never modified.
api_usage_copy = API_TOTAL_USAGE_FILTERED.copy(deep=True)

In [89]:
# Distinct API_RESOURCE values, to see which endpoint shapes occur in the log.
API_TOTAL_USAGE_FILTERED['API_RESOURCE'].unique()

array(['/v45.0/composite', 'query',
       '/v45.0/sobjects/Case/5006P000006I1WyQAK', ...,
       '/v41.0/sobjects/Case/5006P000006dvE4QAI',
       '/v41.0/sobjects/Case/5006P000006wg21QAA',
       '/v41.0/sobjects/Case/5006P000006OM1PQAW'], dtype=object)

In [90]:
# Extract the `report_id` group from every API_RESOURCE value; rows that do not match get None.
api_usage_copy['REPORT_ID_DERIVED'] = (
    api_usage_copy['API_RESOURCE']
    .apply(lambda resource: filter_run_report_endpoints(pattern, 'report_id', resource))
)

In [91]:
# Distinct extracted Ids (None marks rows whose API_RESOURCE did not match the pattern).
api_usage_copy['REPORT_ID_DERIVED'].unique()

array([None, '5006P000006I1WyQAK', '5006P000005PdsJQAS', ...,
       '5006P000006dvE4QAI', '5006P000006wg21QAA', '5006P000006OM1PQAW'],
      dtype=object)

In [92]:
# API usage rows whose extracted Id appears in the list of active report Ids.
# NOTE(review): this frame is not referenced again in the visible notebook — the later merge uses
# `api_usage_copy`; confirm whether the active-only subset was meant to be used instead.
api_usage_copy_actives = api_usage_copy[api_usage_copy['REPORT_ID_DERIVED'].isin(active_ids)]

In [93]:
# Keep only the rows for which an Id could be extracted from API_RESOURCE.
# Rebinding the name (rather than inplace=True) avoids mutating a frame that earlier
# cells have already displayed or derived from.
api_usage_copy = api_usage_copy.dropna(subset=['REPORT_ID_DERIVED'])

In [94]:
# Number of distinct Ids extracted from the API usage log.
api_usage_copy['REPORT_ID_DERIVED'].nunique(dropna=False)

90991

In [95]:
len(pageview_logs['ReportId'].unique())

478326

### Merge actives reports with reports

In [96]:
pageview_logs = pd.read_csv("../../data/pageview/LightningPageView_newlogs.csv", low_memory=False)

In [97]:
# Inner join: keep page views whose ReportId also appears as an Id extracted from the API log.
api_pageview_logs = pageview_logs.merge(
    api_usage_copy,
    how='inner',
    left_on='ReportId',
    right_on='REPORT_ID_DERIVED',
)

In [98]:
# Number of distinct ReportId values present in both sources.
api_pageview_logs['ReportId'].nunique(dropna=False)

18658

Keeping only the IDs that occur exactly once in the merged data (`keep=False` drops every row of a duplicated ID)

In [99]:
# keep=False discards EVERY row whose REPORT_ID_DERIVED occurs more than once, so only Ids with
# exactly one merged row survive; no representative row is kept for duplicated Ids.
# NOTE(review): if the goal is one row per Id, `keep='first'` would be needed — confirm intent.
probl_reportsSet = api_pageview_logs.drop_duplicates(subset=["REPORT_ID_DERIVED"], keep=False)

In [100]:
# Restore the plain column name for the `_y`-suffixed timestamp column produced by the merge.
probl_reportsSet = probl_reportsSet.rename(
    {'TIMESTAMP_DERIVED_y': 'TIMESTAMP_DERIVED'},
    axis='columns',
)

In [101]:
# Columns retained in the final data set, in display order.
columns = [
    'REPORT_ID_DERIVED',
    'TIMESTAMP_DERIVED',
    'ReportType',
    'UI_EVENT_SOURCE',
    'COUNTS_AGAINST_API_LIMIT',
    'STATUS_CODE',
    'CONNECTED_APP_ID',
    'API_RESOURCE',
    'HTTP_METHOD',
    'API_FAMILY',
    'API_VERSION',
]

In [102]:
# Project the frame onto the selected columns, in the order listed.
probl_reportsSet = probl_reportsSet.loc[:, columns]

In [103]:
# Number of distinct Ids left in the final data set.
probl_reportsSet['REPORT_ID_DERIVED'].nunique(dropna=False)

2846

In [104]:
# Preview of the final data set (rendered truncated by the notebook).
probl_reportsSet

Unnamed: 0,REPORT_ID_DERIVED,TIMESTAMP_DERIVED,ReportType,UI_EVENT_SOURCE,COUNTS_AGAINST_API_LIMIT,STATUS_CODE,CONNECTED_APP_ID,API_RESOURCE,HTTP_METHOD,API_FAMILY,API_VERSION
391,5006P000005aIm8QAE,2022-07-04T04:02:37.334Z,Case,,1.0,204.0,88890000000CjPo,/v45.0/sobjects/Case/5006P000005aIm8QAE,PATCH,REST,45.0
620,5006P000005RMMfQAO,2022-07-05T07:32:42.253Z,Case,,1.0,204.0,88890000000CjPo,/v45.0/sobjects/Case/5006P000005RMMfQAO,PATCH,REST,45.0
1348,0052R00000ASQKlQAP,2022-07-04T04:39:40.757Z,User,,1.0,200.0,88890000000CjPo,/v45.0/sobjects/User/0052R00000ASQKlQAP,GET,REST,45.0
2105,5006P0000067gCKQAY,2022-07-22T07:46:59.112Z,Case,,1.0,204.0,88890000000CjPo,/v45.0/sobjects/Case/5006P0000067gCKQAY,PATCH,REST,45.0
2139,0056P000000Dpr3QAC,2022-07-27T13:00:42.290Z,User,,1.0,200.0,88890000000CjPo,/v45.0/sobjects/User/0056P000000Dpr3QAC,GET,REST,45.0
...,...,...,...,...,...,...,...,...,...,...,...
876714,5006P000006j8MaQAI,2022-07-27T12:00:14.777Z,Case,,1.0,204.0,88890000000CjPo,/v45.0/sobjects/Case/5006P000006j8MaQAI,PATCH,REST,45.0
876718,5706P000001hOWpQAM,2022-07-27T12:41:35.241Z,LiveChatTranscript,,1.0,200.0,88890000000CjPo,/v35.0/sobjects/LiveChatTranscript/5706P000001...,GET,REST,35.0
876778,5706P000001lZCtQAM,2022-07-27T12:39:34.193Z,LiveChatTranscript,,1.0,200.0,88890000000CjPo,/v35.0/sobjects/LiveChatTranscript/5706P000001...,GET,REST,35.0
876953,5706P000001dFtcQAE,2022-07-27T12:38:10.206Z,LiveChatTranscript,,1.0,200.0,88890000000CjPo,/v35.0/sobjects/LiveChatTranscript/5706P000001...,GET,REST,35.0


Storing a dataset with active and problematic reports.

In [105]:
# NOTE(review): the export is disabled, and `active_reports_performance` is not defined anywhere in
# the visible notebook — confirm which frame should be written before re-enabling this line.
# active_reports_performance.to_csv("2022-07-27_active_and_problematic_reports_performance.csv", index=False)