In [1]:
import pandas as pd
from IPython.display import display, HTML

display(HTML('<h1>NCMEC vs Platform Disclosures 2021</h1>'))

# https://www.missingkids.org/content/dam/missingkids/pdfs/2021-reports-by-esp.pdf
NCMEC = pd.Series({
    "LinkedIn": 110,
    "Pinterest": 2_283,
    "Reddit": 10_059,
    "Snap": 512_522,
    "TikTok": 154_618,
    "Twitter": 86_666,
}, name='NCMEC')

# H1: "In the first half of 2021, 5.43% of the total number of accounts enforced
# globally contained CSAM" -- but also "Total Account Deletions: 119,134"
SNAP_H1_COUNT = 119_134
SNAP_H1_FRACTION = round(0.0543 * 2_510_798)
delta = (SNAP_H1_FRACTION - SNAP_H1_COUNT) / SNAP_H1_COUNT * 100
print(f"Snap H1: {SNAP_H1_COUNT:,d} + {delta:.1f}% = {SNAP_H1_FRACTION:,d}")

# "Total videos removed" vs "Total video removal, by policy"
TIKTOK = pd.DataFrame({
    "Total": [61_951_327, 81_518_334, 91_445_802, 85_794_222],
    "Minor Safety Pct": [0.368, 0.413, 0.510, 0.451]
}, index=['Q1', 'Q2', 'Q3', 'Q4'])

TIKTOK['Minor Safety'] = (
    TIKTOK['Minor Safety Pct'] * TIKTOK['Total']
).round(0).astype('int')

TIKTOK_TOTAL = TIKTOK['Minor Safety'].sum()
print(f"TikTok videos removed due to minor safety: {TIKTOK_TOTAL:,d}")


# "Child sexual exploitation"
TWITTER = {
    "Accounts actioned": 456_146 + 599_523,
    "Accounts suspended": 453_754 + 596_997,
}

PLATFORM = pd.Series({
    # "Child exploitation"
    "LinkedIn": 101 + 125,
    # H1: "Our team of specialists is trained to identify and review CSAM, and
    # was responsible for 890 CyberTipline reports to NCMEC from January to June
    # 2021."
    # H2: "From July to December 2021, our team of specialists was responsible
    # for 1,794 CyberTipline reports to NCMEC."
    "Pinterest": 890 + 1_795,
    # "we made 10,059 CyberTipline reports to NCMEC"
    "Reddit": 10_059,
    "Snap": SNAP_H1_FRACTION + 198_109,
    "TikTok": TIKTOK_TOTAL,
    "Twitter": TWITTER["Accounts suspended"],
}, name='Platform')

diff = (PLATFORM - NCMEC) / NCMEC * 100
diff.name = "∆"

df = pd.concat([NCMEC, diff, PLATFORM], axis=1)
display(df.style.format({"NCMEC": "{0:,d}", "∆": "{0:,.0f}%", "Platform": "{0:,d}"}))


Snap H1: 119,134 + 14.4% = 136,336
TikTok videos removed due to minor safety: 141,795,713


Unnamed: 0,NCMEC,∆,Platform
LinkedIn,110,105%,226
Pinterest,2283,18%,2685
Reddit,10059,0%,10059
Snap,512522,-35%,334445
TikTok,154618,"91,607%",141795713
Twitter,86666,"1,112%",1050751
