In [1]:
import sys
sys.path.insert(0, '..')

from intransparent import (
    ingest_reports_per_platform,
    long_ncmec_reports,
    compare_all_platform_reports,
    REPORTS_PER_PLATFORM,
    show,
)

# Emit the notebook title and the heading of the first section through the
# project's `show` helper (imported above from `intransparent`).
show('<h1>CSAM Reports per Platform</h1>')
show('<h2>Ingesting Platform Disclosures</h2>')

def logger(format: str, *args, **kwargs) -> None:
    """Print a message built from a ``str.format`` template.

    ``format`` is the template; ``args`` and ``kwargs`` fill its positional
    and named replacement fields. The parameter name shadows the ``format``
    builtin inside this function; it is kept as-is so that callers passing
    it by keyword keep working.
    """
    message = format.format(*args, **kwargs)
    print(message)

# Ingest the per-platform disclosures described by REPORTS_PER_PLATFORM.
# `logger` is handed over as the logging callback; presumably it is what
# prints the per-platform status lines during ingestion.
data = ingest_reports_per_platform(REPORTS_PER_PLATFORM, logger=logger)

# Section heading followed by the `data.features` table with a caption.
show('<h2>An Overview of Platform Disclosures</h2>')
show(data.features, caption='Platforms and Their Disclosures')

Skipping metadata
❌ Alphabet (no CSAM data)
❌ Automattic (no CSAM data)
✅ Discord
✅ Facebook
✅ Google
✅ Instagram
✅ LinkedIn
❌ Meta (no CSAM data)
✅ Microsoft
❌ Omegle (no CSAM data)
✅ Pinterest
❌ Quora (no CSAM data)
✅ Reddit
✅ Snap
❌ Telegram (no transparency disclosures)
✅ TikTok
❌ Tumblr (no CSAM data)
✅ Twitch
✅ Twitter
❌ WhatsApp (no CSAM data)
❌ Wordpress (no CSAM data)
❌ X (no CSAM data)
✅ YouTube
✅ NCMEC


Unnamed: 0_level_0,data,history,terms,quantities,granularity,frequency,coverage,has_reports
platform,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Alphabet,⋯,⋯,⋯,⋯,⋯,⋯,⋯,⋯
Automattic,⋯,⋯,⋯,⋯,⋯,⋯,⋯,⋯
Discord,csv,same page (dropdown),child safety; CSAM,counts,Q,Q,2020 H2,True
Facebook,csv,data,child nudity & sexual exploitation; child sexual exploitation,rounded,Q,Q,2018 Q3,False
Google,⋯,same page (dropdown),CSAM,counts,H,H,2020 H1,True
Instagram,csv,data,child nudity & sexual exploitation; sexual exploitation,rounded,Q,Q,2019 Q2,False
LinkedIn,⋯,same page (tabs),child exploitation,counts,H,H,2019 H1,False
Meta,⋯,⋯,⋯,⋯,⋯,⋯,⋯,⋯
Microsoft,xls,same page (dropdown),CSAM,counts,H,H,2020 H1,True
Omegle,⋯,⋯,⋯,⋯,⋯,⋯,⋯,⋯


In [2]:
show('<h2>Ranking of Social Media Firms</h2>')

# NCMEC report counts in long format: the index identifies the year, and
# each row carries a `platform` label and its `reports` count.
ncmec = long_ncmec_reports(data)
# The rows labelled 'Total' provide the per-year denominator for percentages.
totals = ncmec[ncmec['platform'] == 'Total']['reports']

# Group by the index instead of counting from min to max year by hand: this
# visits only the years that are actually present (no KeyError on a gap), in
# ascending order, and always yields a DataFrame, even for a single-row year.
for year, rows in ncmec.groupby(level=0):
    yearly = (
        rows
        .sort_values('reports', ascending=False)
        .assign(reports_pct=lambda df: df['reports'] / totals.loc[year] * 100)
    )

    # The aggregate rows are not platforms; blank their share so that they
    # do not contribute to the cumulative percentage below.
    is_aggregate = yearly['platform'].isin(['ESP Total', 'Total'])
    yearly.loc[is_aggregate, 'reports_pct'] = None
    yearly['cumpct'] = yearly['reports_pct'].cumsum()

    show(
        yearly,
        caption=f' Social Media by CSAM Reports Filed in {year}',
    )

Unnamed: 0_level_0,platform,reports,reports_pct,cumpct
period,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019,Total,16987361,⋯,⋯
2019,ESP Total,16836694,⋯,⋯
2019,Meta,15884511,93.50782031,93.5
2019,Alphabet,449283,2.64480751,96.2
2019,Microsoft,123927,0.72952473,96.9
2019,Snap,82030,0.48288843,97.4
2019,X,45726,0.26917660,97.6
2019,Discord,19480,0.11467349,97.7
2019,Automattic,10443,0.06147512,97.8
2019,Pinterest,7360,0.04332633,97.9


Unnamed: 0_level_0,platform,reports,reports_pct,cumpct
period,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020,Total,21751085,⋯,⋯
2020,ESP Total,21447786,⋯,⋯
2020,Meta,20307216,93.36185298,93.4
2020,Alphabet,546704,2.51345623,95.9
2020,Snap,144095,0.66247270,96.5
2020,Microsoft,96836,0.44520078,97.0
2020,X,65062,0.29912071,97.3
2020,TikTok,22692,0.10432583,97.4
2020,Omegle,20265,0.09316777,97.5
2020,Discord,15324,0.07045166,97.6


Unnamed: 0_level_0,platform,reports,reports_pct,cumpct
period,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2021,Total,29397681,⋯,⋯
2021,ESP Total,29157083,⋯,⋯
2021,Meta,26885302,91.4538191,91.5
2021,Alphabet,875783,2.9790887,94.4
2021,Snap,512522,1.7434096,96.2
2021,TikTok,154618,0.5259531,96.7
2021,X,86666,0.2948056,97.0
2021,Microsoft,78883,0.2683307,97.3
2021,Omegle,46924,0.1596180,97.4
2021,Discord,29606,0.1007086,97.5


Unnamed: 0_level_0,platform,reports,reports_pct,cumpct
period,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022,Total,32059029,⋯,⋯
2022,ESP Total,31802525,⋯,⋯
2022,Meta,27190665,84.81437,84.8
2022,Alphabet,2174548,6.78295,91.6
2022,Omegle,608601,1.89838,93.5
2022,Snap,551086,1.71897,95.2
2022,TikTok,288125,0.89873,96.1
2022,Discord,169800,0.52965,96.6
2022,Microsoft,108798,0.33937,97.0
2022,X,98050,0.30584,97.3


Unnamed: 0_level_0,platform,reports,reports_pct,cumpct
period,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2023,Total,36210368,⋯,⋯
2023,ESP Total,35944826,⋯,⋯
2023,Meta,30658047,84.6665,84.7
2023,Alphabet,1470958,4.0623,88.7
2023,X,870503,2.4040,91.1
2023,Snap,713055,1.9692,93.1
2023,TikTok,590376,1.6304,94.7
2023,Discord,339412,0.9373,95.7
2023,Reddit,290141,0.8013,96.5
2023,Omegle,188102,0.5195,97.0


In [3]:
show('<h2>Transparency Data Quality per Social Medium</h2>')

comparisons = compare_all_platform_reports(data)

# `data.brands` presumably maps each brand-holding firm to its brands:
# count the holders and, separately, all brands they hold.
brand_holders = len(data.brands)
brand_holder_brands = sum(map(len, data.brands.values()))

# Drop the two entries '@' and 'NCMEC' as well as the brand holders
# themselves to get the number of surveyed brands.
surveyed_brands = len(REPORTS_PER_PLATFORM) - 2 - brand_holders
# For firms, the held brands collapse into their respective holders.
surveyed_firms = surveyed_brands + brand_holders - brand_holder_brands

# Entries with disclosures that are not brand holders, less 'NCMEC'.
brands_with_disclosures = (
    len(set(data.disclosures).difference(data.brands)) - 1
)

show(
    f"""
    Out of {surveyed_brands} surveyed social media platforms owned by
    {surveyed_firms} firms, {brands_with_disclosures} brands make transparency
    disclosures. Furthermore, only {len(comparisons)} platforms disclose usable
    piece or report counts.
    """
)

for platform, table in comparisons.items():
    if platform == "Microsoft":
        # For 2019, only LinkedIn's pieces are available, and those counts
        # are tiny compared to Microsoft's. So leave 2019 out.
        table = table[table.index.year != 2019]
    show(table, caption=platform)

Unnamed: 0_level_0,pieces,π,reports,Δ%,NCMEC,esp%,esp,total%,total,esp/total%
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2020,4437853,8.1,547875,-0.2137,546704,2.55,21447786,2.51,21751085,98.6
2021,6696497,7.69,870319,0.6278,875783,3.0,29157083,2.98,29397681,99.2
2022,13402885,6.16,2174319,0.0105,2174548,6.84,31802525,6.78,32059029,99.2


Unnamed: 0_level_0,pieces,π,reports,Δ%,NCMEC,esp%,esp,total%,total,esp/total%
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2021,⋯,⋯,24623,20.2,29606,0.102,29157083,0.101,29397681,99.2
2022,⋯,⋯,58179,191.9,169800,0.534,31802525,0.53,32059029,99.2
2023,⋯,⋯,164478,106.4,339412,0.944,35944826,0.937,36210368,99.3


Unnamed: 0_level_0,pieces,π,reports,Δ%,NCMEC,esp%,esp,total%,total,esp/total%
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2019,39368400,2.48,⋯,⋯,15884511,94.3,16836694,93.5,16987361,99.1
2020,38890800,1.92,⋯,⋯,20307216,94.7,21447786,93.4,21751085,98.6
2021,78012400,2.9,⋯,⋯,26885302,92.2,29157083,91.5,29397681,99.2
2022,105800000,3.89,⋯,⋯,27190665,85.5,31802525,84.8,32059029,99.2
2023,63300000,2.06,⋯,⋯,30658047,85.3,35944826,84.7,36210368,99.3


Unnamed: 0_level_0,pieces,π,reports,Δ%,NCMEC,esp%,esp,total%,total,esp/total%
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2020,1256855,13.03,96435,0.4158,96836,0.451,21447786,0.445,21751085,98.6
2021,564609,7.15,78926,-0.0545,78883,0.271,29157083,0.268,29397681,99.2
2022,454321,4.22,107599,1.1143,108798,0.342,31802525,0.339,32059029,99.2


Unnamed: 0_level_0,pieces,π,reports,Δ%,NCMEC,esp%,esp,total%,total,esp/total%
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2020,⋯,⋯,3432,≡,3432,0.016,21447786,0.01578,21751085,98.6
2021,1608,0.599,2684,-14.94,2283,0.00783,29157083,0.00777,29397681,99.2
2022,37136,1.127,32964,4.08,34310,0.10788,31802525,0.10702,32059029,99.2


Unnamed: 0_level_0,pieces,π,reports,Δ%,NCMEC,esp%,esp,total%,total,esp/total%
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2019,⋯,⋯,724,≡,724,0.0043,16836694,0.00426,16987361,99.1
2020,⋯,⋯,2233,≡,2233,0.01041,21447786,0.01027,21751085,98.6
2021,9258,0.920,10059,≡,10059,0.0345,29157083,0.03422,29397681,99.2
2022,80888,1.538,52592,≡,52592,0.16537,31802525,0.16405,32059029,99.2


Unnamed: 0_level_0,pieces,π,reports,Δ%,NCMEC,esp%,esp,total%,total,esp/total%
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2020,⋯,⋯,⋯,⋯,144095,0.672,21447786,0.662,21751085,98.6
2021,⋯,⋯,⋯,⋯,512522,1.758,29157083,1.743,29397681,99.2
2022,1273838,2.31,550755,0.0601,551086,1.733,31802525,1.719,32059029,99.2


Unnamed: 0_level_0,pieces,π,reports,Δ%,NCMEC,esp%,esp,total%,total,esp/total%
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2020,⋯,⋯,2158,⋯,⋯,⋯,21447786,⋯,21751085,98.6
2021,⋯,⋯,6621,0.121,6629,0.0227,29157083,0.0225,29397681,99.2
2022,⋯,⋯,14296,1.483,14508,0.0456,31802525,0.0453,32059029,99.2
