In [1]:
import sys
sys.path.insert(0, '..')

from intransparent import (
    ingest_reports_per_platform,
    long_ncmec_reports,
    compare_all_platform_reports,
    REPORTS_PER_PLATFORM,
    show,
    YEAR_LABELS,
)

# Ingest the per-platform report data. `include_redundant=True` is handed to
# the ingester as-is (presumably it keeps entries flagged as redundant --
# confirm against the `intransparent` package).
data = ingest_reports_per_platform(
    REPORTS_PER_PLATFORM,
    include_redundant=True,
)

# Notebook title and first section heading, rendered as raw HTML.
show('<h1>CSAM Reports per Platform</h1>')
show('<h2>An Overview of Platform Disclosures</h2>')

# One row per platform describing its transparency disclosures.
show(
    data.features,
    caption='Platforms and Their Disclosures',
)

Unnamed: 0_level_0,data,history,terms,quantities,granularity,frequency,coverage,has_reports
platform,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Facebook,csv,data,child nudity & sexual exploitation; child sexual exploitation,rounded,Q,Q,2018 Q3,False
Google,⋯,same page (dropdown),CSAM,counts,H,H,2020 H1,True
Instagram,csv,data,child nudity & sexual exploitation; sexual exploitation,rounded,Q,Q,2019 Q2,False
LinkedIn,⋯,same page (tabs),child exploitation,counts,H,H,2019 H1,False
Pinterest,⋯,same page (tabs),child safety; child sexual exploitation; CSAM,counts,Q,H,2020 H1,True
Quora,⋯,⋯,⋯,⋯,⋯,⋯,⋯,⋯
Reddit,⋯,page archive,minor sexualization; child sexual exploitation; CSAM,counts,H,H,2021,True
Snap,⋯,page archive,child sexual exploitation and abuse imagery,counts,H,H,2019 H2,True
Telegram,⋯,⋯,⋯,⋯,⋯,⋯,⋯,⋯
TikTok,csv,page archive,sexual exploitation of minors; CSAM; youth exploitation and abuse,fractions,Q,Q,2022 Q1,False


In [2]:
show('<h2>Ranking of Social Media Firms</h2>')

# Long-format NCMEC report counts; `.loc[year]` below selects the rows for
# one year, so the frame is indexed by reporting period.
ncmec = long_ncmec_reports(data)

# Report counts of the 'Total' rows only. Looking this series up by year
# yields the denominator for that year's percentages. A single `.loc` call
# replaces the chained `ncmec[mask]['reports']` lookup.
totals = ncmec.loc[ncmec['platform'] == 'Total', 'reports']

# Render one ranking table per year, largest reporter first. The 'Total' row
# is kept in the table and therefore shows up with 100 percent.
for year in YEAR_LABELS:
    yearly = (
        ncmec.loc[year]
        .sort_values('reports', ascending=False)
        # Share of the year's total, expressed in percent.
        .assign(reports_pct=lambda df: df['reports'] / totals[year] * 100)
        .drop(columns='redundant')
    )
    show(
        yearly,
        # Fixed caption: no stray leading space, and "Filed" instead of "File".
        caption=f'Social Media by CSAM Reports Filed in {year}'
    )

Unnamed: 0_level_0,platform,reports,reports_pct
period,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2019,Total,16836694,100.00000000
2019,Meta,15884511,94.34459639
2019,Alphabet,449283,2.66847518
2019,Snap,82030,0.48720966
2019,Twitter,45726,0.27158538
2019,Automattic,10443,0.06202524
2019,Pinterest,7360,0.04371405
2019,Reddit,724,0.00430013
2019,TikTok,596,0.00353989
2019,LinkedIn,88,0.00052267


Unnamed: 0_level_0,platform,reports,reports_pct
period,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020,Total,21447786,100.00000000
2020,Meta,20307216,94.68210845
2020,Alphabet,546704,2.54899970
2020,Snap,144095,0.67184091
2020,Twitter,65062,0.30335066
2020,TikTok,22692,0.10580113
2020,Automattic,9130,0.04256850
2020,Pinterest,3432,0.01600165
2020,Reddit,2233,0.01041133
2020,LinkedIn,60,0.00027975


Unnamed: 0_level_0,platform,reports,reports_pct
period,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2021,Total,29157083,100.0000000
2021,Meta,26885302,92.2084764
2021,Alphabet,875783,3.0036715
2021,Snap,512522,1.7577959
2021,TikTok,154618,0.5302931
2021,Twitter,86666,0.2972382
2021,Reddit,10059,0.0344993
2021,Automattic,4821,0.0165346
2021,Pinterest,2283,0.0078300
2021,LinkedIn,110,0.0003773


Unnamed: 0_level_0,platform,reports,reports_pct
period,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2022,Total,31802525,100.000000
2022,Meta,27190665,85.498447
2022,Alphabet,2174548,6.837658
2022,Snap,551086,1.732837
2022,TikTok,288125,0.905982
2022,Twitter,98050,0.308309
2022,Reddit,52592,0.165371
2022,Pinterest,34310,0.107885
2022,Automattic,5035,0.015832
2022,Quora,2242,0.007050


In [3]:
show('<h2>Transparency Data Quality per Social Medium</h2>')

# Per-platform tables; each one is rendered at the end of this cell with the
# platform name as its caption.
comparisons = compare_all_platform_reports(data)

# `data.brands` is treated as a mapping from a brand holder to its brands
# (inferred from how it is counted here -- confirm against `intransparent`).
brand_holders = len(data.brands)
brand_holder_brands = sum(map(len, data.brands.values()))

# Entries that are not surveyed brands: '@', 'NCMEC', and one entry per
# brand holder.
surveyed_brands = len(REPORTS_PER_PLATFORM) - 2 - brand_holders

# Collapse each holder's brands into a single firm.
surveyed_firms = surveyed_brands - (brand_holder_brands - brand_holders)

# Disclosure keys that are not brand holders, minus the 'NCMEC' entry.
brands_with_disclosures = len(set(data.disclosures).difference(data.brands)) - 1

# Summary sentence. The `+ 1` in the platform count is taken over unchanged
# from the original cell (presumably one firm accounts for two platforms --
# TODO confirm).
show(
    f"""
    Out of {surveyed_brands} surveyed social media platforms owned by
    {surveyed_firms} firms, {brands_with_disclosures} brands make transparency
    disclosures but only {len(comparisons) + 1} platforms owned by
    {len(comparisons)} firms disclose the number of CSAM reports submitted to
    NCMEC.
    """
)

# One comparison table per platform.
for platform, table in comparisons.items():
    show(table, caption=platform)

Unnamed: 0_level_0,pieces,π,reports,Δ%,NCMEC
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020,⋯,⋯,3432,≡,3432
2021,1608,0.599,2684,-14.94,2283
2022,37136,1.127,32964,4.08,34310


Unnamed: 0_level_0,pieces,π,reports,Δ%,NCMEC
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2019,⋯,⋯,724,≡,724
2020,⋯,⋯,2233,≡,2233
2021,9258,0.920,10059,≡,10059
2022,80888,1.538,52592,≡,52592


Unnamed: 0_level_0,pieces,π,reports,Δ%,NCMEC
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020,⋯,⋯,⋯,⋯,144095
2021,⋯,⋯,⋯,⋯,512522
2022,1273838,2.31,550755,0.0601,551086


Unnamed: 0_level_0,pieces,π,reports,Δ%,NCMEC
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020,4437853,8.1,547875,-0.2137,546704
2021,6696497,7.69,870319,0.6278,875783
2022,13402885,6.16,2174319,0.0105,2174548
