In [1]:
import sys
sys.path.insert(0, '..')

from intransparent import (
    ingest_reports_per_platform,
    long_ncmec_reports,
    compare_all_platform_reports,
    REPORTS_PER_PLATFORM,
    show,
    YEAR_LABELS,
)

# Ingest the per-platform transparency disclosures. `include_redundant=True`
# presumably keeps entries that the library flags as redundant (the rendered
# tables carry a `redundant` column) -- confirm against `intransparent`.
data = ingest_reports_per_platform(REPORTS_PER_PLATFORM, include_redundant=True)

show('<h1>CSAM Reports per Platform</h1>')
show('<h2>Ranking of Social Media Firms</h2>')

# NCMEC report counts in long format; the per-year `.loc[year]` lookups below
# rely on the frame being indexed by reporting period.
ncmec = long_ncmec_reports(data)
# The rows labelled 'Total' provide the denominator for each year's shares.
# A single `.loc` with a row mask and a column label avoids chained indexing.
totals = ncmec.loc[ncmec['platform'] == 'Total', 'reports']

for year in YEAR_LABELS:
    # Rank that year's platforms by report count and add each platform's share
    # of the yearly total as a percentage.
    yearly = (
        ncmec.loc[year]
        .sort_values('reports', ascending=False)
        .assign(reports_pct=lambda df: df['reports'] / totals[year] * 100)
    )
    show(yearly, caption=f'Social Media by CSAM Reports Filed in {year}')

Unnamed: 0_level_0,platform,reports,redundant,reports_pct
period,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019,Total,16836694,False,100.00000000
2019,Meta,15884511,False,94.34459639
2019,Alphabet,449283,False,2.66847518
2019,Snap,82030,False,0.48720966
2019,Twitter,45726,False,0.27158538
2019,Automattic,10443,False,0.06202524
2019,Pinterest,7360,False,0.04371405
2019,Reddit,724,False,0.00430013
2019,TikTok,596,False,0.00353989
2019,LinkedIn,88,False,0.00052267


Unnamed: 0_level_0,platform,reports,redundant,reports_pct
period,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020,Total,21447786,False,100.00000000
2020,Meta,20307216,False,94.68210845
2020,Alphabet,546704,False,2.54899970
2020,Snap,144095,False,0.67184091
2020,Twitter,65062,False,0.30335066
2020,TikTok,22692,False,0.10580113
2020,Automattic,9130,False,0.04256850
2020,Pinterest,3432,False,0.01600165
2020,Reddit,2233,False,0.01041133
2020,LinkedIn,60,False,0.00027975


Unnamed: 0_level_0,platform,reports,redundant,reports_pct
period,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2021,Total,29157083,False,100.0000000
2021,Meta,26885302,False,92.2084764
2021,Alphabet,875783,False,3.0036715
2021,Snap,512522,False,1.7577959
2021,TikTok,154618,False,0.5302931
2021,Twitter,86666,False,0.2972382
2021,Reddit,10059,False,0.0344993
2021,Automattic,4821,False,0.0165346
2021,Pinterest,2283,False,0.0078300
2021,LinkedIn,110,False,0.0003773


Unnamed: 0_level_0,platform,reports,redundant,reports_pct
period,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022,Total,31802525,False,100.000000
2022,Meta,27190665,False,85.498447
2022,Alphabet,2174548,False,6.837658
2022,Snap,551086,False,1.732837
2022,TikTok,288125,False,0.905982
2022,Twitter,98050,False,0.308309
2022,Reddit,52592,False,0.165371
2022,Pinterest,34310,False,0.107885
2022,Automattic,5035,False,0.015832
2022,Quora,2242,False,0.007050


In [2]:
show('<h2>Transparency Data Quality per Social Medium</h2>')

# Per-platform comparison tables, keyed by platform name (iterated with
# `.items()` at the end of this cell).
comparisons = compare_all_platform_reports(data)

# `data.brands` maps each brand-holding firm to the brands it owns.
brand_holders = len(data.brands)
brand_holder_brands = sum(len(brands) for brands in data.brands.values())
# Account for '@' and 'NCMEC', and drop the brand holders themselves so that
# only individual brands are counted:
surveyed_brands = len(REPORTS_PER_PLATFORM) - 1 - 1 - brand_holders
# Collapse the brands of each brand holder into a single firm.
surveyed_firms = surveyed_brands - brand_holder_brands + brand_holders
# Account for 'NCMEC':
brands_with_disclosures = len(set(data.disclosures) - set(data.brands)) - 1

show(
    f"""
    Out of {surveyed_brands} surveyed social media platforms owned by
    {surveyed_firms} firms, {brands_with_disclosures} brands make transparency
    disclosures but only {len(comparisons) + 1} platforms owned by
    {len(comparisons)} firms disclose the number of CSAM reports submitted to
    NCMEC.
    """
)

# Use a dedicated loop variable: rebinding `data` here would overwrite the
# ingested dataset and break any re-run of this cell or later use of `data`.
for platform, comparison in comparisons.items():
    show(comparison, caption=platform)

Unnamed: 0_level_0,pieces,reports,Δ%,NCMEC
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020,⋯,3432,0.0,3432
2021,1608,2684,-14.94,2283
2022,37136,32964,4.08,34310


Unnamed: 0_level_0,pieces,reports,Δ%,NCMEC
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019,⋯,724,0.0,724
2020,⋯,2233,0.0,2233
2021,9258,10059,0.0,10059
2022,80888,52592,0.0,52592


Unnamed: 0_level_0,pieces,reports,Δ%,NCMEC
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020,⋯,⋯,⋯,144095
2021,⋯,⋯,⋯,512522
2022,1273838,550755,0.0601,551086


Unnamed: 0_level_0,pieces,reports,Δ%,NCMEC
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020,4437853,547875,-0.2137,546704
2021,6696497,870319,0.6278,875783
2022,13402885,2174319,0.0105,2174548
