In [1]:
import plotly.graph_objects as go

from intransparent import (
    YEAR_LABELS,
    ingest_reports_per_country,
    without_populations,
    reports_per_capita_country_year,

    REPORTS_PER_PLATFORM,
    ingest_reports_per_platform,
    compare_all_platform_reports,

    show_html,
    show_info,
    show_table,
)

import intransparent.meta as meta

show_html(h1='CSAM Reports per Country', h2='Prelude')

country_data = ingest_reports_per_country('./data', logger=show_info)

countries_without, reports_without = (
    without_populations(country_data.reports, country_data.populations))
show_html(
    f'{countries_without.shape[0]} countries with reports have no population '
    'statistics. They also account for very few reports.')
show_table(reports_without, title='Reports of countries without population')


Unnamed: 0_level_0,reports,reports_pct
year,Unnamed: 1_level_1,Unnamed: 2_level_1
2019,28,0.000165
2020,97,0.000446
2021,243,0.000827
2022,117,0.000365


In [2]:
show_html(
    h2='Regions with Most CSAM Reports', 
    p='Selection does <em>not</em> take population counts into consideration.')

most_reports = (
    country_data.reports_per_capita
    .groupby(['year', 'region'])
    .sum(numeric_only=True)
    .sort_values(by='reports', ascending=False)
)

for year in YEAR_LABELS:
    show_table(
        most_reports.query(f'year == "{year}"').head(20),
        title=f'Regions with most CSAM Reports {year}',
        highlights=['reports', 'reports_pct'],
    )


Unnamed: 0_level_0,Unnamed: 1_level_0,reports,reports_pct,population,population_pct,reports_per_capita
year,region,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2019,Southern Asia,4072192,23.971893,1948009381,25.087209,0.036218
2019,South-Eastern Asia,2930393,17.250431,663897086,8.54992,0.047123
2019,Western Asia,2592785,15.263024,281896911,3.630376,0.17206
2019,North America,2226879,13.109034,371967924,4.790345,0.010929
2019,Northern Africa,1532676,9.022449,247023819,3.181267,0.056731
2019,South America,1085900,6.3924,428318165,5.516045,0.036619
2019,Central America,998465,5.877693,174800778,2.251151,0.030017
2019,Eastern Europe,388955,2.289673,293569779,3.780704,0.022998
2019,Southern Europe,256014,1.507085,153908666,1.982095,0.047284
2019,Western Europe,227164,1.337253,194572748,2.505782,0.019688


Unnamed: 0_level_0,Unnamed: 1_level_0,reports,reports_pct,population,population_pct,reports_per_capita
year,region,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020,Southern Asia,5423421,24.934025,1969617895,25.119626,0.062001
2020,South-Eastern Asia,4331450,19.913719,670164339,8.546976,0.066831
2020,Northern Africa,2665168,12.253035,251417771,3.20647,0.088981
2020,Western Asia,2466404,11.339223,285400157,3.639866,0.161639
2020,South America,2318004,10.656958,431530048,5.503541,0.078018
2020,North America,1179834,5.424254,373956673,4.769276,0.010638
2020,Central America,1166927,5.364914,176343834,2.24901,0.056281
2020,Eastern Europe,656341,3.017509,292833373,3.734666,0.028045
2020,Southern Europe,319052,1.466833,153740713,1.96074,0.061314
2020,Western Europe,251219,1.154972,195057784,2.48768,0.03501


Unnamed: 0_level_0,Unnamed: 1_level_0,reports,reports_pct,population,population_pct,reports_per_capita
year,region,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021,Southern Asia,9419913,32.043048,1989452481,25.153349,0.084941
2021,South-Eastern Asia,7665682,26.075805,675796068,8.544328,0.117317
2021,Western Asia,3391946,11.538141,288488938,3.647467,0.215441
2021,Northern Africa,2608377,8.87273,255737738,3.233382,0.096037
2021,North America,1972266,6.708917,375278947,4.744784,0.024389
2021,South America,1538207,5.232409,434254122,5.490428,0.051954
2021,Central America,1003846,3.414712,177661931,2.246242,0.051684
2021,Eastern Europe,303028,1.030789,291464165,3.685084,0.016512
2021,Western Europe,247762,0.842794,195381651,2.470279,0.02465
2021,Southern Europe,192725,0.655579,153374286,1.939165,0.038479


Unnamed: 0_level_0,Unnamed: 1_level_0,reports,reports_pct,population,population_pct,reports_per_capita
year,region,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022,Southern Asia,10712621,33.415301,2007549715,25.172707,0.070018
2022,South-Eastern Asia,6724864,20.976506,681125066,8.540641,0.095926
2022,North America,3404546,10.619617,376870696,4.725589,0.035403
2022,Western Asia,2701204,8.42572,292468620,3.66727,0.168206
2022,South America,2231052,6.9592,436816610,5.477252,0.075842
2022,Northern Africa,1709419,5.332098,259969947,3.259768,0.059205
2022,Central America,1144728,3.570688,179060361,2.245242,0.059855
2022,Eastern Europe,731334,2.281211,289626117,3.631628,0.044626
2022,Western Europe,507797,1.583944,195619899,2.452882,0.031434
2022,Northern Europe,504064,1.5723,106375996,1.333851,0.063095


In [3]:
show_html(h2='Countries with Most CSAM Reports Per Capita')

for year, group in reports_per_capita_country_year(country_data):
    show_table(
        group.head(20),
        title=f'reports per capita and country {year}',
        highlights='reports_per_capita',
    )


Unnamed: 0_level_0,iso3,year,reports,reports_pct,population,population_pct,reports_per_capita,country
rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,ARE,2019,330268,1.944198,9211657,0.118631,0.035853,United Arab Emirates
2,IRQ,2019,1026809,6.044547,41563521,0.535271,0.024705,Iraq
3,LBY,2019,150848,0.888001,6569088,0.084599,0.022963,Libya
4,QAT,2019,53467,0.314746,2807235,0.036153,0.019046,Qatar
5,BHR,2019,26093,0.153602,1494189,0.019243,0.017463,Bahrain
6,DZA,2019,700535,4.12386,42705369,0.549976,0.016404,Algeria
7,SYC,2019,1501,0.008836,104373,0.001344,0.014381,Seychelles
8,SAU,2019,514832,3.030677,35827362,0.461398,0.01437,Saudi Arabia
9,KWT,2019,63492,0.37376,4441100,0.057194,0.014296,Kuwait
10,JOR,2019,123537,0.727229,10698684,0.137782,0.011547,Jordan


Unnamed: 0_level_0,iso3,year,reports,reports_pct,population,population_pct,reports_per_capita,country
rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,LBY,2020,189955,0.873313,6653942,0.084861,0.028548,Libya
2,DZA,2020,1102939,5.070731,43451666,0.554163,0.025383,Algeria
3,MCO,2020,886,0.004073,36922,0.000471,0.023997,Monaco
4,ARE,2020,216874,0.997072,9287289,0.118446,0.023352,United Arab Emirates
5,IRQ,2020,919966,4.229518,42556984,0.542753,0.021617,Iraq
6,QAT,2020,56683,0.260598,2760385,0.035205,0.020534,Qatar
7,BTN,2020,15379,0.070705,772506,0.009852,0.019908,Bhutan
8,MAR,2020,705863,3.245185,36688772,0.467912,0.019239,Morocco
9,KWT,2020,68582,0.315304,4360445,0.055611,0.015728,Kuwait
10,BHR,2020,22960,0.105558,1477469,0.018843,0.01554,Bahrain


Unnamed: 0_level_0,iso3,year,reports,reports_pct,population,population_pct,reports_per_capita,country
rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,LBY,2021,270811,0.921199,6735277,0.085156,0.040208,Libya
2,ARE,2021,327820,1.115122,9365145,0.118407,0.035004,United Arab Emirates
3,IRQ,2021,1220470,4.151586,43533593,0.550411,0.028035,Iraq
4,PHL,2021,3188793,10.84709,113880328,1.439829,0.028001,Philippines
5,QAT,2021,73536,0.250142,2688235,0.033988,0.027355,Qatar
6,DZA,2021,1171653,3.985529,44177969,0.558558,0.026521,Algeria
7,KHM,2021,395793,1.346341,16589024,0.209741,0.023859,Cambodia
8,BHR,2021,32346,0.110029,1463266,0.018501,0.022105,Bahrain
9,BLZ,2021,8297,0.028223,400031,0.005058,0.020741,Belize
10,SAU,2021,708980,2.411687,35950396,0.454534,0.019721,Saudi Arabia


Unnamed: 0_level_0,iso3,year,reports,reports_pct,population,population_pct,reports_per_capita,country
rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,ARE,2022,265307,0.827558,9441129,0.118383,0.028101,United Arab Emirates
2,LBY,2022,163336,0.509485,6812341,0.08542,0.023976,Libya
3,QAT,2022,61167,0.190795,2695122,0.033794,0.022695,Qatar
4,PHL,2022,2576182,8.035746,115559009,1.448997,0.022293,Philippines
5,IRQ,2022,905883,2.825672,44496122,0.557938,0.020359,Iraq
6,BHR,2022,25896,0.080776,1472233,0.01846,0.01759,Bahrain
7,SAU,2022,602745,1.88011,36408820,0.456531,0.016555,Saudi Arabia
8,DZA,2022,731167,2.28069,44903225,0.563042,0.016283,Algeria
9,BLZ,2022,6103,0.019037,405273,0.005082,0.015059,Belize
10,MDV,2022,7813,0.024371,523787,0.006568,0.014916,Maldives


In [7]:
for year, group in country_data.reports_per_capita.groupby('year'):
    map_data = group.reset_index()

    fig = go.Figure(
        data=go.Choropleth(
            locations=map_data['iso3'],
            locationmode='ISO-3',
            z=map_data['reports_per_capita'],
            text=map_data['country'],
            colorscale='Plasma_r',
            showscale=True,
            colorbar_title='Reports<br>per Capita',
        ),
        layout=dict(
            width=600,
            height=300,
            margin={"r":0,"t": 40, "l":0,"b":0},
            geo=dict(
                showframe=False, # Don't frame the map
                lataxis_range=[-60,90], # Crop antarctica out of map
                bgcolor='#eaeaec', # Color of oceans
            ),
            #paper_bgcolor='#000000', # Enable when rightsizing the height
        ),
    )

    # Reduce width of border lines and improve presentation of color bar.
    fig.update_traces(
        marker_line_width=0.4,
        colorbar_thickness=20,
        colorbar_tickformat='.3f',
        colorbar_title_font_size=13,
        selector=dict(type='choropleth'),
    )

    # Suppress interior lakes
    fig.update_geos(fitbounds="locations", visible=False)

    # Set title
    fig.update_layout(
        title_text=f'CSAM Reports per Capita {year}',
    )
    fig.show()


In [5]:
show_html(h2='CSAM Reports per Social Media Platform')

disclosures = ingest_reports_per_platform(
    REPORTS_PER_PLATFORM, include_redundant=True)
comparisons = compare_all_platform_reports(disclosures)
for platform, data in comparisons.items():
    show_table(data, title=platform)

Unnamed: 0_level_0,pieces,reports,Δ%,NCMEC
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020,,3432,0.0,3432
2021,1608.0,2684,-14.940387,2283
2022,5530.0,4969,590.480982,34310


Unnamed: 0_level_0,pieces,reports,Δ%,NCMEC
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019,,724,0.0,724
2020,,2233,0.0,2233
2021,9258.0,10059,0.0,10059
2022,80888.0,52592,0.0,52592


Unnamed: 0_level_0,pieces,reports,Δ%,NCMEC
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019,,,,82030
2020,,,,144095
2021,,,,512522
2022,746051.0,285470.0,93.045154,551086


Unnamed: 0_level_0,pieces,reports,Δ%,NCMEC
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020,4437853,547875,-0.213735,546704
2021,6696497,870319,0.627816,875783
2022,13402885,2174319,0.010532,2174548


In [6]:
show_html(h2="Meta's Ever-Changing Transparency Disclosures")

meta_disclosures = meta.read_all('data', '2022q2', '2022q4')
meta_differences = meta.diff_all(meta_disclosures)

for p1, delta in meta_differences.items():
    divergent = (
        delta
        .groupby('period')
        .size()
        .to_frame()
        .rename(columns={0: 'divergent'})
    )

    p2 = p1 + 1
    title = f'Δ(Q{p1.quarter}-{p1.year} / Q{p2.quarter}-{p2.year})'
    show_table(divergent, title=title)


Unnamed: 0_level_0,divergent
period,Unnamed: 1_level_1
2020 Q4,77
2021 Q1,3
2021 Q2,4
2022 Q2,29


Unnamed: 0_level_0,divergent
period,Unnamed: 1_level_1
2020 Q2,67
2020 Q3,60
2020 Q4,58
2022 Q3,18
