In [1]:
import pickle
from datetime import datetime
from types import SimpleNamespace

import ads
import pandas as pds
from pint import Quantity
from tqdm import tqdm


ads.config.token = "place your ADS API token here"

In [2]:
def build_query(**kwargs):
    return " ".join([f"{key}:{value}" for key, value in kwargs.items()])


def get_all_publications(**kwargs):
    return list(
        ads.SearchQuery(
            q=build_query(**kwargs),
            rows=2000,
            max_pages=100,
            fl=[
                "id",
                "property",
                "bibcode",
                "title",
                "citation_count",
                "citation",
                "database",
                "author",
                "aff",
                "doi",
                "year",
            ],
        )
    )


def make_database(missions):
    return SimpleNamespace(
        **{
            name: get_all_publications(**mission.query)
            for name, mission in tqdm(missions.__dict__.items())
        }
    )


def filter_refereed(pub_list):
    return list(filter(lambda p: "REFEREED" in p.property or [], pub_list))


def filter_affiliation(pub_list, name_list):
    def has_lpp_affiliation(pub) -> bool:
        def is_lpp(aff: str) -> bool:
            return any([name.lower() in aff.lower() for name in name_list])

        if pub.aff is None:
            print(
                f"Got none: pub.aff {pub.id}, {pub.doi}, {pub.title}, {pub.database}, {pub.property}"
            )
        return any(filter(lambda affiliation: is_lpp(affiliation or []), pub.aff or []))

    return filter(lambda p: has_lpp_affiliation(p), pub_list)


def filter_publication_year(pub_list, year: int):
    return filter(lambda p: str(year) in p.year, pub_list)


def filter_publication_until_year(pub_list, year: int):
    return filter(lambda p: year > int(p.year), pub_list)

In [327]:
Quantity("620 * 2  kg")

# Missions database, add or remove mission here:

This database is used to generate queries for ADS and get all article for each entry (Mission) in the database.

bilan.total_publications_LPP_2021[1][0].titleEntry specification:
- launch_date must be a datetime
- query can be anything compatible with ADS query
- add suspicious=True if you feel like request results doesn't match what you would expect

In [3]:
missions = SimpleNamespace(
    PSP=SimpleNamespace(
        name="Parker Solar Probe",
        launch_date=datetime(2018, 8, 12),
        query={"body": '"Parker Solar Probe" OR "NASA PSP"'},
        mass=Quantity("685 kg"),
    ),
    SDO=SimpleNamespace(
        name="Solar Dynamics Observatory",
        launch_date=datetime(2010, 2, 11),
        query={
            "body": '"Solar Dynamics Observatory" OR "HMI/SDO" OR "AIA/SDO" OR "EVE/SDO"'
        },
        mass=Quantity("3,100 kg"),
    ),
    SolarOrbiter=SimpleNamespace(
        name="Solar Orbiter",
        launch_date=datetime(2020, 2, 10),
        query={
            "body": '"Solar Orbiter" OR "SOLO/RPW" OR "SOLO/LFR"'
        },
        mass=Quantity("1,800 kg"),
    ),
    SOHO=SimpleNamespace(
        name="Solar and Heliospheric Observatory",
        launch_date=datetime(1995, 12, 2),
        query={"body": '"SOHO" OR "Solar and Heliospheric Observatory"'},
        mass=Quantity("1,850 kg"),
    ),
    MMS=SimpleNamespace(
        name="Magnetospheric Multiscale Mission",
        launch_date=datetime(2015, 3, 13),
        query={"body": '"Magnetospheric Multiscale" OR "NASA/MMS"'},
        mass=Quantity("1,360 kg"),
    ),
    Cluster=SimpleNamespace(
        name="Cluster",
        launch_date=datetime(2000, 7, 16),
        query={
            "body": '"Cluster spacecraft" OR "Cluster mission" OR "Cluster satellite constellation"'
        },
        mass=Quantity("1,200 kg"),
    ),
    Cassini=SimpleNamespace(
        name="Cassini",
        launch_date=datetime(1997, 10, 15),
        query={
            "body": '"Cassini mission" OR "Cassini spacecraft" OR "Cassini orbiter" OR "Cassini-Huygens" OR "Huygens module" OR ="Huygens probe" OR "Huygens lander"'
        },
        mass=Quantity("5,712 kg"),
    ),
    DoubleStar=SimpleNamespace(
        name="DoubleStar",
        launch_date=datetime(2003, 12, 29),
        query={
            "body": '"Double Star spacecrafts" OR "Double Star satellites" OR "Double Star TC" OR "TC-1 satellite" OR "TC-2 satellite"'
        },
        mass=Quantity("560 kg"),
    ),
    Demeter=SimpleNamespace(
        name="Demeter",
        launch_date=datetime(2004, 6, 29),
        query={
            "body": '"DEMETER spacecraft" OR "DEMETER mission" OR "DEMETER satellite"'
        },
        mass=Quantity("130 kg"),
    ),
    STEREO=SimpleNamespace(
        name="STEREO",
        launch_date=datetime(2006, 10, 26),
        query={"body": '"STEREO"'},
        mass=Quantity("620 * 2  kg"),
    ),
    ACE=SimpleNamespace(
        name="Advanced Composition Explorer",
        launch_date=datetime(1997, 8, 25),
        query={
            "body": '"Advanced Composition Explorer" OR "ACE mission" OR "ACE spacecraft" OR ="ACE satellite"'
        },
        mass=Quantity("757 kg"),
    ),
    IMP_8=SimpleNamespace(
        name="IMP-8",
        launch_date=datetime(1973, 11, 26),
        query={
            "body": '"Interplanetary Monitoring Platform 8" OR "IMP-8" OR "IMP-J" OR "Explorer 50"'
        },
        mass=Quantity("371  kg"),
    ),
    Geotail=SimpleNamespace(
        name="Geotail",
        launch_date=datetime(1992, 7, 24),
        query={"body": '"Geotail"'},
        mass=Quantity("980 kg"),
    ),
    Wind=SimpleNamespace(
        name="Wind",
        launch_date=datetime(1994, 12, 1),
        query={
            "body": '"Wind mission" OR "Wind spacecraft" OR "Wind satellite" OR "WIND/WAVES" OR "Wind/3DP" OR "Wind/MFI" OR "Wind/SWE" OR "Wind/KONUS" OR "Wind/TGRS" OR "Wind/EPACT" OR "Wind/SMS"'
        },
        mass=Quantity("1,250 kg"),
    ),
    TIMED=SimpleNamespace(
        name="TIMED",
        launch_date=datetime(2001, 12, 7),
        query={"body": '"TIMED"'},
        mass=Quantity("660 kg"),
    ),
    VEX=SimpleNamespace(
        name="Venus Express",
        launch_date=datetime(2015, 1, 18),
        query={
            "body": '"Venus Express" OR "VEX mission" OR "VEX spacecraft" OR "VEX probe"'
        },
        mass=Quantity("1,270 kg"),
    ),
    THEMIS=SimpleNamespace(
        name="THEMIS",
        launch_date=datetime(2007, 2, 17),
        query={
            "body": '"THEMIS mission" OR "THEMIS spacecraft" OR "THEMIS-A" OR "THEMIS-B" OR "THEMIS probe" OR "ARTEMIS-P1"'
        },
        mass=Quantity("126 kg * 5"),
    ),
    Juno=SimpleNamespace(
        name="Juno",
        launch_date=datetime(2011, 8, 5),
        query={
            "body": '"Juno mission"  OR "Juno spacecraft" OR "Juno orbiter" OR "NASA/Juno" OR "Juno MWR" OR "Juno JIRAM" OR "Juno MAG" OR "Juno GS" OR "Juno JADE" OR "Juno JEDI" OR "Juno Waves" OR "Juno UVS" OR "Juno JCM"'
        },
        mass=Quantity("3,625 kg"),
    ),
    Rosetta=SimpleNamespace(
        name="Rosetta",
        launch_date=datetime(2004, 3, 2),
        query={
            "body": '"Rosetta mission" OR "Rosetta spacecraft" OR "Rosetta Churyumov-Gerasimenko" OR "Philae Churyumov-Gerasimenko"'
        },
        mass=Quantity("3,000 kg"),
    ),
    Galileo=SimpleNamespace(
        name="Galileo",
        launch_date=datetime(1989, 10, 18),
        query={
            "body": '"Galileo mission" OR "Galileo spacecraft" OR "Galileo orbiter" OR "NASA/Galileo"  OR "Galileo entry probe"'
        },
        mass=Quantity("2,560 kg"),
    ),
    Gaia=SimpleNamespace(
        name="Gaia",
        launch_date=datetime(2013, 12, 19),
        query={"body": '"GAIA"'},
        mass=Quantity("2,029 kg"),
    ),
    MEX=SimpleNamespace(
        name="Mars Express",
        launch_date=datetime(2003, 6, 2),
        query={
            "body": '"Mars Express" OR "MEX mission" OR "MEX spacecraft" OR "MEX probe"'
        },
        mass=Quantity("1,123 kg"),
    ),
    Dawn=SimpleNamespace(
        name="Dawn",
        launch_date=datetime(2007, 11, 27),
        query={
            "body": '"Dawn mission" OR "Dawn spacecraft" OR "Dawn orbiter" OR "NASA/Dawn" OR "Dawn Vesta" OR "Dawn Ceres"'
        },
        mass=Quantity("1,217.7 kg"),
    ),
    HST=SimpleNamespace(
        name="HST",
        launch_date=datetime(1990, 3, 24),
        query={"body": '"HST"'},
        suspicious=True,
        mass=Quantity("11,110 kg"),
    ),
    Chandra=SimpleNamespace(
        name="Chandra",
        launch_date=datetime(1999, 7, 23),
        query={
            "body": '"Chandra X-ray Observatory" OR "CXO" OR "Chandra mission" OR "Chandra satellite" OR "Chandra telescope" OR "Chandra spacecraft" OR "Chandra observatory" OR "Chandra X-ray" OR "Chandra data" OR "Chandra image" OR "Chandra spectrum" OR "NASA/Chandra" OR "Chandra/ACIS" OR "Chandra/HRC" OR "Chandra/LETG" OR "Chandra/HETG"'
        },
        suspicious=True,
        mass=Quantity("5,860 kg"),
    ),
)

CO2_mass_coefficient = Quantity("50000 t / t")

# Build and save publication database

In [407]:
all_publications = make_database(missions=missions)

with open(f"all_publications-{datetime.now().isoformat()}.pkl", "wb") as f:
    f.write(pickle.dumps(all_publications))

100%|██████████| 25/25 [15:31<00:00, 37.25s/it] 


# Load from local file to avoid repeating the request (if notebook crashed or for any reason...)

In [4]:
all_publications = pickle.load(
    open("all_publications-2023-01-20T15:38:55.867090.pkl", "rb")
)

# Actual analysis

In [5]:
def build_table(missions_db, publications, labs, years):
    years = sorted(years)
    bilan = SimpleNamespace(
        mission=[],
        mission_footprint=[],
        total_publications=[],
        anual_footprint=[],
        years_since_launch=[],
        footprint_per_paper=[],
        **{f"total_publications_LPP_{year}": [] for year in years},
    )
    for lab in labs:
        bilan.__dict__[f"total_publications_{lab.name}"] = []
        for year in years:
            bilan.__dict__[f"total_publications_{lab.name}_{year}"] = []
            bilan.__dict__[f"CO2e_mass_{lab.name}_{year}"] = []

    for mission_name, publications in publications.__dict__.items():

        mission = missions_db.__dict__[mission_name]

        years_since_launch = (
            datetime(max(years), 12, 31) - mission.launch_date
        ).days / 365
        bilan.years_since_launch.append(years_since_launch)

        mission_footprint = mission.mass * CO2_mass_coefficient
        bilan.mission_footprint.append(round(mission_footprint.to("t").magnitude))

        bilan.mission.append(mission_name)
        refered_publications = list(
            filter_publication_until_year(filter_refereed(publications), max(years) + 1)
        )
        bilan.total_publications.append(len(refered_publications))

        anual_footprint = mission_footprint / years_since_launch
        bilan.anual_footprint.append(round(anual_footprint.to("t").magnitude))

        footprint_per_paper = mission_footprint / len(refered_publications)
        bilan.footprint_per_paper.append(round(footprint_per_paper.to("t").magnitude))

        for lab in labs:
            total_lab = list(
                filter_affiliation(refered_publications, lab.possible_names)
            )
            bilan.__dict__[f"total_publications_{lab.name}"].append(len(total_lab))

            for year in years:
                total_publications = len(
                    list(filter_publication_year(refered_publications, year))
                )
                total_lab_publications = len(
                    list(filter_publication_year(total_lab, year))
                )
                if total_publications > 0 and mission.launch_date.year <= year:
                    bilan.__dict__[f"total_publications_{lab.name}_{year}"].append(
                        total_lab_publications
                    )
                    bilan.__dict__[f"CO2e_mass_{lab.name}_{year}"].append(
                        round(
                            (total_lab_publications * footprint_per_paper)
                            .to("t")
                            .magnitude
                        )
                    )
                else:
                    bilan.__dict__[f"total_publications_{lab.name}_{year}"].append(
                        total_lab_publications
                    )
                    bilan.__dict__[f"CO2e_mass_{lab.name}_{year}"].append(0)

    return pds.DataFrame.from_dict(bilan.__dict__).set_index("mission")

In [6]:
df = build_table(
    missions,
    all_publications,
    [
        SimpleNamespace(
            name="LPP", possible_names=["LPP", "laboratoire de physique des plasmas"]
        ),
        SimpleNamespace(
            name="IRAP", possible_names=["IRAP", "INSTITUT DE RECHERCHE EN ASTROPHYSIQUE ET PLANÉTOLOGIE"]
        ),
        SimpleNamespace(
            name="LPC2E", possible_names=["LPC2E", "Laboratoire de Physique et Chimie de l’Environnement et de l’Espace"]
        )
    ],
    range(2019, 2022),
)
df

Got none: pub.aff 17661366, ['10.1038/d41586-019-03710-1'], ['Parker probe kicks off a golden age for solar exploration'], ['astronomy', 'general'], ['ARTICLE', 'ESOURCE', 'REFEREED']
Got none: pub.aff 20896698, ['10.1038/d41586-021-03708-8'], ["Imperilled glacier, COVID immunity and `touching' the Sun"], ['astronomy'], ['ARTICLE', 'ESOURCE', 'REFEREED']
Got none: pub.aff 18779649, ['10.1038/d41586-020-00360-6'], ['CRISPR enhancement, coronavirus source and a controversial appointment'], ['astronomy', 'general'], ['ARTICLE', 'ESOURCE', 'REFEREED']
Got none: pub.aff 15269145, ['10.1038/d41586-018-05938-9'], ['Plagiarism rules, discrimination settlements and asteroid close-up'], ['general'], ['ARTICLE', 'ESOURCE', 'REFEREED']
Got none: pub.aff 19818323, None, ['Here and There'], ['astronomy'], ['ADS_OPENACCESS', 'ARTICLE', 'ESOURCE', 'OPENACCESS', 'REFEREED']
Got none: pub.aff 15508609, None, ['2018 February meeting of the RAS'], ['astronomy'], ['ADS_OPENACCESS', 'ARTICLE', 'ESOURCE', 'O

Unnamed: 0_level_0,mission_footprint,total_publications,anual_footprint,years_since_launch,footprint_per_paper,total_publications_LPP_2019,total_publications_LPP_2020,total_publications_LPP_2021,total_publications_LPP,CO2e_mass_LPP_2019,...,CO2e_mass_IRAP_2020,total_publications_IRAP_2021,CO2e_mass_IRAP_2021,total_publications_LPC2E,total_publications_LPC2E_2019,CO2e_mass_LPC2E_2019,total_publications_LPC2E_2020,CO2e_mass_LPC2E_2020,total_publications_LPC2E_2021,CO2e_mass_LPC2E_2021
mission,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
PSP,34250,982,10106,3.389041,35,5,12,32,51,174,...,1186,41,1430,85,4,140,47,1639,34,1186
SDO,155000,10798,13033,11.893151,14,0,1,2,6,0,...,301,23,330,37,2,29,12,172,1,14
SolarOrbiter,90000,5517,47609,1.890411,16,3,10,31,67,0,...,506,47,767,47,2,0,17,277,21,343
SOHO,92500,14888,3544,26.09863,6,2,3,6,22,12,...,99,22,137,34,4,25,3,19,3,19
MMS,68000,4034,9988,6.808219,17,31,19,31,155,523,...,354,17,287,22,1,17,6,101,6,101
Cluster,60000,2343,2794,21.473973,26,1,5,10,108,26,...,77,9,230,37,0,0,4,102,0,0
Cassini,285600,5834,11788,24.227397,49,1,1,4,12,49,...,49,3,147,16,0,0,0,0,0,0
DoubleStar,28000,94,1554,18.019178,298,0,0,0,1,0,...,298,1,298,0,0,0,0,0,0,0
Demeter,6500,264,371,17.517808,25,1,0,0,3,25,...,0,0,0,48,4,98,3,74,6,148
STEREO,62000,4891,4081,15.191781,13,1,4,8,39,13,...,304,20,254,50,3,38,18,228,12,152


In [7]:
df

Unnamed: 0_level_0,mission_footprint,total_publications,anual_footprint,years_since_launch,footprint_per_paper,total_publications_LPP_2019,total_publications_LPP_2020,total_publications_LPP_2021,total_publications_LPP,CO2e_mass_LPP_2019,...,CO2e_mass_IRAP_2020,total_publications_IRAP_2021,CO2e_mass_IRAP_2021,total_publications_LPC2E,total_publications_LPC2E_2019,CO2e_mass_LPC2E_2019,total_publications_LPC2E_2020,CO2e_mass_LPC2E_2020,total_publications_LPC2E_2021,CO2e_mass_LPC2E_2021
mission,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
PSP,34250,982,10106,3.389041,35,5,12,32,51,174,...,1186,41,1430,85,4,140,47,1639,34,1186
SDO,155000,10798,13033,11.893151,14,0,1,2,6,0,...,301,23,330,37,2,29,12,172,1,14
SolarOrbiter,90000,5517,47609,1.890411,16,3,10,31,67,0,...,506,47,767,47,2,0,17,277,21,343
SOHO,92500,14888,3544,26.09863,6,2,3,6,22,12,...,99,22,137,34,4,25,3,19,3,19
MMS,68000,4034,9988,6.808219,17,31,19,31,155,523,...,354,17,287,22,1,17,6,101,6,101
Cluster,60000,2343,2794,21.473973,26,1,5,10,108,26,...,77,9,230,37,0,0,4,102,0,0
Cassini,285600,5834,11788,24.227397,49,1,1,4,12,49,...,49,3,147,16,0,0,0,0,0,0
DoubleStar,28000,94,1554,18.019178,298,0,0,0,1,0,...,298,1,298,0,0,0,0,0,0,0
Demeter,6500,264,371,17.517808,25,1,0,0,3,25,...,0,0,0,48,4,98,3,74,6,148
STEREO,62000,4891,4081,15.191781,13,1,4,8,39,13,...,304,20,254,50,3,38,18,228,12,152


In [8]:
df.to_excel("space_missions_carbon_footprint.xlsx")