In [1]:
import re
import unicodedata
import pandas as pd


In [2]:
# Institutions
authors_institution_df = pd.read_csv("data_A_Z.csv")
authors_institution_df["share_pct"] = authors_institution_df["share_pct"].fillna(100)

# RePEc / JEL
A_B_df = pd.read_csv("RePEc_Full_A_B.csv")
D_df   = pd.read_csv("RePEc-D-not-full.csv")
E_Z_df = pd.read_csv("RePEc_Full_E_Z.csv")
O_df = pd.read_csv("RePEc_JEL_O.csv")
H_df = pd.read_csv("RePEc_Full_H.csv")
S_Y_df = pd.read_csv("RePEc-from-S-to-Y-full.csv")
J_df   = pd.read_csv("RePEc_J.csv")

JEL_df = pd.concat(
    [A_B_df, D_df, E_Z_df, S_Y_df, J_df, O_df, H_df],
    ignore_index=True
).drop_duplicates()


# explosion des co-auteurs

In [3]:
JEL_df = (
    JEL_df
        .dropna(subset=["Author(s)"])
        .assign(Author=lambda x: x["Author(s)"].str.split(";"))
        .explode("Author")
        .assign(Author=lambda x: x["Author"].str.strip())
        .query("Author != ''")
        .drop(columns=["Author(s)"])
        .rename(columns={"Author": "Author(s)"})
        .reset_index(drop=True)
)


# nettoyage des noms

In [4]:
def strip_accents(s: str) -> str:
    s = unicodedata.normalize("NFKD", s)
    return "".join(ch for ch in s if not unicodedata.combining(ch))

def normalize_whitespace(s: str) -> str:
    return re.sub(r"\s+", " ", s).strip()

def standardize_name(raw):
    if raw is None or pd.isna(raw):
        return ""
    s = strip_accents(str(raw).lower())
    if "," in s:
        last, rest = s.split(",", 1)
        s = rest.strip() + " " + last.strip()
    s = re.sub(r"[^\w\s]", " ", s)
    return normalize_whitespace(s)


# standardisation des 2 côtés

In [5]:
JEL_df["author_std"] = JEL_df["Author(s)"].apply(standardize_name)
authors_institution_df["author_std"] = authors_institution_df["author_name"].apply(standardize_name)


# clé auteur principal (ordre libre sans initiales)

In [6]:
def author_key_unordered_no_initial(raw):
    s = standardize_name(raw)
    if not s:
        return ""
    tokens = [t for t in s.split() if len(t) > 1]
    return "|".join(sorted(tokens))


In [7]:
JEL_df["author_key"] = JEL_df["Author(s)"].apply(author_key_unordered_no_initial)
authors_institution_df["author_key"] = authors_institution_df["author_name"].apply(author_key_unordered_no_initial)


In [8]:
JEL_matched = (
    JEL_df
        .merge(
            authors_institution_df,
            on="author_key",
            how="inner",
            suffixes=("_jel", "_inst")
        )
)

JEL_matched["match_type"] = "exact"


In [9]:
JEL_matched["author_name"].nunique()

11080

In [10]:
JEL_matched

Unnamed: 0,JEL Subject,Title,Journal,Year,Type,Affiliations,URL,Author(s),author_std_jel,author_key,author_name,short_id,institution,share_pct,location,repec_institution_id,author_url,author_std_inst,match_type
0,A,Preparing students for careers using business ...,,2020.0,Working Paper,Erland Hejn Nielsen: Department of Economics a...,https://econpapers.repec.org/paper/aahaarhec/2...,Nielsen Steen,nielsen steen,nielsen|steen,"Nielsen, Steen",pni71,Institut for Økonomi Aarhus Universitet,100,"Aarhus, Denmark",edi:ifoaudk,https://ideas.repec.org/e/pni71.html,steen nielsen,exact
1,A,"Measuring Democracy - Eight indices: Polity, F...",,2021.0,Working Paper,Martin Paldam: Department of Economics and Bus...,https://econpapers.repec.org/paper/aahaarhec/2...,Paldam Martin,paldam martin,martin|paldam,"Paldam, Martin",ppa574,School of Economics and Management Institut fo...,100,"Aarhus, Denmark",edi:anaaudk,https://ideas.repec.org/f/ppa574.html,martin paldam,exact
2,A,Oeconstudiet og den ÃÂ¸konomiske faggruppe ve...,,2023.0,Working Paper,Svend Hylleberg: Department of Economics and B...,https://econpapers.repec.org/paper/aahaarhec/2...,Hylleberg Svend,hylleberg svend,hylleberg|svend,"Hylleberg, Svend",phy1,Center for Research in Econometric Analysis of...,100,"Aarhus, Denmark",edi:creaudk,https://ideas.repec.org/e/phy1.html,svend hylleberg,exact
3,A,How Do Truckers Perceive and Respond to the Ri...,AEI Economic Perspectives,2024.0,Journal Article,Michael Strain: American Enterprise Institute,https://econpapers.repec.org/article/aeijournl...,Strain Michael,strain michael,michael|strain,"Strain, Michael R.",pst593,Institute of Labor Economics (IZA),3%,"Bonn, Germany",edi:izaaade,https://ideas.repec.org/f/pst593.html,michael r strain,exact
4,A,Architecture to Transform Classic Academic Cou...,The AMFITEATRU ECONOMIC journal,2024.0,Journal Article,Andrei Bobocea: Bucharest University of Econom...,https://econpapers.repec.org/article/aesamfeco...,Batagan Lorena,batagan lorena,batagan|lorena,"Batagan, Lorena",pba893,Academia de Studii Economice din Bucureşti,100,"Bucureşti, Romania",edi:aseeero,https://ideas.repec.org/f/pba893.html,lorena batagan,exact
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
96116,H,Optimal taxation under different concepts of j...,,2018.0,Working Paper,,https://econpapers.repec.org/paper/zbwrwirep/7...,Rostam-Afschar Davud,rostam afschar davud,afschar|davud|rostam,"Rostam-Afschar, Davud",pro386,Institute of Labor Economics (IZA),1%,"Bonn, Germany",edi:izaaade,https://ideas.repec.org/f/pro386.html,davud rostam afschar,exact
96117,H,Do tax cuts Increase consumption? An experimen...,,2014.0,Working Paper,,https://econpapers.repec.org/paper/zbwsfb649/s...,Meissner Thomas,meissner thomas,meissner|thomas,"Meissner, Thomas",pme758,School of Business and Economics Maastricht Un...,100,"Maastricht, Netherlands",edi:femaanl,https://ideas.repec.org/f/pme758.html,thomas meissner,exact
96118,H,Do tax cuts Increase consumption? An experimen...,,2014.0,Working Paper,,https://econpapers.repec.org/paper/zbwsfb649/s...,Rostam-Afschar Davud,rostam afschar davud,afschar|davud|rostam,"Rostam-Afschar, Davud",pro386,Fakultät für Betriebswirtschaftslehre Universi...,98%,"Mannheim, Germany",edi:bvmande,https://ideas.repec.org/f/pro386.html,davud rostam afschar,exact
96119,H,Do tax cuts Increase consumption? An experimen...,,2014.0,Working Paper,,https://econpapers.repec.org/paper/zbwsfb649/s...,Rostam-Afschar Davud,rostam afschar davud,afschar|davud|rostam,"Rostam-Afschar, Davud",pro386,Global Labor Organization (GLO),1%,"Essen, Germany",edi:glabode,https://ideas.repec.org/f/pro386.html,davud rostam afschar,exact


# travail sur les non matchés

In [11]:
matched_keys = set(JEL_matched["author_key"])

JEL_unmatched = (
    JEL_df[
        ~JEL_df["author_key"].isin(matched_keys)
    ]
    .copy()
)


In [12]:
def author_tokens(raw):
    s = standardize_name(raw)
    if not s:
        return set()
    return {t for t in s.split() if len(t) > 1}


In [13]:
JEL_unmatched["tokens"] = JEL_unmatched["Author(s)"].apply(author_tokens)
authors_institution_df["tokens"] = authors_institution_df["author_name"].apply(author_tokens)



In [14]:
inst_tokens_list = list(
    zip(
        authors_institution_df["tokens"],
        authors_institution_df["author_name"]
    )
)


In [15]:
from collections import defaultdict

token_index = defaultdict(list)

for _, row in authors_institution_df.iterrows():
    inst_tokens = row["tokens"]
    if len(inst_tokens) < 2:
        continue

    for tok in inst_tokens:
        if len(tok) > 3:
            token_index[tok].append((inst_tokens, row["author_name"]))



In [16]:
def subset_match_simple_fast(row):
    jel_tokens = row["tokens"]
    if len(jel_tokens) < 2:
        return None

    # tokens longs côté JEL
    long_tokens = [t for t in jel_tokens if len(t) > 3]
    if len(long_tokens) < 2:
        return None

    # candidats = institutions partageant ≥1 token long
    candidates = []
    for tok in long_tokens:
        candidates.extend(token_index.get(tok, []))

    for inst_tokens, inst_name in candidates:
        # mots communs longs
        common = {
            t for t in jel_tokens & inst_tokens
            if len(t) > 3
        }

        if len(common) < 2:
            continue

        # inclusion stricte
        if jel_tokens.issubset(inst_tokens) or inst_tokens.issubset(jel_tokens):
            return inst_name

    return None




In [17]:
JEL_unmatched["author_name_subset_strict"] = (
    JEL_unmatched.apply(subset_match_simple_fast, axis=1)
)



In [18]:
subset_simple = JEL_unmatched[
    JEL_unmatched["author_name_subset_strict"].notna()
]


In [19]:
subset_simple[
    ["Author(s)", "author_name_subset_strict"]
].drop_duplicates().sample(50, random_state=0)


Unnamed: 0,Author(s),author_name_subset_strict
82443,Franaszek Joanna,"Franaszek, Joanna Magdalena"
165011,Sobiech Pellegrini Izabela,"Sobiech, Izabela"
331548,Mendolicchio Concetta,"Mendolicchio, Concetta Barbara"
103311,Graziano Maria,"Graziano, Maria Gabriella"
154322,Sariiski Grigor,"Sariiski, Grigor Dimitrov"
249584,Jora Octavian-Dragomir Nedef Matei-Åtefan,"Jora, Octavian-Dragomir"
451843,Diaz Daniel,"Diaz-Fuentes, Daniel"
193633,Bogale Melaku,"Fitawok, Melaku Bogale"
355517,Kumar Nitin,"Kumar Singh, Nitin"
494889,Muscatelli Vito,"Muscatelli, Vito Antonio"


In [20]:
subset_simple = subset_simple.copy()

subset_simple["author_name"] = subset_simple["author_name_subset_strict"]


In [21]:
subset_simple = subset_simple.drop(columns=["author_name_subset_strict"])


In [22]:
subset_simple.head()


Unnamed: 0,JEL Subject,Title,Journal,Year,Type,Affiliations,URL,Author(s),author_std,author_key,tokens,author_name
206,A,Influencing ESG Perception in SMEs through CSR...,The AMFITEATRU ECONOMIC journal,2024.0,Journal Article,Jaroslav Belas: Alexander Dubcek University in...,https://econpapers.repec.org/article/aesamfeco...,Balcerzak Adam P.,balcerzak adam p,adam|balcerzak,"{adam, balcerzak}","Balcerzak, Adam P. Sr."
220,A,Size of Government and Economic Growth: A Conv...,The AMFITEATRU ECONOMIC journal,2024.0,Journal Article,Juan David Garcia Gonzalez: University of Alme...,https://econpapers.repec.org/article/aesamfeco...,Manso Jose Ramos Pires,manso jose ramos pires,jose|manso|pires|ramos,"{manso, pires, ramos, jose}","Ramos, Jose"
258,A,Online Teaching Practices and the Effectivenes...,The AMFITEATRU ECONOMIC journal,2020.0,Journal Article,Cristina Venera Tartavulea: The Bucharest Univ...,https://econpapers.repec.org/article/aesamfeco...,Albu Catalin Nicolae,albu catalin nicolae,albu|catalin|nicolae,"{albu, nicolae, catalin}","Albu, Nicolae"
319,A,ESG rating of capital's effect on firms' finan...,Access Journal,2024.0,Journal Article,Sholpan Shalbayeva: Almaty Management Universi...,https://econpapers.repec.org/article/aipaccess...,Ismailov Taner,ismailov taner,ismailov|taner,"{ismailov, taner}","Ismailov, Taner Mustafov"
403,A,A NEW CHALLENGE â INTELLECTUAL CAPITAL EVALU...,Revista Tinerilor Economisti (The Young Econom...,2014.0,Journal Article,Ph. D Student Maria-Luminita Gogan: âPoliteh...,https://econpapers.repec.org/article/aiorteyej...,Gogan Ph. D Student Maria-Luminita,gogan ph d student maria luminita,gogan|luminita|maria|ph|student,"{ph, student, maria, gogan, luminita}","Gogan, Luminita Maria"


In [23]:
subset_simple_merged = (
    subset_simple
        .merge(
            authors_institution_df,
            on="author_name",
            how="inner",
            suffixes=("_jel", "_inst")
        )
)




In [24]:
subset_simple_merged.shape



(12203, 21)

In [25]:
subset_simple_merged.head(50)


Unnamed: 0,JEL Subject,Title,Journal,Year,Type,Affiliations,URL,Author(s),author_std_jel,author_key_jel,...,author_name,short_id,institution,share_pct,location,repec_institution_id,author_url,author_std_inst,author_key_inst,tokens_inst
0,A,Influencing ESG Perception in SMEs through CSR...,The AMFITEATRU ECONOMIC journal,2024.0,Journal Article,Jaroslav Belas: Alexander Dubcek University in...,https://econpapers.repec.org/article/aesamfeco...,Balcerzak Adam P.,balcerzak adam p,adam|balcerzak,...,"Balcerzak, Adam P. Sr.",pba1247,Wydział Nauk Ekonomicznych Universytet Warmińs...,100,"Olsztyn, Poland",edi:wnuwmpl,https://ideas.repec.org/f/pba1247.html,adam p sr balcerzak,adam|balcerzak|sr,"{balcerzak, adam, sr}"
1,A,Size of Government and Economic Growth: A Conv...,The AMFITEATRU ECONOMIC journal,2024.0,Journal Article,Juan David Garcia Gonzalez: University of Alme...,https://econpapers.repec.org/article/aesamfeco...,Manso Jose Ramos Pires,manso jose ramos pires,jose|manso|pires|ramos,...,"Ramos, Jose",pra393,Instituto Valenciano de Investigaciones Económ...,100,"Valencia, Spain",edi:ievages,https://ideas.repec.org/f/pra393.html,jose ramos,jose|ramos,"{ramos, jose}"
2,A,Online Teaching Practices and the Effectivenes...,The AMFITEATRU ECONOMIC journal,2020.0,Journal Article,Cristina Venera Tartavulea: The Bucharest Univ...,https://econpapers.repec.org/article/aesamfeco...,Albu Catalin Nicolae,albu catalin nicolae,albu|catalin|nicolae,...,"Albu, Nicolae",pal605,Facultatea de Ştiinţe Economice Universitatea ...,100,"Brasov, Romania",edi:fetbvro,https://ideas.repec.org/f/pal605.html,nicolae albu,albu|nicolae,"{albu, nicolae}"
3,A,ESG rating of capital's effect on firms' finan...,Access Journal,2024.0,Journal Article,Sholpan Shalbayeva: Almaty Management Universi...,https://econpapers.repec.org/article/aipaccess...,Ismailov Taner,ismailov taner,ismailov|taner,...,"Ismailov, Taner Mustafov",pis198,D. A. Tsenov Academy of Economics,100,"Svishtov, Bulgaria",edi:tsenobg,https://ideas.repec.org/e/pis198.html,taner mustafov ismailov,ismailov|mustafov|taner,"{ismailov, taner, mustafov}"
4,A,A NEW CHALLENGE â INTELLECTUAL CAPITAL EVALU...,Revista Tinerilor Economisti (The Young Econom...,2014.0,Journal Article,Ph. D Student Maria-Luminita Gogan: âPoliteh...,https://econpapers.repec.org/article/aiorteyej...,Gogan Ph. D Student Maria-Luminita,gogan ph d student maria luminita,gogan|luminita|maria|ph|student,...,"Gogan, Luminita Maria",pgo683,Facultatea de Management în Producţie şi Trans...,100,"Timişoara, Romania",edi:ptuptro,https://ideas.repec.org/f/pgo683.html,luminita maria gogan,gogan|luminita|maria,"{maria, luminita, gogan}"
5,A,Sustainability Reporting and the Impact on Acc...,CECCAR Business Review,2024.0,Journal Article,Robert-Aurelian Èova: Bucharest University of...,https://econpapers.repec.org/article/ahdjournl...,Popa Adriana Florina,popa adriana florina,adriana|florina|popa,...,"Popa, Florina",ppo505,Institutul de Economie Nationala Institutul Na...,100,"Bucureşti, Romania",edi:inacaro,https://ideas.repec.org/f/ppo505.html,florina popa,florina|popa,"{popa, florina}"
6,A,"The Accounting Education, Between Digitalisati...",CECCAR Business Review,2020.0,Journal Article,Robert-Aurelian Èova: Bucharest University of...,https://econpapers.repec.org/article/ahdjournl...,Popa Adriana Florina,popa adriana florina,adriana|florina|popa,...,"Popa, Florina",ppo505,Institutul de Economie Nationala Institutul Na...,100,"Bucureşti, Romania",edi:inacaro,https://ideas.repec.org/f/ppo505.html,florina popa,florina|popa,"{popa, florina}"
7,A,Accounting Education â Between Digitalisatio...,CECCAR Business Review,2020.0,Journal Article,Robert-Aurelian Èova: Bucharest University of...,https://econpapers.repec.org/article/ahdjournl...,Popa Adriana Florina,popa adriana florina,adriana|florina|popa,...,"Popa, Florina",ppo505,Institutul de Economie Nationala Institutul Na...,100,"Bucureşti, Romania",edi:inacaro,https://ideas.repec.org/f/ppo505.html,florina popa,florina|popa,"{popa, florina}"
8,A,ECONOMICS OF INTEGRATION. A DEBATE OUTLINE,Internal Auditing and Risk Management,2015.0,Journal Article,,https://econpapers.repec.org/article/athjournl...,Andrei Liviu,andrei liviu,andrei|liviu,...,"Andrei, Liviu Catalin",pan318,Facultatea de Administraţie Publică Şcoala Naţ...,100,"Bucureşti, Romania",edi:fasnsro,https://ideas.repec.org/f/pan318.html,liviu catalin andrei,andrei|catalin|liviu,"{liviu, andrei, catalin}"
9,A,"THE HEALTH OF ACADEMIC ORGANIZATION, A PROJECT...",Management Strategies Journal,2017.0,Journal Article,Vasile Miltiade Stanciu: Spiru Haret Universit...,https://econpapers.repec.org/article/brcjournl...,Stanciu Vasile Miltiade,stanciu vasile miltiade,miltiade|stanciu|vasile,...,"Stanciu, Miltiade",pst278,Academia de Studii Economice din Bucureşti,100,"Bucureşti, Romania",edi:aseeero,https://ideas.repec.org/f/pst278.html,miltiade stanciu,miltiade|stanciu,"{stanciu, miltiade}"


In [26]:
subset_simple_merged["match_type"] = "subset"



In [27]:
JEL_matched_final = pd.concat(
    [JEL_matched, subset_simple_merged],
    axis=0,
    ignore_index=True
)


In [28]:
JEL_matched_final["Author(s)"].nunique()

14186

# cleaning

In [29]:
JEL_matched_final

Unnamed: 0,JEL Subject,Title,Journal,Year,Type,Affiliations,URL,Author(s),author_std_jel,author_key,...,share_pct,location,repec_institution_id,author_url,author_std_inst,match_type,author_key_jel,tokens_jel,author_key_inst,tokens_inst
0,A,Preparing students for careers using business ...,,2020.0,Working Paper,Erland Hejn Nielsen: Department of Economics a...,https://econpapers.repec.org/paper/aahaarhec/2...,Nielsen Steen,nielsen steen,nielsen|steen,...,100,"Aarhus, Denmark",edi:ifoaudk,https://ideas.repec.org/e/pni71.html,steen nielsen,exact,,,,
1,A,"Measuring Democracy - Eight indices: Polity, F...",,2021.0,Working Paper,Martin Paldam: Department of Economics and Bus...,https://econpapers.repec.org/paper/aahaarhec/2...,Paldam Martin,paldam martin,martin|paldam,...,100,"Aarhus, Denmark",edi:anaaudk,https://ideas.repec.org/f/ppa574.html,martin paldam,exact,,,,
2,A,Oeconstudiet og den ÃÂ¸konomiske faggruppe ve...,,2023.0,Working Paper,Svend Hylleberg: Department of Economics and B...,https://econpapers.repec.org/paper/aahaarhec/2...,Hylleberg Svend,hylleberg svend,hylleberg|svend,...,100,"Aarhus, Denmark",edi:creaudk,https://ideas.repec.org/e/phy1.html,svend hylleberg,exact,,,,
3,A,How Do Truckers Perceive and Respond to the Ri...,AEI Economic Perspectives,2024.0,Journal Article,Michael Strain: American Enterprise Institute,https://econpapers.repec.org/article/aeijournl...,Strain Michael,strain michael,michael|strain,...,3%,"Bonn, Germany",edi:izaaade,https://ideas.repec.org/f/pst593.html,michael r strain,exact,,,,
4,A,Architecture to Transform Classic Academic Cou...,The AMFITEATRU ECONOMIC journal,2024.0,Journal Article,Andrei Bobocea: Bucharest University of Econom...,https://econpapers.repec.org/article/aesamfeco...,Batagan Lorena,batagan lorena,batagan|lorena,...,100,"Bucureşti, Romania",edi:aseeero,https://ideas.repec.org/f/pba893.html,lorena batagan,exact,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
108319,H,wiiw Studies on the Integration of Middle East...,,2023.0,Working Paper,,https://econpapers.repec.org/paper/wiipnotes/p...,Landesmann Michael,landesmann michael,,...,90%,"Wien, Austria",edi:wiiwwat,https://ideas.repec.org/f/pla311.html,michael alroy landesmann,subset,landesmann|michael,"{michael, landesmann}",alroy|landesmann|michael,"{alroy, michael, landesmann}"
108320,H,wiiw Studies on the Integration of Middle East...,,2023.0,Working Paper,,https://econpapers.repec.org/paper/wiipnotes/p...,Landesmann Michael,landesmann michael,,...,10%,"Linz, Austria",edi:vlinzat,https://ideas.repec.org/f/pla311.html,michael alroy landesmann,subset,landesmann|michael,"{michael, landesmann}",alroy|landesmann|michael,"{alroy, michael, landesmann}"
108321,H,Long-term care reform and the labor supply of ...,,2015.0,Working Paper,,https://econpapers.repec.org/paper/zbwvfsc15/1...,Geyer Johannes,geyer johannes,,...,99%,"Berlin, Germany",edi:diwbede,https://ideas.repec.org/e/pge81.html,johannes michael geyer,subset,geyer|johannes,"{geyer, johannes}",geyer|johannes|michael,"{geyer, johannes, michael}"
108322,H,Long-term care reform and the labor supply of ...,,2015.0,Working Paper,,https://econpapers.repec.org/paper/zbwvfsc15/1...,Geyer Johannes,geyer johannes,,...,1%,"Bonn, Germany",edi:izaaade,https://ideas.repec.org/e/pge81.html,johannes michael geyer,subset,geyer|johannes,"{geyer, johannes}",geyer|johannes|michael,"{geyer, johannes, michael}"


In [30]:
JEL_machted_final_clean = JEL_matched_final.drop(columns=["Affiliations","author_key_jel","author_key","tokens_jel","author_key_inst","author_std_jel", "tokens_inst",])

In [31]:
JEL_machted_final_clean

Unnamed: 0,JEL Subject,Title,Journal,Year,Type,URL,Author(s),author_name,short_id,institution,share_pct,location,repec_institution_id,author_url,author_std_inst,match_type
0,A,Preparing students for careers using business ...,,2020.0,Working Paper,https://econpapers.repec.org/paper/aahaarhec/2...,Nielsen Steen,"Nielsen, Steen",pni71,Institut for Økonomi Aarhus Universitet,100,"Aarhus, Denmark",edi:ifoaudk,https://ideas.repec.org/e/pni71.html,steen nielsen,exact
1,A,"Measuring Democracy - Eight indices: Polity, F...",,2021.0,Working Paper,https://econpapers.repec.org/paper/aahaarhec/2...,Paldam Martin,"Paldam, Martin",ppa574,School of Economics and Management Institut fo...,100,"Aarhus, Denmark",edi:anaaudk,https://ideas.repec.org/f/ppa574.html,martin paldam,exact
2,A,Oeconstudiet og den ÃÂ¸konomiske faggruppe ve...,,2023.0,Working Paper,https://econpapers.repec.org/paper/aahaarhec/2...,Hylleberg Svend,"Hylleberg, Svend",phy1,Center for Research in Econometric Analysis of...,100,"Aarhus, Denmark",edi:creaudk,https://ideas.repec.org/e/phy1.html,svend hylleberg,exact
3,A,How Do Truckers Perceive and Respond to the Ri...,AEI Economic Perspectives,2024.0,Journal Article,https://econpapers.repec.org/article/aeijournl...,Strain Michael,"Strain, Michael R.",pst593,Institute of Labor Economics (IZA),3%,"Bonn, Germany",edi:izaaade,https://ideas.repec.org/f/pst593.html,michael r strain,exact
4,A,Architecture to Transform Classic Academic Cou...,The AMFITEATRU ECONOMIC journal,2024.0,Journal Article,https://econpapers.repec.org/article/aesamfeco...,Batagan Lorena,"Batagan, Lorena",pba893,Academia de Studii Economice din Bucureşti,100,"Bucureşti, Romania",edi:aseeero,https://ideas.repec.org/f/pba893.html,lorena batagan,exact
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
108319,H,wiiw Studies on the Integration of Middle East...,,2023.0,Working Paper,https://econpapers.repec.org/paper/wiipnotes/p...,Landesmann Michael,"Landesmann, Michael Alroy",pla311,Wiener Institut für Internationale Wirtschafts...,90%,"Wien, Austria",edi:wiiwwat,https://ideas.repec.org/f/pla311.html,michael alroy landesmann,subset
108320,H,wiiw Studies on the Integration of Middle East...,,2023.0,Working Paper,https://econpapers.repec.org/paper/wiipnotes/p...,Landesmann Michael,"Landesmann, Michael Alroy",pla311,Institut für Volkswirtschaftslehre Johannes-Ke...,10%,"Linz, Austria",edi:vlinzat,https://ideas.repec.org/f/pla311.html,michael alroy landesmann,subset
108321,H,Long-term care reform and the labor supply of ...,,2015.0,Working Paper,https://econpapers.repec.org/paper/zbwvfsc15/1...,Geyer Johannes,"Geyer, Johannes Michael",pge81,DIW Berlin (Deutsches Institut für Wirtschafts...,99%,"Berlin, Germany",edi:diwbede,https://ideas.repec.org/e/pge81.html,johannes michael geyer,subset
108322,H,Long-term care reform and the labor supply of ...,,2015.0,Working Paper,https://econpapers.repec.org/paper/zbwvfsc15/1...,Geyer Johannes,"Geyer, Johannes Michael",pge81,Institute of Labor Economics (IZA),1%,"Bonn, Germany",edi:izaaade,https://ideas.repec.org/e/pge81.html,johannes michael geyer,subset


In [32]:
JEL_machted_final_clean.to_csv("final1.csv", index=False)


In [33]:
len(JEL_machted_final_clean)

108324

# code pour vérifier si on cherche qqn dans le dataset

In [57]:
mask = (
    JEL_machted_final_clean["author_name"].str.contains("boyer", case=False, na=False)
    & JEL_machted_final_clean["author_name"].str.contains("pierre", case=False, na=False)
)

JEL_machted_final_clean[mask]


Unnamed: 0,JEL Subject,Title,Journal,Year,Type,URL,Author(s),author_name,short_id,institution,share_pct,location,repec_institution_id,author_url,author_std_inst,match_type
26128,E,Public Debt and the Political Economy of Reforms,American Economic Journal: Microeconomics,2024.0,Journal Article,https://econpapers.repec.org/article/aeaaejmic...,Boyer Pierre,"Boyer, Pierre C.",pbo355,CESifo,1%,"München, Germany",edi:cesifde,https://ideas.repec.org/f/pbo355.html,pierre c boyer,exact
26129,E,Public Debt and the Political Economy of Reforms,American Economic Journal: Microeconomics,2024.0,Journal Article,https://econpapers.repec.org/article/aeaaejmic...,Boyer Pierre,"Boyer, Pierre C.",pbo355,Centre de Recherche en Économie et Statistique...,96%,"Palaiseau, France",edi:crestfr,https://ideas.repec.org/f/pbo355.html,pierre c boyer,exact
26130,E,Public Debt and the Political Economy of Reforms,American Economic Journal: Microeconomics,2024.0,Journal Article,https://econpapers.repec.org/article/aeaaejmic...,Boyer Pierre,"Boyer, Pierre C.",pbo355,Département d'Économie École Polytechnique,2%,"Palaiseau, France",edi:depolfr,https://ideas.repec.org/f/pbo355.html,pierre c boyer,exact
40223,Z,The Lifecycle of Protests in the Digital Age,,2024.0,Working Paper,https://econpapers.repec.org/paper/cprceprdp/1...,Boyer Pierre,"Boyer, Pierre C.",pbo355,CESifo,1%,"München, Germany",edi:cesifde,https://ideas.repec.org/f/pbo355.html,pierre c boyer,exact
40224,Z,The Lifecycle of Protests in the Digital Age,,2024.0,Working Paper,https://econpapers.repec.org/paper/cprceprdp/1...,Boyer Pierre,"Boyer, Pierre C.",pbo355,Centre de Recherche en Économie et Statistique...,96%,"Palaiseau, France",edi:crestfr,https://ideas.repec.org/f/pbo355.html,pierre c boyer,exact
40225,Z,The Lifecycle of Protests in the Digital Age,,2024.0,Working Paper,https://econpapers.repec.org/paper/cprceprdp/1...,Boyer Pierre,"Boyer, Pierre C.",pbo355,Département d'Économie École Polytechnique,2%,"Palaiseau, France",edi:depolfr,https://ideas.repec.org/f/pbo355.html,pierre c boyer,exact
63701,H,Do norms on contribution behavior affect intri...,Journal of Public Economics,2016.0,Journal Article,https://econpapers.repec.org/article/eeepubeco...,Boyer Pierre,"Boyer, Pierre C.",pbo355,CESifo,1%,"München, Germany",edi:cesifde,https://ideas.repec.org/f/pbo355.html,pierre c boyer,exact
63702,H,Do norms on contribution behavior affect intri...,Journal of Public Economics,2016.0,Journal Article,https://econpapers.repec.org/article/eeepubeco...,Boyer Pierre,"Boyer, Pierre C.",pbo355,Centre de Recherche en Économie et Statistique...,96%,"Palaiseau, France",edi:crestfr,https://ideas.repec.org/f/pbo355.html,pierre c boyer,exact
63703,H,Do norms on contribution behavior affect intri...,Journal of Public Economics,2016.0,Journal Article,https://econpapers.repec.org/article/eeepubeco...,Boyer Pierre,"Boyer, Pierre C.",pbo355,Département d'Économie École Polytechnique,2%,"Palaiseau, France",edi:depolfr,https://ideas.repec.org/f/pbo355.html,pierre c boyer,exact
63815,H,Public Debt and the Political Economy of Reforms,American Economic Journal: Microeconomics,2024.0,Journal Article,https://econpapers.repec.org/article/aeaaejmic...,Boyer Pierre,"Boyer, Pierre C.",pbo355,CESifo,1%,"München, Germany",edi:cesifde,https://ideas.repec.org/f/pbo355.html,pierre c boyer,exact


# Stat desc sur cette nouvelle base matchée

In [35]:
df = JEL_matched_final.copy()
df["Year"] = pd.to_numeric(df["Year"], errors="coerce")



In [36]:
df["weight"] = (
    df["share_pct"]
    .astype(str)
    .str.replace("%", "", regex=False)
    .astype(float)
    / 100
)



In [37]:
df["Title"].nunique()        # 36731


43005

In [38]:
df["author_name"].nunique() #11 679

12513

In [39]:
df["institution"].nunique() # 2009

2084

In [40]:
df.groupby("JEL Subject")["Title"].nunique().sort_values(ascending=False)


JEL Subject
H    18642
E    14681
Z     4122
J     3266
B     2722
A     2547
D     1696
O     1189
K      456
Y      354
C       13
Name: Title, dtype: int64

In [41]:
inst_prod = (
    df.groupby("institution")["weight"]
      .sum()
      .sort_values(ascending=False)
)


In [42]:
inst_share = inst_prod / inst_prod.sum()


# Analyse top journaux

In [43]:
top_journals = [
    "American Economic Review",
    "The Quarterly Journal of Economics",
    "Journal of Political Economy",
    "The Review of Economic Studies",
    "Econometrica"
]
#Econometrica et #Journal of Political Economy absent

In [44]:
pd.set_option("display.max_rows", None)

In [45]:
df["Journal"].value_counts()


Journal
DIW Wochenbericht                                                                                                                                                                                       1723
ifo Schnelldienst                                                                                                                                                                                       1293
Journal of Economic Behavior & Organization                                                                                                                                                             1133
European Economic Review                                                                                                                                                                                 994
Wirtschaftsdienst                                                                                                                                                           

In [46]:
df["is_top5_journal"] = df["Journal"].isin(top_journals)


In [47]:
inst_top5 = (
    df[df["is_top5_journal"]]
    .groupby("institution")["weight"]
    .sum()
    .sort_values(ascending=False)
)


In [48]:
inst_top5

institution
Centre de Recerca en Economia Internacional (CREI) Barcelona School of Economics (BSE)                                                                         9.20
Paris School of Economics                                                                                                                                      8.49
Departament d'Economia i Empresa Universitat Pompeu Fabra Barcelona School of Economics (BSE)                                                                  7.84
Department of Economics Sciences économiques Sciences Po                                                                                                       6.99
Institute of Labor Economics (IZA)                                                                                                                             6.17
Volkswirtschaftliche Fakultät Ludwig-Maximilians-Universität München                                                                                           5.38
HEC 

# Part des publications d'une institution qui sont dans le top 5

In [49]:
inst_year_top5 = (
    df[df["is_top5_journal"]]
    .groupby(["Year", "institution"])["weight"]
    .sum()
    .reset_index()
)


In [50]:
inst_year_top5

Unnamed: 0,Year,institution,weight
0,2016.0,Abteilung für Volkswirtschaftslehre Universitä...,1.0
1,2016.0,Barcelona Institute for Political Economy and ...,0.02
2,2016.0,Bruegel,0.3
3,2016.0,CESifo,0.24
4,2016.0,Centre de Recerca en Economia Internacional (C...,0.9
5,2016.0,Centro Studi di Economia e Finanza (CSEF),0.95
6,2016.0,Corvinus Institute for Advanced Studies Budape...,0.3
7,2016.0,DIW Berlin (Deutsches Institut für Wirtschafts...,0.99
8,2016.0,Departament d'Economia i Empresa Universitat P...,0.08
9,2016.0,Department of Economics Trinity College Dublin,0.49


In [51]:
inst_total = (
    df.groupby("institution")["weight"]
      .sum()
)

inst_top5_share = inst_top5 / inst_total


In [52]:
inst_top5_share

institution
"Carlo F. Dondena" Centre for Research on Social Dynamics (DONDENA) Università Commerciale Luigi Bocconi                                                                                                                                                                                                                 NaN
AMURE Centre du Droit et de l'Économie de la Mer                                                                                                                                                                                                                                                                         NaN
Abteilung "Verhalten auf Märkten" Wissenschaftszentrum Berlin für Sozialforschung (WZB)                                                                                                                                                                                                                                  NaN
Abteilung "Ökonomik des Wandels" Wiss

In [53]:
top_authors_top5 = (
    df[df["is_top5_journal"]]
    .groupby("author_name")["Title"]
    .nunique()
    .sort_values(ascending=False)
    .reset_index(name="n_top5_articles")
)



In [54]:
top_authors_top5

Unnamed: 0,author_name,n_top5_articles
0,"Brunnermeier, Markus K.",6
1,"Card, David E.",4
2,"Blanchard, Olivier J",3
3,"Fornaro, Luca",3
4,"Tirole, Jean",3
5,"Herkenhoff, Kyle Frederic",2
6,"Durante, Ruben",2
7,"Bauer, Michael Dominic",2
8,"Alan, Sule",2
9,"Jayachandran, Seema",2


In [55]:
top_inst_top5 = (
    df[df["is_top5_journal"]]
    .groupby("institution")["weight"]
    .sum()
    .sort_values(ascending=False)
    .reset_index(name="top5_output")
)

top_inst_top5.head(20)


Unnamed: 0,institution,top5_output
0,Centre de Recerca en Economia Internacional (C...,9.2
1,Paris School of Economics,8.49
2,Departament d'Economia i Empresa Universitat P...,7.84
3,Department of Economics Sciences économiques S...,6.99
4,Institute of Labor Economics (IZA),6.17
5,Volkswirtschaftliche Fakultät Ludwig-Maximilia...,5.38
6,HEC Paris (École des Hautes Études Commerciales),5.0
7,Institut d'Économie Industrielle (IDEI) Toulou...,5.0
8,Centre de Recherche en Économie et Statistique...,4.96
9,Department of Economics European University In...,4.04
