In [2]:
import re
import unicodedata
import pandas as pd


In [3]:
# Institutions
authors_institution_df = pd.read_csv("data_A_Z.csv")
authors_institution_df["share_pct"] = authors_institution_df["share_pct"].fillna(100)

# RePEc / JEL
A_B_df = pd.read_csv("RePEc_Full_A_B.csv")
D_df   = pd.read_csv("RePEc-D-not-full.csv")
E_Z_df = pd.read_csv("RePEc_Full_E_Z.csv")
O_df = pd.read_csv("RePEc_JEL_O.csv")
H_df = pd.read_csv("RePEc_Full_H.csv")
S_Y_df = pd.read_csv("RePEc-from-S-to-Y-full.csv")
C_df = pd.read_csv("RePEc_C_Full.csv")
G_df = pd.read_csv("RePEc_G_Full.csv")
J_df   = pd.read_csv("J.csv")

JEL_df = pd.concat(
    [A_B_df, D_df, E_Z_df, S_Y_df, J_df, O_df, H_df, G_df, C_df],
    ignore_index=True
).drop_duplicates()


# explosion des co-auteurs

In [4]:
JEL_df = (
    JEL_df
        .dropna(subset=["Author(s)"])
        .assign(Author=lambda x: x["Author(s)"].str.split(";"))
        .explode("Author")
        .assign(Author=lambda x: x["Author"].str.strip())
        .query("Author != ''")
        .drop(columns=["Author(s)"])
        .rename(columns={"Author": "Author(s)"})
        .reset_index(drop=True)
)


# nettoyage des noms

In [5]:
def strip_accents(s: str) -> str:
    s = unicodedata.normalize("NFKD", s)
    return "".join(ch for ch in s if not unicodedata.combining(ch))

def normalize_whitespace(s: str) -> str:
    return re.sub(r"\s+", " ", s).strip()

def standardize_name(raw):
    if raw is None or pd.isna(raw):
        return ""
    s = strip_accents(str(raw).lower())
    if "," in s:
        last, rest = s.split(",", 1)
        s = rest.strip() + " " + last.strip()
    s = re.sub(r"[^\w\s]", " ", s)
    return normalize_whitespace(s)


# standardisation des 2 côtés

In [6]:
JEL_df["author_std"] = JEL_df["Author(s)"].apply(standardize_name)
authors_institution_df["author_std"] = authors_institution_df["author_name"].apply(standardize_name)


# clé auteur principal (ordre libre sans initiales)

In [7]:
def author_key_unordered_no_initial(raw):
    s = standardize_name(raw)
    if not s:
        return ""
    tokens = [t for t in s.split() if len(t) > 1]
    return "|".join(sorted(tokens))


In [8]:
JEL_df["author_key"] = JEL_df["Author(s)"].apply(author_key_unordered_no_initial)
authors_institution_df["author_key"] = authors_institution_df["author_name"].apply(author_key_unordered_no_initial)


In [9]:
JEL_matched = (
    JEL_df
        .merge(
            authors_institution_df,
            on="author_key",
            how="inner",
            suffixes=("_jel", "_inst")
        )
)

JEL_matched["match_type"] = "exact"


In [10]:
JEL_matched["author_name"].nunique()

14546

In [11]:
JEL_matched

Unnamed: 0,JEL Subject,Title,Journal,Year,Type,Affiliations,URL,Author(s),author_std_jel,author_key,author_name,short_id,institution,share_pct,location,repec_institution_id,author_url,author_std_inst,match_type
0,A,Preparing students for careers using business ...,,2020.0,Working Paper,Erland Hejn Nielsen: Department of Economics a...,https://econpapers.repec.org/paper/aahaarhec/2...,Nielsen Steen,nielsen steen,nielsen|steen,"Nielsen, Steen",pni71,Institut for Økonomi Aarhus Universitet,100,"Aarhus, Denmark",edi:ifoaudk,https://ideas.repec.org/e/pni71.html,steen nielsen,exact
1,A,"Measuring Democracy - Eight indices: Polity, F...",,2021.0,Working Paper,Martin Paldam: Department of Economics and Bus...,https://econpapers.repec.org/paper/aahaarhec/2...,Paldam Martin,paldam martin,martin|paldam,"Paldam, Martin",ppa574,School of Economics and Management Institut fo...,100,"Aarhus, Denmark",edi:anaaudk,https://ideas.repec.org/f/ppa574.html,martin paldam,exact
2,A,Oeconstudiet og den ÃÂ¸konomiske faggruppe ve...,,2023.0,Working Paper,Svend Hylleberg: Department of Economics and B...,https://econpapers.repec.org/paper/aahaarhec/2...,Hylleberg Svend,hylleberg svend,hylleberg|svend,"Hylleberg, Svend",phy1,Center for Research in Econometric Analysis of...,100,"Aarhus, Denmark",edi:creaudk,https://ideas.repec.org/e/phy1.html,svend hylleberg,exact
3,A,How Do Truckers Perceive and Respond to the Ri...,AEI Economic Perspectives,2024.0,Journal Article,Michael Strain: American Enterprise Institute,https://econpapers.repec.org/article/aeijournl...,Strain Michael,strain michael,michael|strain,"Strain, Michael R.",pst593,Institute of Labor Economics (IZA),3%,"Bonn, Germany",edi:izaaade,https://ideas.repec.org/f/pst593.html,michael r strain,exact
4,A,Architecture to Transform Classic Academic Cou...,The AMFITEATRU ECONOMIC journal,2024.0,Journal Article,Andrei Bobocea: Bucharest University of Econom...,https://econpapers.repec.org/article/aesamfeco...,Batagan Lorena,batagan lorena,batagan|lorena,"Batagan, Lorena",pba893,Academia de Studii Economice din Bucureşti,100,"Bucureşti, Romania",edi:aseeero,https://ideas.repec.org/f/pba893.html,lorena batagan,exact
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
276486,C,Interest Rate Uncertainty and the Predictabili...,,2021.0,Working Paper,,https://econpapers.repec.org/paper/hhscbsnow/2...,Gupta Rangan,gupta rangan,gupta|rangan,"Gupta, Rangan",pgu80,Institut de Préparation à l'Administration et ...,20%,"Paris, France",edi:ipagpfr,https://ideas.repec.org/e/pgu80.html,rangan gupta,exact
276487,C,Does Unfairness Hurt Women? The Effects of Los...,,2023.0,Working Paper,Stefano Piasenti: Humboldt University,https://econpapers.repec.org/paper/hhslunewp/2...,Valente Marica,valente marica,marica|valente,"Valente, Marica",pva1009,DIW Berlin (Deutsches Institut für Wirtschafts...,47%,"Berlin, Germany",edi:diwbede,https://ideas.repec.org/f/pva1009.html,marica valente,exact
276488,C,Does Unfairness Hurt Women? The Effects of Los...,,2023.0,Working Paper,Stefano Piasenti: Humboldt University,https://econpapers.repec.org/paper/hhslunewp/2...,Valente Marica,valente marica,marica|valente,"Valente, Marica",pva1009,Fakultät für Volkswirtschaft und Statistik Leo...,47%,"Innsbruck, Austria",edi:wuibkat,https://ideas.repec.org/f/pva1009.html,marica valente,exact
276489,C,Does Unfairness Hurt Women? The Effects of Los...,,2023.0,Working Paper,Stefano Piasenti: Humboldt University,https://econpapers.repec.org/paper/hhslunewp/2...,Valente Marica,valente marica,marica|valente,"Valente, Marica",pva1009,Istituto per la Ricerca Valutativa sulle Polit...,6%,"Trento, Italy",edi:irvapit,https://ideas.repec.org/f/pva1009.html,marica valente,exact


# travail sur les non matchés

In [12]:
matched_keys = set(JEL_matched["author_key"])

JEL_unmatched = (
    JEL_df[
        ~JEL_df["author_key"].isin(matched_keys)
    ]
    .copy()
)


In [13]:
def author_tokens(raw):
    s = standardize_name(raw)
    if not s:
        return set()
    return {t for t in s.split() if len(t) > 1}


In [14]:
JEL_unmatched["tokens"] = JEL_unmatched["Author(s)"].apply(author_tokens)
authors_institution_df["tokens"] = authors_institution_df["author_name"].apply(author_tokens)



In [15]:
inst_tokens_list = list(
    zip(
        authors_institution_df["tokens"],
        authors_institution_df["author_name"]
    )
)


In [16]:
from collections import defaultdict

token_index = defaultdict(list)

for _, row in authors_institution_df.iterrows():
    inst_tokens = row["tokens"]
    if len(inst_tokens) < 2:
        continue

    for tok in inst_tokens:
        if len(tok) > 3:
            token_index[tok].append((inst_tokens, row["author_name"]))



In [17]:
def subset_match_simple_fast(row):
    jel_tokens = row["tokens"]
    if len(jel_tokens) < 2:
        return None

    # tokens longs côté JEL
    long_tokens = [t for t in jel_tokens if len(t) > 3]
    if len(long_tokens) < 2:
        return None

    # candidats = institutions partageant ≥1 token long
    candidates = []
    for tok in long_tokens:
        candidates.extend(token_index.get(tok, []))

    for inst_tokens, inst_name in candidates:
        # mots communs longs
        common = {
            t for t in jel_tokens & inst_tokens
            if len(t) > 3
        }

        if len(common) < 2:
            continue

        # inclusion stricte
        if jel_tokens.issubset(inst_tokens) or inst_tokens.issubset(jel_tokens):
            return inst_name

    return None




In [18]:
JEL_unmatched["author_name_subset_strict"] = (
    JEL_unmatched.apply(subset_match_simple_fast, axis=1)
)



In [19]:
subset_simple = JEL_unmatched[
    JEL_unmatched["author_name_subset_strict"].notna()
]


In [20]:
subset_simple[
    ["Author(s)", "author_name_subset_strict"]
].drop_duplicates().sample(50, random_state=0)


Unnamed: 0,Author(s),author_name_subset_strict
86127,Pinto Ligia,"Pinto, Ligia Costa"
1044643,Taurino Serena F.,"Taurino, Serena Fiona"
15473,Albu (Marinescu) Ada,"Albu (Marinescu), Ada Cristina"
92149,Garcia-Swartz Daniel D.,"Garcia, Daniel"
823294,Rodriguez-Casas Pablo,"Casas, Pablo"
431165,Guets Wilfried,"Guets, Wilfried Sr."
78216,Lunn Pete,"Lunn, Pete Daniel"
203326,Nikolov Kalin,"Nikolov, Kalin Ognianov"
45795,DâIppoliti Carlo,"D'Ippoliti, Carlo"
354249,Uddin Gazi A.,"Uddin, Gazi Salah"


In [21]:
subset_simple = subset_simple.copy()

subset_simple["author_name"] = subset_simple["author_name_subset_strict"]


In [22]:
subset_simple = subset_simple.drop(columns=["author_name_subset_strict"])


In [23]:
subset_simple.head()


Unnamed: 0,JEL Subject,Title,Journal,Year,Type,Affiliations,URL,Author(s),author_std,author_key,tokens,author_name
206,A,Influencing ESG Perception in SMEs through CSR...,The AMFITEATRU ECONOMIC journal,2024.0,Journal Article,Jaroslav Belas: Alexander Dubcek University in...,https://econpapers.repec.org/article/aesamfeco...,Balcerzak Adam P.,balcerzak adam p,adam|balcerzak,"{balcerzak, adam}","Balcerzak, Adam P. Sr."
220,A,Size of Government and Economic Growth: A Conv...,The AMFITEATRU ECONOMIC journal,2024.0,Journal Article,Juan David Garcia Gonzalez: University of Alme...,https://econpapers.repec.org/article/aesamfeco...,Manso Jose Ramos Pires,manso jose ramos pires,jose|manso|pires|ramos,"{pires, ramos, jose, manso}","Ramos, Jose"
258,A,Online Teaching Practices and the Effectivenes...,The AMFITEATRU ECONOMIC journal,2020.0,Journal Article,Cristina Venera Tartavulea: The Bucharest Univ...,https://econpapers.repec.org/article/aesamfeco...,Albu Catalin Nicolae,albu catalin nicolae,albu|catalin|nicolae,"{nicolae, albu, catalin}","Albu, Nicolae"
319,A,ESG rating of capital's effect on firms' finan...,Access Journal,2024.0,Journal Article,Sholpan Shalbayeva: Almaty Management Universi...,https://econpapers.repec.org/article/aipaccess...,Ismailov Taner,ismailov taner,ismailov|taner,"{ismailov, taner}","Ismailov, Taner Mustafov"
403,A,A NEW CHALLENGE â INTELLECTUAL CAPITAL EVALU...,Revista Tinerilor Economisti (The Young Econom...,2014.0,Journal Article,Ph. D Student Maria-Luminita Gogan: âPoliteh...,https://econpapers.repec.org/article/aiorteyej...,Gogan Ph. D Student Maria-Luminita,gogan ph d student maria luminita,gogan|luminita|maria|ph|student,"{gogan, ph, maria, luminita, student}","Gogan, Luminita Maria"


In [24]:
subset_simple_merged = (
    subset_simple
        .merge(
            authors_institution_df,
            on="author_name",
            how="inner",
            suffixes=("_jel", "_inst")
        )
)




In [25]:
subset_simple_merged.shape



(34746, 21)

In [26]:
subset_simple_merged.head(50)


Unnamed: 0,JEL Subject,Title,Journal,Year,Type,Affiliations,URL,Author(s),author_std_jel,author_key_jel,...,author_name,short_id,institution,share_pct,location,repec_institution_id,author_url,author_std_inst,author_key_inst,tokens_inst
0,A,Influencing ESG Perception in SMEs through CSR...,The AMFITEATRU ECONOMIC journal,2024.0,Journal Article,Jaroslav Belas: Alexander Dubcek University in...,https://econpapers.repec.org/article/aesamfeco...,Balcerzak Adam P.,balcerzak adam p,adam|balcerzak,...,"Balcerzak, Adam P. Sr.",pba1247,Wydział Nauk Ekonomicznych Universytet Warmińs...,100,"Olsztyn, Poland",edi:wnuwmpl,https://ideas.repec.org/f/pba1247.html,adam p sr balcerzak,adam|balcerzak|sr,"{balcerzak, sr, adam}"
1,A,Size of Government and Economic Growth: A Conv...,The AMFITEATRU ECONOMIC journal,2024.0,Journal Article,Juan David Garcia Gonzalez: University of Alme...,https://econpapers.repec.org/article/aesamfeco...,Manso Jose Ramos Pires,manso jose ramos pires,jose|manso|pires|ramos,...,"Ramos, Jose",pra393,Instituto Valenciano de Investigaciones Económ...,100,"Valencia, Spain",edi:ievages,https://ideas.repec.org/f/pra393.html,jose ramos,jose|ramos,"{ramos, jose}"
2,A,Online Teaching Practices and the Effectivenes...,The AMFITEATRU ECONOMIC journal,2020.0,Journal Article,Cristina Venera Tartavulea: The Bucharest Univ...,https://econpapers.repec.org/article/aesamfeco...,Albu Catalin Nicolae,albu catalin nicolae,albu|catalin|nicolae,...,"Albu, Nicolae",pal605,Facultatea de Ştiinţe Economice Universitatea ...,100,"Brasov, Romania",edi:fetbvro,https://ideas.repec.org/f/pal605.html,nicolae albu,albu|nicolae,"{nicolae, albu}"
3,A,ESG rating of capital's effect on firms' finan...,Access Journal,2024.0,Journal Article,Sholpan Shalbayeva: Almaty Management Universi...,https://econpapers.repec.org/article/aipaccess...,Ismailov Taner,ismailov taner,ismailov|taner,...,"Ismailov, Taner Mustafov",pis198,D. A. Tsenov Academy of Economics,100,"Svishtov, Bulgaria",edi:tsenobg,https://ideas.repec.org/e/pis198.html,taner mustafov ismailov,ismailov|mustafov|taner,"{mustafov, taner, ismailov}"
4,A,A NEW CHALLENGE â INTELLECTUAL CAPITAL EVALU...,Revista Tinerilor Economisti (The Young Econom...,2014.0,Journal Article,Ph. D Student Maria-Luminita Gogan: âPoliteh...,https://econpapers.repec.org/article/aiorteyej...,Gogan Ph. D Student Maria-Luminita,gogan ph d student maria luminita,gogan|luminita|maria|ph|student,...,"Gogan, Luminita Maria",pgo683,Facultatea de Management în Producţie şi Trans...,100,"Timişoara, Romania",edi:ptuptro,https://ideas.repec.org/f/pgo683.html,luminita maria gogan,gogan|luminita|maria,"{luminita, gogan, maria}"
5,A,Sustainability Reporting and the Impact on Acc...,CECCAR Business Review,2024.0,Journal Article,Robert-Aurelian Èova: Bucharest University of...,https://econpapers.repec.org/article/ahdjournl...,Popa Adriana Florina,popa adriana florina,adriana|florina|popa,...,"Popa, Florina",ppo505,Institutul de Economie Nationala Institutul Na...,100,"Bucureşti, Romania",edi:inacaro,https://ideas.repec.org/f/ppo505.html,florina popa,florina|popa,"{popa, florina}"
6,A,"The Accounting Education, Between Digitalisati...",CECCAR Business Review,2020.0,Journal Article,Robert-Aurelian Èova: Bucharest University of...,https://econpapers.repec.org/article/ahdjournl...,Popa Adriana Florina,popa adriana florina,adriana|florina|popa,...,"Popa, Florina",ppo505,Institutul de Economie Nationala Institutul Na...,100,"Bucureşti, Romania",edi:inacaro,https://ideas.repec.org/f/ppo505.html,florina popa,florina|popa,"{popa, florina}"
7,A,Accounting Education â Between Digitalisatio...,CECCAR Business Review,2020.0,Journal Article,Robert-Aurelian Èova: Bucharest University of...,https://econpapers.repec.org/article/ahdjournl...,Popa Adriana Florina,popa adriana florina,adriana|florina|popa,...,"Popa, Florina",ppo505,Institutul de Economie Nationala Institutul Na...,100,"Bucureşti, Romania",edi:inacaro,https://ideas.repec.org/f/ppo505.html,florina popa,florina|popa,"{popa, florina}"
8,A,ECONOMICS OF INTEGRATION. A DEBATE OUTLINE,Internal Auditing and Risk Management,2015.0,Journal Article,,https://econpapers.repec.org/article/athjournl...,Andrei Liviu,andrei liviu,andrei|liviu,...,"Andrei, Liviu Catalin",pan318,Facultatea de Administraţie Publică Şcoala Naţ...,100,"Bucureşti, Romania",edi:fasnsro,https://ideas.repec.org/f/pan318.html,liviu catalin andrei,andrei|catalin|liviu,"{andrei, catalin, liviu}"
9,A,"THE HEALTH OF ACADEMIC ORGANIZATION, A PROJECT...",Management Strategies Journal,2017.0,Journal Article,Vasile Miltiade Stanciu: Spiru Haret Universit...,https://econpapers.repec.org/article/brcjournl...,Stanciu Vasile Miltiade,stanciu vasile miltiade,miltiade|stanciu|vasile,...,"Stanciu, Miltiade",pst278,Academia de Studii Economice din Bucureşti,100,"Bucureşti, Romania",edi:aseeero,https://ideas.repec.org/f/pst278.html,miltiade stanciu,miltiade|stanciu,"{stanciu, miltiade}"


In [27]:
subset_simple_merged["match_type"] = "subset"



In [28]:
JEL_matched_final = pd.concat(
    [JEL_matched, subset_simple_merged],
    axis=0,
    ignore_index=True
)


In [60]:
JEL_matched_final["Author(s)"].nunique()

19688

# cleaning

In [30]:
JEL_matched_final

Unnamed: 0,JEL Subject,Title,Journal,Year,Type,Affiliations,URL,Author(s),author_std_jel,author_key,...,share_pct,location,repec_institution_id,author_url,author_std_inst,match_type,author_key_jel,tokens_jel,author_key_inst,tokens_inst
0,A,Preparing students for careers using business ...,,2020.0,Working Paper,Erland Hejn Nielsen: Department of Economics a...,https://econpapers.repec.org/paper/aahaarhec/2...,Nielsen Steen,nielsen steen,nielsen|steen,...,100,"Aarhus, Denmark",edi:ifoaudk,https://ideas.repec.org/e/pni71.html,steen nielsen,exact,,,,
1,A,"Measuring Democracy - Eight indices: Polity, F...",,2021.0,Working Paper,Martin Paldam: Department of Economics and Bus...,https://econpapers.repec.org/paper/aahaarhec/2...,Paldam Martin,paldam martin,martin|paldam,...,100,"Aarhus, Denmark",edi:anaaudk,https://ideas.repec.org/f/ppa574.html,martin paldam,exact,,,,
2,A,Oeconstudiet og den ÃÂ¸konomiske faggruppe ve...,,2023.0,Working Paper,Svend Hylleberg: Department of Economics and B...,https://econpapers.repec.org/paper/aahaarhec/2...,Hylleberg Svend,hylleberg svend,hylleberg|svend,...,100,"Aarhus, Denmark",edi:creaudk,https://ideas.repec.org/e/phy1.html,svend hylleberg,exact,,,,
3,A,How Do Truckers Perceive and Respond to the Ri...,AEI Economic Perspectives,2024.0,Journal Article,Michael Strain: American Enterprise Institute,https://econpapers.repec.org/article/aeijournl...,Strain Michael,strain michael,michael|strain,...,3%,"Bonn, Germany",edi:izaaade,https://ideas.repec.org/f/pst593.html,michael r strain,exact,,,,
4,A,Architecture to Transform Classic Academic Cou...,The AMFITEATRU ECONOMIC journal,2024.0,Journal Article,Andrei Bobocea: Bucharest University of Econom...,https://econpapers.repec.org/article/aesamfeco...,Batagan Lorena,batagan lorena,batagan|lorena,...,100,"Bucureşti, Romania",edi:aseeero,https://ideas.repec.org/f/pba893.html,lorena batagan,exact,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
311232,C,Fighting terrorism in Africa: Benchmarking pol...,Physica A: Statistical Mechanics and its Appli...,2018.0,Journal Article,,https://econpapers.repec.org/article/eeephsmap...,Tchamyou Vanessa,tchamyou vanessa,,...,1%,"Antwerpen, Belgium",edi:vaufsbe,https://ideas.repec.org/e/ptc17.html,vanessa simen tchamyou,subset,tchamyou|vanessa,"{tchamyou, vanessa}",simen|tchamyou|vanessa,"{tchamyou, simen, vanessa}"
311233,C,Fighting terrorism in Africa: Benchmarking pol...,Physica A: Statistical Mechanics and its Appli...,2018.0,Journal Article,,https://econpapers.repec.org/article/eeephsmap...,Tchamyou Vanessa,tchamyou vanessa,,...,1%,"Liège, Belgium",edi:feulgbe,https://ideas.repec.org/e/ptc17.html,vanessa simen tchamyou,subset,tchamyou|vanessa,"{tchamyou, vanessa}",simen|tchamyou|vanessa,"{tchamyou, simen, vanessa}"
311234,C,Conditional market timing in the mutual fund i...,Research in International Business and Finance,2017.0,Journal Article,,https://econpapers.repec.org/article/eeeriibaf...,Tchamyou Vanessa,tchamyou vanessa,,...,1%,"Antwerpen, Belgium",edi:vaufsbe,https://ideas.repec.org/e/ptc17.html,vanessa simen tchamyou,subset,tchamyou|vanessa,"{tchamyou, vanessa}",simen|tchamyou|vanessa,"{tchamyou, simen, vanessa}"
311235,C,Conditional market timing in the mutual fund i...,Research in International Business and Finance,2017.0,Journal Article,,https://econpapers.repec.org/article/eeeriibaf...,Tchamyou Vanessa,tchamyou vanessa,,...,1%,"Liège, Belgium",edi:feulgbe,https://ideas.repec.org/e/ptc17.html,vanessa simen tchamyou,subset,tchamyou|vanessa,"{tchamyou, vanessa}",simen|tchamyou|vanessa,"{tchamyou, simen, vanessa}"


In [31]:
JEL_machted_final_clean = JEL_matched_final.drop(columns=["Affiliations","author_key_jel","author_key","tokens_jel","author_key_inst","author_std_jel", "tokens_inst",])

In [32]:
JEL_machted_final_clean

Unnamed: 0,JEL Subject,Title,Journal,Year,Type,URL,Author(s),author_name,short_id,institution,share_pct,location,repec_institution_id,author_url,author_std_inst,match_type
0,A,Preparing students for careers using business ...,,2020.0,Working Paper,https://econpapers.repec.org/paper/aahaarhec/2...,Nielsen Steen,"Nielsen, Steen",pni71,Institut for Økonomi Aarhus Universitet,100,"Aarhus, Denmark",edi:ifoaudk,https://ideas.repec.org/e/pni71.html,steen nielsen,exact
1,A,"Measuring Democracy - Eight indices: Polity, F...",,2021.0,Working Paper,https://econpapers.repec.org/paper/aahaarhec/2...,Paldam Martin,"Paldam, Martin",ppa574,School of Economics and Management Institut fo...,100,"Aarhus, Denmark",edi:anaaudk,https://ideas.repec.org/f/ppa574.html,martin paldam,exact
2,A,Oeconstudiet og den ÃÂ¸konomiske faggruppe ve...,,2023.0,Working Paper,https://econpapers.repec.org/paper/aahaarhec/2...,Hylleberg Svend,"Hylleberg, Svend",phy1,Center for Research in Econometric Analysis of...,100,"Aarhus, Denmark",edi:creaudk,https://ideas.repec.org/e/phy1.html,svend hylleberg,exact
3,A,How Do Truckers Perceive and Respond to the Ri...,AEI Economic Perspectives,2024.0,Journal Article,https://econpapers.repec.org/article/aeijournl...,Strain Michael,"Strain, Michael R.",pst593,Institute of Labor Economics (IZA),3%,"Bonn, Germany",edi:izaaade,https://ideas.repec.org/f/pst593.html,michael r strain,exact
4,A,Architecture to Transform Classic Academic Cou...,The AMFITEATRU ECONOMIC journal,2024.0,Journal Article,https://econpapers.repec.org/article/aesamfeco...,Batagan Lorena,"Batagan, Lorena",pba893,Academia de Studii Economice din Bucureşti,100,"Bucureşti, Romania",edi:aseeero,https://ideas.repec.org/f/pba893.html,lorena batagan,exact
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
311232,C,Fighting terrorism in Africa: Benchmarking pol...,Physica A: Statistical Mechanics and its Appli...,2018.0,Journal Article,https://econpapers.repec.org/article/eeephsmap...,Tchamyou Vanessa,"Tchamyou, Vanessa Simen",ptc17,Departement Algemene economie Faculteit Bedrij...,1%,"Antwerpen, Belgium",edi:vaufsbe,https://ideas.repec.org/e/ptc17.html,vanessa simen tchamyou,subset
311233,C,Fighting terrorism in Africa: Benchmarking pol...,Physica A: Statistical Mechanics and its Appli...,2018.0,Journal Article,https://econpapers.repec.org/article/eeephsmap...,Tchamyou Vanessa,"Tchamyou, Vanessa Simen",ptc17,HEC École de Gestion Université de Liège,1%,"Liège, Belgium",edi:feulgbe,https://ideas.repec.org/e/ptc17.html,vanessa simen tchamyou,subset
311234,C,Conditional market timing in the mutual fund i...,Research in International Business and Finance,2017.0,Journal Article,https://econpapers.repec.org/article/eeeriibaf...,Tchamyou Vanessa,"Tchamyou, Vanessa Simen",ptc17,Departement Algemene economie Faculteit Bedrij...,1%,"Antwerpen, Belgium",edi:vaufsbe,https://ideas.repec.org/e/ptc17.html,vanessa simen tchamyou,subset
311235,C,Conditional market timing in the mutual fund i...,Research in International Business and Finance,2017.0,Journal Article,https://econpapers.repec.org/article/eeeriibaf...,Tchamyou Vanessa,"Tchamyou, Vanessa Simen",ptc17,HEC École de Gestion Université de Liège,1%,"Liège, Belgium",edi:feulgbe,https://ideas.repec.org/e/ptc17.html,vanessa simen tchamyou,subset


In [33]:
JEL_machted_final_clean.to_csv("final1.csv", index=False)


In [34]:
len(JEL_machted_final_clean)

311237

# code pour vérifier si on cherche qqn dans le dataset

In [57]:
mask = (
    JEL_machted_final_clean["author_name"].str.contains("pape", case=False, na=False)
    & JEL_machted_final_clean["author_name"].str.contains("louis", case=False, na=False)
)

JEL_machted_final_clean[mask]


Unnamed: 0,JEL Subject,Title,Journal,Year,Type,URL,Author(s),author_name,short_id,institution,share_pct,location,repec_institution_id,author_url,author_std_inst,match_type
56062,J,"Wages, hires, and labor market concentration",Journal of Economic Behavior & Organization,2021.0,Journal Article,https://econpapers.repec.org/article/eeejeborg...,Pape Louis-Daniel,"Pape, Louis-Daniel",ppa1332,Département Sciences Économiques et Sociales É...,10%,"Paris, France",edi:denstfr,https://ideas.repec.org/f/ppa1332.html,louis daniel pape,exact
56063,J,"Wages, hires, and labor market concentration",Journal of Economic Behavior & Organization,2021.0,Journal Article,https://econpapers.repec.org/article/eeejeborg...,Pape Louis-Daniel,"Pape, Louis-Daniel",ppa1332,Centre de Recherche en Économie et Statistique...,90%,"Palaiseau, France",edi:crestfr,https://ideas.repec.org/f/ppa1332.html,louis daniel pape,exact
56185,J,"Wages, Hires, and Labor Market Concentration",,2020.0,Working Paper,https://econpapers.repec.org/paper/nbrnberwo/2...,Pape Louis-Daniel,"Pape, Louis-Daniel",ppa1332,Département Sciences Économiques et Sociales É...,10%,"Paris, France",edi:denstfr,https://ideas.repec.org/f/ppa1332.html,louis daniel pape,exact
56186,J,"Wages, Hires, and Labor Market Concentration",,2020.0,Working Paper,https://econpapers.repec.org/paper/nbrnberwo/2...,Pape Louis-Daniel,"Pape, Louis-Daniel",ppa1332,Centre de Recherche en Économie et Statistique...,90%,"Palaiseau, France",edi:crestfr,https://ideas.repec.org/f/ppa1332.html,louis daniel pape,exact
89801,J,"Wages, Hires, and Labor Market Concentration",,2020.0,Working Paper,https://econpapers.repec.org/paper/izaizadps/d...,Pape Louis-Daniel,"Pape, Louis-Daniel",ppa1332,Département Sciences Économiques et Sociales É...,10%,"Paris, France",edi:denstfr,https://ideas.repec.org/f/ppa1332.html,louis daniel pape,exact
89802,J,"Wages, Hires, and Labor Market Concentration",,2020.0,Working Paper,https://econpapers.repec.org/paper/izaizadps/d...,Pape Louis-Daniel,"Pape, Louis-Daniel",ppa1332,Centre de Recherche en Économie et Statistique...,90%,"Palaiseau, France",edi:crestfr,https://ideas.repec.org/f/ppa1332.html,louis daniel pape,exact
245485,C,Dealing with Logs and Zeros in Regression Models,,2022.0,Working Paper,https://econpapers.repec.org/paper/crswpaper/2...,Pape Louis-Daniel,"Pape, Louis-Daniel",ppa1332,Département Sciences Économiques et Sociales É...,10%,"Paris, France",edi:denstfr,https://ideas.repec.org/f/ppa1332.html,louis daniel pape,exact
245486,C,Dealing with Logs and Zeros in Regression Models,,2022.0,Working Paper,https://econpapers.repec.org/paper/crswpaper/2...,Pape Louis-Daniel,"Pape, Louis-Daniel",ppa1332,Centre de Recherche en Économie et Statistique...,90%,"Palaiseau, France",edi:crestfr,https://ideas.repec.org/f/ppa1332.html,louis daniel pape,exact


# Stat desc sur cette nouvelle base matchée

In [36]:
df = JEL_matched_final.copy()
df["Year"] = pd.to_numeric(df["Year"], errors="coerce")



In [37]:
df["weight"] = (
    df["share_pct"]
    .astype(str)
    .str.replace("%", "", regex=False)
    .astype(float)
    / 100
)



In [38]:
df["Title"].nunique()        # 36731


96278

In [67]:
df["author_name"].nunique() #11 679

16226

In [73]:
df[df["author_name"] == "Mora, Juan"]["Author(s)"].value_counts().head(15)


Author(s)
Mora Juan                       7
Mora Juan Carlos                3
MuÃ±oz Mora Juan                2
Franco Mora Juan SebastiÃ¡n     2
Franco Mora Juan                2
Palacios Mora Juan Carlos       2
Seck-Tuoh-Mora Juan Carlos      2
Mora Juan Carlos Garmendia      1
Ortega-MorÃ¡n Juan Francisco    1
Carlos Muï¿½oz-Mora Juan        1
MuÃ±oz-Mora Juan Carlos         1
Arroyo-Mora Juan Pablo          1
Carlos MuÃÃ¸Î©oz Mora Juan     1
MorÃ¡n-Ãlvarez Juan Carlos     1
Name: count, dtype: int64

In [65]:
df.groupby("author_name")["Author(s)"].nunique().sort_values(ascending=False).head(10)


author_name
Nguyen, Hoang         40
Maria, Jose R.        19
Ruiz, Juan M.         18
Herrera, Luis         17
Mora, Juan            14
Rojas, Juan           13
Silva, Pedro M.       13
Hernandez, JOSE A.    12
Silva, Maria C.A.     12
Bilal, Muhammad       12
Name: Author(s), dtype: int64

In [40]:
df["institution"].nunique() # 2009

2279

In [61]:
df.groupby("JEL Subject")["Title"].nunique().sort_values(ascending=False)


JEL Subject
C    35739
J    26688
G    23406
H    18642
E    14681
Z     4122
B     2722
A     2547
D     1696
O     1189
Y      354
Name: Title, dtype: int64

In [42]:
inst_prod = (
    df.groupby("institution")["weight"]
      .sum()
      .sort_values(ascending=False)
)


In [43]:
inst_share = inst_prod / inst_prod.sum()


# Analyse top journaux

In [64]:
top_journals = [
    "American Economic Review",
    "The Quarterly Journal of Economics",
    "Journal of Political Economy",
    "The Review of Economic Studies",
    "Econometrica"
]
#Econometrica et #Journal of Political Economy absent

In [45]:
pd.set_option("display.max_rows", None)

In [46]:
df["Journal"].value_counts()


Journal
Journal of Economic Behavior & Organization                                                                                                                                                             3521
DIW Wochenbericht                                                                                                                                                                                       2592
ifo Schnelldienst                                                                                                                                                                                       2310
Economics Letters                                                                                                                                                                                       2292
European Economic Review                                                                                                                                                    

In [47]:
df["is_top5_journal"] = df["Journal"].isin(top_journals)


In [48]:
inst_top5 = (
    df[df["is_top5_journal"]]
    .groupby("institution")["weight"]
    .sum()
    .sort_values(ascending=False)
)


In [49]:
inst_top5

institution
Paris School of Economics                                                                                                                                                       19.49
Institute of Labor Economics (IZA)                                                                                                                                              14.89
Department of Economics Sciences économiques Sciences Po                                                                                                                        14.69
Centre de Recerca en Economia Internacional (CREI) Barcelona School of Economics (BSE)                                                                                          13.81
Innocenzo Gasparini Institute for Economic Research (IGIER) Università Commerciale Luigi Bocconi                                                                                13.22
Departament d'Economia i Empresa Universitat Pompeu Fabra Barcelona School of 

# Part des publications d'une institution qui sont dans le top 5

In [50]:
inst_year_top5 = (
    df[df["is_top5_journal"]]
    .groupby(["Year", "institution"])["weight"]
    .sum()
    .reset_index()
)


In [51]:
inst_year_top5

Unnamed: 0,Year,institution,weight
0,2016.0,Abteilung für Volkswirtschaftslehre Universitä...,2.0
1,2016.0,Banco de España,0.2
2,2016.0,Banque de France,0.46
3,2016.0,Barcelona Institute for Political Economy and ...,0.02
4,2016.0,Bruegel,0.45
5,2016.0,CESifo,0.28
6,2016.0,Center for Economic Research and Graduate Educ...,3.66
7,2016.0,Center for Research in Econometric Analysis of...,0.33
8,2016.0,Centre de Recerca en Economia Internacional (C...,0.9
9,2016.0,Centre de Recherche en Économie et Statistique...,0.35


In [52]:
inst_total = (
    df.groupby("institution")["weight"]
      .sum()
)

inst_top5_share = inst_top5 / inst_total


In [53]:
inst_top5_share

institution
"Carlo F. Dondena" Centre for Research on Social Dynamics (DONDENA) Università Commerciale Luigi Bocconi                                                                                                                                                                                                                 NaN
ALBA Graduate Business School American College of Greece                                                                                                                                                                                                                                                                 NaN
AMURE Centre du Droit et de l'Économie de la Mer                                                                                                                                                                                                                                                                         NaN
Abteilung "Verhalten auf Märkten" Wis

In [54]:
top_authors_top5 = (
    df[df["is_top5_journal"]]
    .groupby("author_name")["Title"]
    .nunique()
    .sort_values(ascending=False)
    .reset_index(name="n_top5_articles")
)



In [55]:
top_authors_top5

Unnamed: 0,author_name,n_top5_articles
0,"Brunnermeier, Markus K.",7
1,"Haltiwanger, John",5
2,"Card, David E.",5
3,"Jayachandran, Seema",4
4,"Autor, David",4
5,"Notowidigdo, Matthew J.",4
6,"Blundell, Richard",4
7,"Gorodnichenko, Yuriy",3
8,"Lauermann, Stephan",3
9,"Pinotti, Paolo",3


In [56]:
top_inst_top5 = (
    df[df["is_top5_journal"]]
    .groupby("institution")["weight"]
    .sum()
    .sort_values(ascending=False)
    .reset_index(name="top5_output")
)

top_inst_top5.head(20)


Unnamed: 0,institution,top5_output
0,Paris School of Economics,19.49
1,Institute of Labor Economics (IZA),14.89
2,Department of Economics Sciences économiques S...,14.69
3,Centre de Recerca en Economia Internacional (C...,13.81
4,Innocenzo Gasparini Institute for Economic Res...,13.22
5,Departament d'Economia i Empresa Universitat P...,12.96
6,ifo Institut - Leibniz-Institut für Wirtschaft...,11.8
7,Abteilung für Volkswirtschaftslehre Universitä...,11.79
8,Centre de Recherche en Économie et Statistique...,10.28
9,Volkswirtschaftliche Fakultät Ludwig-Maximilia...,10.17
