# Create points table and author list

In [1]:
from __future__ import annotations

import os
import sys
from pathlib import Path

project_path = Path(os.getcwd()) / ".."

sys.path.append(str(project_path / "docs" / "mmteb"))

In [2]:
from create_points_table import load_data

## Point table

In [3]:
df = load_data()
df = df.groupby("GitHub").sum().astype(int)
# create a new column with the sum of the points
df["Total"] = df.sum(axis=1)
df = df.sort_values("Total", ascending=False)
# total as first column
df = df[["Total"] + [col for col in df.columns if col != "Total"]]

In [4]:
df

Unnamed: 0_level_0,Total,Bug fixes,Review PR,New dataset,Dataset annotations,Paper writing,New task,Coordination,Running Models
GitHub,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
KennethEnevoldsen,591,85,322,68,35,0,0,81,0
isaac-chung,433,50,194,120,1,12,2,54,0
imenelydiaker,358,24,144,120,0,0,0,70,0
awinml,302,0,2,300,0,0,0,0,0
x-tabdeveloping,239,10,32,144,0,0,12,41,0
...,...,...,...,...,...,...,...,...,...
antoniolanza1996,2,2,0,0,0,0,0,0,0
bakrianoo,2,0,0,2,0,0,0,0,0
cslizc,2,0,0,2,0,0,0,0,0
hanhainebula,2,0,0,2,0,0,0,0,0


In [5]:
print(
    df.to_latex(
        longtable=True,
        caption="Contributions by GitHub users. See \autoref{tab:authors} for the mapping between authors and GitHub handles.",
        label="tab:contributions",
    )
)

\begin{longtable}{lrrrrrrrrr}
\caption{Contributions by GitHub users. See utoref{tab:authors} for the mapping between authors and GitHub handles.} \label{tab:contributions} \\
\toprule
 & Total & Bug fixes & Review PR & New dataset & Dataset annotations & Paper writing & New task & Coordination & Running Models \\
GitHub &  &  &  &  &  &  &  &  &  \\
\midrule
\endfirsthead
\caption[]{Contributions by GitHub users. See utoref{tab:authors} for the mapping between authors and GitHub handles.} \\
\toprule
 & Total & Bug fixes & Review PR & New dataset & Dataset annotations & Paper writing & New task & Coordination & Running Models \\
GitHub &  &  &  &  &  &  &  &  &  \\
\midrule
\endhead
\midrule
\multicolumn{10}{r}{Continued on next page} \\
\midrule
\endfoot
\bottomrule
\endlastfoot
KennethEnevoldsen & 591 & 85 & 322 & 68 & 35 & 0 & 0 & 81 & 0 \\
isaac-chung & 433 & 50 & 194 & 120 & 1 & 12 & 2 & 54 & 0 \\
imenelydiaker & 358 & 24 & 144 & 120 & 0 & 0 & 0 & 70 & 0 \\
awinml & 302 & 0 & 2

# Contributor affiliations

In [6]:
points_to_authors = project_path / "docs" / "mmteb" / "points.md"

# extract table from markdown file
with open(points_to_authors) as f:
    lines = f.readlines()

table = False
table_lines = []
colnames = []
head_skipped = False
for line in lines:
    if not table and line.startswith("|"):
        table = True
        colnames = [c.strip() for c in line.strip().split("|")[1:-1]]
        continue
    if colnames and table and not head_skipped:
        head_skipped = True
        continue
    if table:
        table_lines.append([c.strip() for c in line.strip().split("|")[1:-1]])
    if table and line.strip() == "":
        break

In [7]:
# create a dataframe from the table
import pandas as pd

author_df = pd.DataFrame(table_lines, columns=colnames)

In [8]:
author_df

Unnamed: 0,GitHub,First name,Last name,Email,User on openreview,Affiliations
0,KennethEnevoldsen,Kenneth,Enevoldsen,kennethcenevoldsen@gmail.com,~Kenneth_Enevoldsen1,"Aarhus University, Denmark"
1,x-tabdeveloping,Márton,Kardos,martonkardos@cas.au.dk,~Márton_Kardos1,"Aarhus University, Denmark"
2,imenelydiaker,Imene,Kerboua,,,"Esker, Lyon, France && INSA Lyon, LIRIS, Lyon,..."
3,wissam-sib,Wissam,Siblini,wissam.siblini92@gmail.com,~Wissam_Siblini1,
4,GabrielSequeira,Gabriel,Sequeira,,,
...,...,...,...,...,...,...
69,sivareddyg,Siva,Reddy,siva.reddy@mila.quebec,~Siva_Reddy1,McGill University && Mila - Quebec AI Institut...
70,guenthermi,Michael,Günther,michael.guenther@jina.ai,~Michael_Günther1,Jina AI
71,violenil,Isabelle,Mohr,isabelle.mohr@jina.ai,~Isabelle_Mohr1,Jina AI
72,Muennighoff,Niklas,Muennighoff,n.muennighoff@gmail.com,,Contextual AI


In [9]:
print(
    author_df[["GitHub", "First name", "Last name", "Affiliations"]].to_latex(
        index=False
    )
)

\begin{tabular}{llll}
\toprule
GitHub & First name & Last name & Affiliations \\
\midrule
KennethEnevoldsen & Kenneth & Enevoldsen & Aarhus University, Denmark \\
x-tabdeveloping & Márton & Kardos & Aarhus University, Denmark \\
imenelydiaker & Imene & Kerboua & Esker, Lyon, France && INSA Lyon, LIRIS, Lyon, France \\
wissam-sib & Wissam & Siblini & N/A \\
GabrielSequeira & Gabriel & Sequeira & N/A \\
schmarion & Marion & Schaeffer & Wikit, Lyon, France \\
MathieuCiancone & Mathieu & Ciancone & Wikit, Lyon, France \\
MartinBernstorff & Martin & Bernstorff & Aarhus University, Denmark \\
staoxiao & Shitao & Xiao & Beijing Academy of Artificial Intelligence \\
ZhengLiu101 & Zheng & Liu & Beijing Academy of Artificial Intelligence \\
achibb & Aaron & Chibb & N/A \\
cassanof & Federico & Cassano & Northeastern University, Boston, USA \\
taidnguyen & Nguyen & Tai & University of Pennsylvania \\
xu3kev & Wen-Ding & Li & Cornell University \\
Rysias & Jonathan & Rystrøm & University of Oxford

# Author list

In [10]:
github = set(author_df["GitHub"])

not_10 = []

df = df.reset_index()
# check if all github users are in the points table and has 10 total point
for gh in github:
    if gh not in set(df["GitHub"]):
        print(f"{gh} not in points table")

    if df[df["GitHub"] == gh]["Total"].values[0] < 10:
        print(f"{gh} has less than 10 points")
        not_10.append(gh)

achibb has less than 10 points
izhx has less than 10 points


In [19]:
missing_users = [user for user in df[df["Total"] > 10]["GitHub"] if user not in github]
print(missing_users)

['akshita-sukhlecha', 'loicmagne', 'mrshu', 'crystina-z', 'thakur-nandan', 'xhluca']


In [11]:
# sort author_df by total points
author_df = pd.merge(author_df, df[["GitHub", "Total"]], on="GitHub", how="left")
author_df = author_df.sort_values("Total", ascending=False)

In [191]:
# create a latex author list
# \textbf{First Last \textsuperscript{1}},
# \textbf{First Last \textsuperscript{1}},
# [if too long add \\]
# ...
# \\
# \\
# \textsuperscript{1}Aarhus University, Denmark,
# ...
# [if too long add \\]

In [192]:
author_list = []
affiations = {}

aff_id = 1
for i, row in author_df.iterrows():
    author = row["First name"] + " " + row["Last name"]
    if row["GitHub"] in not_10:
        continue
    author_str = f"\\textbf{{{author}"

    if row["Affiliations"]:
        affiliations = row["Affiliations"].split("&&")

        for aff in affiliations:
            if "N/A" in aff:
                continue
            if aff not in affiations:
                affiations[aff] = aff_id
                aff_id += 1
            author_str += f"\\textsuperscript{{{affiations[aff]}}}"

    # if row["Affiliations"] not in affiations and row["Affiliations"]:
    #     affiations[row["Affiliations"]] = aff_id
    #     aff_id += 1
    #     author_str += f"\\textsuperscript{{{affiations[row['Affiliations']]}}}"
    author_str += "}"
    author_list.append(author_str)

In [193]:
# create the latex string

latex = ""
line_length = 0
max_line_length = 80

for i, author in enumerate(author_list):
    _line_length = len(author.split("\\textsuperscript")[0])
    if line_length + _line_length > max_line_length:
        latex += "\\\\\n"
        line_length = 0
    latex += author + ", \n"
    line_length += _line_length

# add the affiliations
line_length = 0
latex += "\\\\\n"
latex += "\\\\\n"
for aff, id in affiations.items():
    if "N/A" in aff:
        continue
    _line_length = len(aff)
    if line_length + _line_length > max_line_length:
        latex += "\\\\\n"
        line_length = 0
    latex += "\\textsuperscript{" + str(id) + "}" + aff + ", \n"
    line_length += _line_length

In [194]:
print(latex)

\textbf{Kenneth Enevoldsen\textsuperscript{1}}, 
\textbf{Isaac Chung}, 
\textbf{Ashwin Mathur}, 
\\
\textbf{Imene Kerboua\textsuperscript{2}\textsuperscript{3}}, 
\textbf{Márton Kardos\textsuperscript{1}}, 
\textbf{David Stap\textsuperscript{4}}, 
\textbf{Jay Gala\textsuperscript{5}}, 
\\
\textbf{Wissam Siblini}, 
\textbf{Dominik Krzemiński\textsuperscript{6}}, 
\textbf{Genta Indra Winata}, 
\\
\textbf{Saba Sturua\textsuperscript{7}}, 
\textbf{Saiteja Utpala\textsuperscript{8}}, 
\textbf{Gabriel Sequeira}, 
\\
\textbf{Marion Schaeffer\textsuperscript{9}}, 
\textbf{Mathieu Ciancone\textsuperscript{9}}, 
\textbf{Diganta Misra\textsuperscript{10}}, 
\\
\textbf{Shreeya Dhakal\textsuperscript{11}}, 
\textbf{Jonathan Rystrøm\textsuperscript{12}}, 
\textbf{Orion Weller\textsuperscript{13}}, 
\\
\textbf{Chenghao Xiao\textsuperscript{14}}, 
\textbf{Ömer Çağatan\textsuperscript{15}}, 
\textbf{Akash Kundu\textsuperscript{16}\textsuperscript{17}}, 
\textbf{Shitao Xiao\textsuperscript{18}}, 
\\
\te

In [None]:
# authors with 10 points or more

{g for g in github if g not in not_10}