# Create points table and author list

In [157]:
import sys
from pathlib import Path
import os
project_path = Path(os.getcwd()) / ".."

sys.path.append(str(project_path / "docs" / "mmteb"))

In [158]:
from create_points_table import load_data 

## Point table

In [159]:
df = load_data()
df = df.groupby("GitHub").sum().astype(int)
# create a new column with the sum of the points
df["Total"] = df.sum(axis=1)
df = df.sort_values("Total", ascending=False)
# total as first column
df = df[["Total"] + [col for col in df.columns if col != "Total"]]

In [160]:
df

Unnamed: 0_level_0,Total,Bug fixes,Review PR,New dataset,Dataset annotations,Paper writing,New task,Coordination,Running Models
GitHub,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
KennethEnevoldsen,505,85,306,68,35,0,0,11,0
isaac-chung,383,50,194,120,1,12,2,4,0
awinml,302,0,2,300,0,0,0,0,0
imenelydiaker,288,24,144,120,0,0,0,0,0
x-tabdeveloping,199,10,32,144,0,0,12,1,0
...,...,...,...,...,...,...,...,...,...
NouamaneTazi,2,0,2,0,0,0,0,0,0
MexicanLemonade,2,0,0,2,0,0,0,0,0
cslizc,2,0,0,2,0,0,0,0,0
hanhainebula,2,0,0,2,0,0,0,0,0


In [161]:
print(df.to_latex(longtable=True, caption="Contributions by GitHub users. See \autoref{tab:authors} for the mapping between authors and GitHub handles.", label="tab:contributions"))

\begin{longtable}{lrrrrrrrrr}
\caption{Contributions by GitHub users. See utoref{tab:authors} for the mapping between authors and GitHub handles.} \label{tab:contributions} \\
\toprule
 & Total & Bug fixes & Review PR & New dataset & Dataset annotations & Paper writing & New task & Coordination & Running Models \\
GitHub &  &  &  &  &  &  &  &  &  \\
\midrule
\endfirsthead
\caption[]{Contributions by GitHub users. See utoref{tab:authors} for the mapping between authors and GitHub handles.} \\
\toprule
 & Total & Bug fixes & Review PR & New dataset & Dataset annotations & Paper writing & New task & Coordination & Running Models \\
GitHub &  &  &  &  &  &  &  &  &  \\
\midrule
\endhead
\midrule
\multicolumn{10}{r}{Continued on next page} \\
\midrule
\endfoot
\bottomrule
\endlastfoot
KennethEnevoldsen & 505 & 85 & 306 & 68 & 35 & 0 & 0 & 11 & 0 \\
isaac-chung & 383 & 50 & 194 & 120 & 1 & 12 & 2 & 4 & 0 \\
awinml & 302 & 0 & 2 & 300 & 0 & 0 & 0 & 0 & 0 \\
imenelydiaker & 288 & 24 & 144 &

# Contributor affiliations

In [162]:



points_to_authors = project_path / "docs" / "mmteb" / "points.md"

# extract table from markdown file
with open(points_to_authors, "r") as f:
    lines = f.readlines()

table = False
table_lines = []
colnames = []
head_skipped = False
for line in lines:
    if not table and line.startswith("|"):
        table = True
        colnames = [c.strip() for c in  line.strip().split("|")[1:-1]]
        continue
    if colnames and table and not head_skipped:
        head_skipped = True
        continue
    if table:
        table_lines.append([c.strip() for c in  line.strip().split("|")[1:-1]])
    if table and line.strip() == "":
        break



In [163]:
# create a dataframe from the table
import pandas as pd
author_df = pd.DataFrame(table_lines, columns=colnames)

In [164]:
author_df

Unnamed: 0,GitHub,First name,Last name,Email,User on openreview,Affiliations
0,KennethEnevoldsen,Kenneth,Enevoldsen,kennethcenevoldsen@gmail.com,~Kenneth_Enevoldsen1,"Aarhus University, Denmark"
1,x-tabdeveloping,Márton,Kardos,martonkardos@cas.au.dk,~Márton_Kardos1,"Aarhus University, Denmark"
2,imenelydiaker,Imene,Kerboua,,,"Esker, Lyon, France && INSA Lyon, LIRIS, Lyon,..."
3,wissam-sib,Wissam,Siblini,wissamsiblini92@gmail.com,,
4,GabrielSequeira,Gabriel,Sequeira,,,
...,...,...,...,...,...,...
65,artemsnegirev,Artem,Snegirev,artem.s.snegirev@gmail.com,~Artem_Snegirev1,"SaluteDevices, Russia"
66,anpalmak2003,Anna,Maksimova,anpalmak@gmail.com,~Anna_Maksimova1,"SaluteDevices, Russia"
67,MariyaTikhonova,Maria,Tikhonova,m_tikhonova94@mail.ru,~Maria_Tikhonova1,"SaluteDevices, HSE University, Russia"
68,guenthermi,Michael,Günther,michael.guenther@jina.ai,~Michael_Günther1,Jina AI


In [165]:
print(author_df.to_latex())

\begin{tabular}{lllllll}
\toprule
 & GitHub & First name & Last name & Email & User on openreview & Affiliations \\
\midrule
0 & KennethEnevoldsen & Kenneth & Enevoldsen & kennethcenevoldsen@gmail.com & ~Kenneth_Enevoldsen1 & Aarhus University, Denmark \\
1 & x-tabdeveloping & Márton & Kardos & martonkardos@cas.au.dk & ~Márton_Kardos1 & Aarhus University, Denmark \\
2 & imenelydiaker & Imene & Kerboua &  &  & Esker, Lyon, France && INSA Lyon, LIRIS, Lyon, France \\
3 & wissam-sib & Wissam & Siblini & wissamsiblini92@gmail.com &  & N/A \\
4 & GabrielSequeira & Gabriel & Sequeira &  &  & N/A \\
5 & schmarion & Marion & Schaeffer &  & ~Marion_Schaeffer1 & Wikit, Lyon, France \\
6 & MathieuCiancone & Mathieu & Ciancone &  &  & Wikit, Lyon, France \\
7 & MartinBernstorff & Martin & Bernstorff & martinbernstorff@gmail.com & ~Martin_Bernstorff1 & Aarhus University, Denmark \\
8 & staoxiao & Shitao & Xiao & 2906698981@qq.com & ~Shitao_Xiao1 & Beijing Academy of Artificial Intelligence \\
9 & Z

# Author list

In [166]:
github = set(author_df["GitHub"])

not_10 =[]

df = df.reset_index()
# check if all github users are in the points table and has 10 total point 
for gh in github:
    if gh not in set(df["GitHub"]):
        print(f"{gh} not in points table")

    if df[df["GitHub"] == gh]["Total"].values[0] < 10:
        print(f"{gh} has less than 10 points")
        not_10.append(gh)



izhx has less than 10 points
achibb has less than 10 points


In [167]:
# sort author_df by total points
author_df = pd.merge(author_df, df[["GitHub", "Total"]], on="GitHub", how="left")
author_df = author_df.sort_values("Total", ascending=False)

In [168]:
# create a latex author list
# \textbf{First Last \textsuperscript{1}},
# \textbf{First Last \textsuperscript{1}},
# [if too long add \\]
# ...
# \\
# \\
# \textsuperscript{1}Aarhus University, Denmark,
# ...
# [if too long add \\]

In [175]:
author_list = []
affiations = {}

aff_id = 1
for i, row in author_df.iterrows():
    author = row["First name"] + " " + row["Last name"]
    if row["GitHub"] in not_10:
        continue
    author_str = f"\\textbf{{{author}"

    if row["Affiliations"]:
        affiliations = row["Affiliations"].split("&&")

        for aff in affiliations:
            if "N/A" in aff:
                continue
            if aff not in affiations:
                affiations[aff] = aff_id
                aff_id += 1
            author_str += f"\\textsuperscript{{{affiations[aff]}}}" 

    # if row["Affiliations"] not in affiations and row["Affiliations"]:
    #     affiations[row["Affiliations"]] = aff_id
    #     aff_id += 1
    #     author_str += f"\\textsuperscript{{{affiations[row['Affiliations']]}}}"
    author_str += "}"
    author_list.append(author_str)

In [176]:
# create the latex string 

latex = ""
line_length = 0
max_line_length = 80

for i, author in enumerate(author_list):
    _line_length = len(author.split("\\textsuperscript")[0])
    if line_length + _line_length > max_line_length:
        latex += "\\\\\n"
        line_length = 0
    latex += author + ", \n"
    line_length += _line_length

# add the affiliations
line_length = 0
latex += "\\\\\n"
latex += "\\\\\n"
for aff, id in affiations.items():
    if "N/A" in aff:
        continue
    _line_length = len(aff)
    if line_length + _line_length > max_line_length:
        latex += "\\\\\n"
        line_length = 0
    latex += "\\textsuperscript{" + str(id) + "}" + aff + ", \n"
    line_length += _line_length
    


In [177]:
print(latex)

\textbf{Kenneth Enevoldsen\textsuperscript{1}}, 
\textbf{Isaac Chung}, 
\textbf{Ashwin Mathur}, 
\\
\textbf{Imene Kerboua\textsuperscript{2}\textsuperscript{3}}, 
\textbf{Márton Kardos\textsuperscript{1}}, 
\textbf{David Stap\textsuperscript{4}}, 
\textbf{Jay Gala\textsuperscript{5}}, 
\\
\textbf{Wissam Siblini}, 
\textbf{Dominik Krzemiński\textsuperscript{6}}, 
\textbf{Genta Indra Winata\textsuperscript{7}}, 
\\
\textbf{Saba Sturua\textsuperscript{8}}, 
\textbf{Saiteja Utpala\textsuperscript{9}}, 
\textbf{Gabriel Sequeira}, 
\\
\textbf{Marion Schaeffer\textsuperscript{10}}, 
\textbf{Mathieu Ciancone\textsuperscript{10}}, 
\textbf{Diganta Misra\textsuperscript{11}}, 
\\
\textbf{Shreeya Dhakal\textsuperscript{12}}, 
\textbf{Jonathan Rystrøm\textsuperscript{13}}, 
\textbf{Orion Weller\textsuperscript{14}}, 
\\
\textbf{Chenghao Xiao\textsuperscript{15}}, 
\textbf{Ömer Çağatan\textsuperscript{16}}, 
\textbf{Akash Kundu\textsuperscript{17}\textsuperscript{18}}, 
\textbf{Shitao Xiao\textsupe