# Meteor Scores

In [1]:
import pandas as pd
import re as re
import numpy as np

# Reproducibility
## Data Collection
@Misc{acl-ocl,
    author =       {Shaurya Rohatgi, Yanxia Qin, Benjamin Aw, Niranjana Unnithan, Min-Yen Kan},
    title =        {The ACL OCL Corpus: advancing Open science in Computational Linguistics},
    howpublished = {arXiv},
    year =         {2022},
    url =          {https://huggingface.co/datasets/ACL-OCL/ACL-OCL-Corpus}
}

In [2]:
df = pd.read_parquet('data/acl-publication-info.74k.v2.parquet')
df.head()

Unnamed: 0,acl_id,abstract,full_text,corpus_paper_id,pdf_hash,numcitedby,url,publisher,address,year,...,doi,number,volume,journal,editor,isbn,ENTRYTYPE,ID,language,note
0,O02-2002,There is a need to measure word similarity whe...,There is a need to measure word similarity whe...,18022704,0b09178ac8d17a92f16140365363d8df88c757d0,14,https://aclanthology.org/O02-2002,,,2002,...,,,,,,,inproceedings,chen-you-2002-study,,
1,L02-1310,,,8220988,8d5e31610bc82c2abc86bc20ceba684c97e66024,93,http://www.lrec-conf.org/proceedings/lrec2002/...,European Language Resources Association (ELRA),"Las Palmas, Canary Islands - Spain",2002,...,,,,,,,inproceedings,mihalcea-2002-bootstrapping,,
2,R13-1042,Thread disentanglement is the task of separati...,Thread disentanglement is the task of separati...,16703040,3eb736b17a5acb583b9a9bd99837427753632cdb,10,https://aclanthology.org/R13-1042,"INCOMA Ltd. Shoumen, BULGARIA","Hissar, Bulgaria",2013,...,,,,,,,inproceedings,jamison-gurevych-2013-headerless,,
3,W05-0819,"In this paper, we describe a word alignment al...","In this paper, we describe a word alignment al...",1215281,b20450f67116e59d1348fc472cfc09f96e348f55,15,https://aclanthology.org/W05-0819,Association for Computational Linguistics,"Ann Arbor, Michigan",2005,...,,,,,,,inproceedings,aswani-gaizauskas-2005-aligning,,
4,L02-1309,,,18078432,011e943b64a78dadc3440674419821ee080f0de3,12,http://www.lrec-conf.org/proceedings/lrec2002/...,European Language Resources Association (ELRA),"Las Palmas, Canary Islands - Spain",2002,...,,,,,,,inproceedings,suyaga-etal-2002-proposal,,


In [3]:
df.count()

acl_id             73285
abstract           67669
full_text          67455
corpus_paper_id    73285
pdf_hash           72076
numcitedby         73285
url                73285
publisher          63166
address            66093
year               73285
month              65962
booktitle          71244
author             72618
title              73285
pages              59478
doi                29678
number              1474
volume              1840
journal             2037
editor                13
isbn                1370
ENTRYTYPE          73285
ID                 73285
language            3020
note                 197
dtype: int64

In [4]:
df_renamed = df.rename(columns={"acl_id": "paper_ident",
                   "url": "paper_url",
                   "publisher": "paper_publisher",
                   "address": "paper_address",
                   "year": "paper_year",
                   "month": "paper_month",
                   "booktitle": "paper_booktitle",
                   "author": "paper_author",
                   "title": "paper_title",
                   "pages": "paper_pages",
                   "full_text": "paper_text",
                   "doi": "paper_doi"
                   })

df_cleaned = df_renamed.drop(columns=["abstract", "corpus_paper_id", "pdf_hash", 
                              "numcitedby", "number", "volume", "journal", 
                              "editor", "isbn", "ENTRYTYPE","ID", "language", "note"])

df_full = df_cleaned.dropna(subset=["paper_text"])
df_full.count()

paper_ident        67455
paper_text         67455
paper_url          67455
paper_publisher    57851
paper_address      60454
paper_year         67455
paper_month        60301
paper_booktitle    65440
paper_author       66888
paper_title        67455
paper_pages        57195
paper_doi          29456
dtype: int64


## METEOR Identification

In [5]:
df_meteor_prelim = df_full.copy()
df_meteor_prelim["paper_meteor_prelim"] = df_meteor_prelim["paper_text"].str.contains(" meteor ", case=False)
df_meteor_prelim['paper_meteor_prelim'].sum()

1613

In [28]:
import os
df_meteor_prelim.to_csv("out.csv")

## Paper Review
### METEOR Parameters
Parameters according to https://www.cs.cmu.edu/~alavie/METEOR/README.html

-l language                     Fully supported: en cz de es fr ar
                                Supported with language-independent parameters:
                                  da fi hu it nl no pt ro ru se 
                                  
-t task                         One of: rank util adq hter li tune
                                  util implies -ch

-p 'alpha beta gamma delta'     Custom parameters (overrides default)

-m 'module1 module2 ...'        Specify modules (overrides default)
                                  Any of: exact stem synonym paraphrase

-w 'weight1 weight2 ...'        Specify module weights (overrides default)

-r refCount                     Number of references (plaintext only)

-x beamSize                     (default 40)

-s wordListDirectory            (if not default for language)

-d synonymDirectory             (if not default for language)

-a paraphraseFile               (if not default for language)

-f filePrefix                   Prefix for output files (default 'meteor')

-q                              Quiet: Segment scores to stderr, final to stdout,
                                  no additional output (plaintext only)

-ch                             Character-based precision and recall

-norm                           Tokenize / normalize punctuation and lowercase
                                  (Recommended unless scoring raw output with
                                   pretokenized references)

-lower                          Lowercase only (not required if -norm specified)

-noPunct                        Do not consider punctuation when scoring
                                  (Not recommended unless special case)

-sgml                           Input is in SGML format

-mira                           Input is in MIRA format
                                  (Use '-' for test and reference files)

-vOut                           Output verbose scores (P / R / frag / score)

-ssOut                          Output sufficient statistics instead of scores

-writeAlignments                Output alignments annotated with Meteor scores
                                  (written to <prefix>-align.out)

In [7]:
def extract_parameters(text):
    pattern = r"((?: -[a-z123](?: [a-z0-9.]{1,4})?){2,})"
    matches = re.findall(pattern, text)
    return matches[0] if matches else None

In [8]:
df_params = df_meteor_prelim.copy()
df_params["paper_params"] = df_params.apply(lambda row: extract_parameters(row['paper_text']) if row['paper_meteor_prelim'] else None, axis=1)

In [9]:
df_params['paper_params'].notnull().sum()

9

In [10]:
df_params[df_params['paper_params'].notnull()]

Unnamed: 0,paper_ident,paper_text,paper_url,paper_publisher,paper_address,paper_year,paper_month,paper_booktitle,paper_author,paper_title,paper_pages,paper_doi,paper_meteor_prelim,paper_params
1245,W15-3016,This paper describes LIMSI's submissions to th...,https://aclanthology.org/W15-3016,Association for Computational Linguistics,"Lisbon, Portugal",2015,September,Proceedings of the Tenth Workshop on Statistic...,"Marie, Benjamin and\nAllauzen, Alexandre and...",{LIMSI}@{WMT}{'}15 : Translation Task,145--151,10.18653/v1/W15-3016,True,-o -v
3200,2016.amta-researchers.7,Domain adaptation is a major challenge when ap...,https://aclanthology.org/2016.amta-researchers.7,The Association for Machine Translation in the...,"Austin, TX, USA",2016,October 28 - November 1,Conferences of the Association for Machine Tra...,"Imamura, Kenji and\nSumita, Eiichiro",Multi-domain Adaptation for Statistical Machin...,79--92,,True,-3 to -2
3287,C18-1102,"In this work, we aim at developing an unsuperv...",https://aclanthology.org/C18-1102,Association for Computational Linguistics,"Santa Fe, New Mexico, USA",2018,August,Proceedings of the 27th International Conferen...,"Nayeem, Mir Tafseer and\nFuad, Tanvir Ahmed ...",Abstractive Unsupervised Multi-Document Summar...,1191--1204,,True,-n 2 -m -u -c 95 -x -r 1000 -f
17137,J13-4009,In this article we investigate statistical mac...,https://aclanthology.org/J13-4009,MIT Press,"Cambridge, MA",2013,December,,"Stymne, Sara and\nCancedda, Nicola and\nAhre...",Generation of Compound Words in Statistical Ma...,1067--1108,10.1162/COLI_a_00162,True,-s or -e
19988,W14-3336,This paper presents the results of the WMT14 M...,https://aclanthology.org/W14-3336,Association for Computational Linguistics,"Baltimore, Maryland, USA",2014,June,Proceedings of the Ninth Workshop on Statistic...,"Mach{\'a}{\v{c}}ek, Matou{\v{s}} and\nBojar, ...",Results of the {WMT}14 Metrics Shared Task,293--301,10.3115/v1/W14-3336,True,-1 -1
20220,W18-6450,This paper presents the results of the WMT18 M...,https://aclanthology.org/W18-6450,Association for Computational Linguistics,"Belgium, Brussels",2018,October,Proceedings of the Third Conference on Machine...,"Ma, Qingsong and\nBojar, Ond{\v{r}}ej and\nG...",Results of the {WMT}18 Metrics Shared Task: Bo...,671--688,10.18653/v1/W18-6450,True,-1 -1
22214,2011.eamt-1.10,The increasing use of eXtensible Markup Langua...,https://aclanthology.org/2011.eamt-1.10,European Association for Machine Translation,"Leuven, Belgium",2011,May 30{--}31,Proceedings of the 15th Annual conference of t...,"Tezcan, Arda and\nVandeghinste, Vincent",{SMT}-{CAT} integration in a Technical Domain:...,,,True,-t -d 0
28824,W14-3315,We describe the CMU systems submitted to the 2...,https://aclanthology.org/W14-3315,Association for Computational Linguistics,"Baltimore, Maryland, USA",2014,June,Proceedings of the Ninth Workshop on Statistic...,"Matthews, Austin and\nAmmar, Waleed and\nBha...",The {CMU} Machine Translation Systems at {WMT}...,142--149,10.3115/v1/W14-3315,True,-s or -e
66515,2008.eamt-1.25,We investigated the effects of processing Swed...,https://aclanthology.org/2008.eamt-1.25,European Association for Machine Translation,"Hamburg, Germany",2008,September 22-23,Proceedings of the 12th Annual conference of t...,"Stymne, Sara and\nHolmquist, Maria",Processing of {S}wedish compounds for phrase-b...,182--191,,True,-e -a pojk


### METEOR Protocol

- Language: -l language
    - Use settings for specified language. Lang can be either the language name or two letter code. See the supported language list.
    - Fully supported: en cz de es fr ar
    - Supported with language-independent parameters: fi hu it nl no pt ro ru se tr
    
- Task: -t task
    - Use a different pre-defined set of parameters for scoring (currently limited to English):
        - rank: parameters tuned to human rankings from WMT09 and WMT10
        - adq: parameters tuned to adequacy scores from NIST Open MT 2009
        - hter: parameters tuned to HTER scores from GALE P2 and P3
        - li: language-independent parameters
    - One of: rank util adq hter li tune (util implies -ch)

- Modules: -m 'module1 module2 ...'
    - Set modules manually. Options are: exact stem synonym paraphrase. See supported languages. Module string should be quoted. 

- Normalize: -norm
    - Tokenize and lowercases input lines, normalize punctuation to improve scoring accuracy. This option is highly recommended unless scoring raw system output against pretokenized references. 

- Lowercase: -lower
    - Lowercase input lines (not required if -norm also specified). This is most commonly used scoring cased, tokenized outputs with pretokenized references. 

- Ignore Punctuation: -noPunct
    - If specified, punctuation symbols will be removed before scoring. This is generally not recommended as parameters are tuned with punctuation included. 


In [11]:
regex_protocol = {
    'rank': r'\b(?:rank|ranking|WMT09|WMT10)\b',
    'adq': r'\b(?:adq|adequacy|NIST Open MT 2009)\b',
    'hter': r'\b(?:hter|HUMAN-targeted translation edit rate|GALE P2|GALE P3)\b',
    'li': r'\b(?:li|language[- ]independent)\b',
    'tune': r'\b(?:tune|tuning|parameter optimization)\b',
    'modules': r'\b(?:-m\s+(?:exact|stem|synonym|paraphrase)|module|exact|stem|synonym|paraphrase)\b',
    'normalize': r'\b(?:-norm|normalize|normalization|tokenize\s+and\s+lowercase|normalize\s+punctuation)\b',
    'lowercase': r'\b(?:-lower|lowercase|lowercasing|casing)\b',
    'ignore_punctuation': r'\b(?:-noPunct|no\s+Punctuation|ignore\s+punctuation|remove\s+punctuation)\b'
}

In [12]:
def search_terms_near_meteor(text, regex_dict):
    results = []
    for term, pattern in regex_dict.items():
        # Find all occurrences of 'meteor' (case insensitive)
        for match in re.finditer(r'meteor', text, re.IGNORECASE):
            start, end = match.start(), match.end()
            # Define a 500-character window around 'meteor'
            window_start, window_end = max(0, start - 500), min(len(text), end + 500)
            # Search for the term within this window
            if re.search(pattern, text[window_start:window_end], re.IGNORECASE):
                results.append(term)
    return list(set(results))  # Return unique terms

In [13]:
df_protocol=df_params.copy()
df_protocol['paper_protocol'] = df_protocol[df_protocol['paper_meteor_prelim'] == True]['paper_text'].apply(lambda x: search_terms_near_meteor(x, regex_protocol))

In [14]:
# Display the relevant columns
filtered_df_protocol = df_protocol[(df_protocol['paper_meteor_prelim'] == True) & (df_protocol['paper_protocol'].astype(bool))]

# Display the filtered rows
print(filtered_df_protocol[['paper_text', 'paper_meteor_prelim', 'paper_protocol']])
print(filtered_df_protocol["paper_protocol"].value_counts())

                                              paper_text  paper_meteor_prelim  \
256    This paper presents the LIG participation to t...                 True   
333    This paper presents a new hypothesis alignment...                 True   
421    Research on statistical machine translation ha...                 True   
454    Graph-to-text generation aims to generate flue...                 True   
559    This paper describes a computational linguisti...                 True   
...                                                  ...                  ...   
72910  Machine translation systems are vulnerable to ...                 True   
73011  Current evaluation metrics for language modeli...                 True   
73094  Medical terminologies resources and standards ...                 True   
73204  Letter-like communications (such as email) are...                 True   
73251  Codeswitching is an omnipresent phe nomenon in...                 True   

              paper_protoco

### Variants
- Character-based -ch
    - Calculate character-based precision and recall. Alignment is still word and phrase-level. Fragmentation penalty is still word and phrase-level. 

- Ignore Punctuation: -noPunct
    - If specified, punctuation symbols will be removed before scoring. This is generally not recommended as parameters are tuned with punctuation included.

- Verbose Output: -vOut
    - Output verbose scores (Precision, Recall, Fragmentation, Score) in place of regular scores

In [15]:
regex_variants = {
    'character_based': r'\b(?:-ch|character[- ]based|calculate\s+character[- ]based\s+precision\s+and\s+recall)\b',
    'ignore_punctuation': r'\b(?:-noPunct|no\s+punctuation|ignore\s+punctuation|remove\s+punctuation)\b',
    'verbose_output': r'\b(?:-vOut|verbose\s+output|output\s+verbose\s+scores)\b'
}

In [16]:
df_variants=df_protocol.copy()
df_variants['paper_variants'] = df_variants[df_variants['paper_meteor_prelim'] == True]['paper_text'].apply(lambda x: search_terms_near_meteor(x, regex_variants))

In [17]:
# Display the relevant columns
filtered_df_variants = df_variants[(df_variants['paper_meteor_prelim'] == True) & (df_variants['paper_variants'].astype(bool))]

# Display the filtered rows
print(filtered_df_variants[['paper_text', 'paper_meteor_prelim', 'paper_variants']])
print(df_variants['paper_variants'].value_counts())

                                              paper_text  paper_meteor_prelim  \
2089   In this paper, we demonstrate that accurate ma...                 True   
2912   In this paper, we present the Moses-based infr...                 True   
15971  In recent years, machine learning models have ...                 True   
26670  Question generation (QG) attempts to solve the...                 True   
37679  Multiword Expressions (MWEs) are a frequently ...                 True   
46148  This paper reports the results of the first ex...                 True   
52543  This paper proposes a new automatic machine tr...                 True   
58489  Dialogue summarization is receiving increasing...                 True   
60815  Reliably evaluating Machine Translation (MT) t...                 True   
66845  We propose WMD O , a metric based on distance ...                 True   
72673  We present CHARCUT, a character-based machine ...                 True   

          paper_variants  


### METEOR packages
- Original METEOR (Java): https://www.cs.cmu.edu/~alavie/METEOR/
- MS/meteor https://github.com/tylin/coco-caption/tree/3a9afb2682141a03e1cdc02b0df6770d2c884f6f/pycocoevalcap/meteor
    - python wrapper for Java
- pymeteor https://github.com/zembrodt/pymeteor
    - python implementation
- NLTK: https://github.com/nltk/nltk/blob/develop/nltk/translate/meteor_score.py
    - python implementation
- GenerationEval https://github.com/WebNLG/GenerationEval/blob/master/eval.py
    - NLTK
- EvaluateMetric https://huggingface.co/spaces/evaluate-metric/meteor
    - NLTK
- Fairseq https://github.com/facebookresearch/fairseq/blob/main/fairseq/scoring/meteor.py
    - NLTK

In [18]:
regex_meteor_versions = {
    'Meteor_1.5': r'Meteor\s+Universal.*?Denkowski.*?Lavie.*?EACL.*?2014',
    'Meteor_1.3': r'Meteor\s+1\.3.*?Denkowski.*?Lavie.*?EMNLP.*?2011',
    'Meteor_NEXT': r'METEOR-NEXT.*?Denkowski.*?Lavie.*?ACL.*?2010',
    'Meteor_Phrase_Level': r'Meteor.*?Phrase\s+Level.*?NAACL/HLT.*?2010',
    'Meteor_MT_2010': r'METEOR.*?Machine\s+Translation.*?Machine\s+Translation.*?2010',
    'Meteor_MBleu_MTer': r'METEOR.*?M-BLEU.*?M-TER.*?ACL.*?2008',
    'Meteor_Automatic_MT_2007': r'METEOR.*?Automatic.*?MT\s+Evaluation.*?ACL.*?2007',
    'Meteor_Automatic_MT_2005': r'METEOR.*?Automatic.*?MT\s+Evaluation.*?ACL.*?2005',
    "coco":r'coco-cap',
    "pymeteor": r'pymeteor|zembrodt',   
    "generationeval": r'generationeval|webnlg',
    "evaluatemetric": r'evaluatemetric|evaluate-metric',    
    "fairseq": r'fairseq' 
}


In [19]:
def search_for_regex_pattern(text, regex_dict):
    results = []
    for term, pattern in regex_dict.items():
        # Search for the pattern in the entire text
        if re.search(pattern, text, re.IGNORECASE):
            results.append(term)
    return list(set(results))  # Return unique terms

In [20]:
df_packages = df_variants.copy()
# Applying the function to the DataFrame
df_packages['paper_packages'] = df_packages[df_packages['paper_meteor_prelim'] == True]['paper_text'].apply(lambda x: search_for_regex_pattern(x, regex_meteor_versions))

In [21]:
filtered_df = df_packages[(df_packages['paper_meteor_prelim'] == True) & (df_packages['paper_packages'].astype(bool))]

# Displaying the results
meteor_packages_str = '\n'.join(filtered_df['paper_packages'].astype(str))

# Print the string
print(meteor_packages_str)

['Meteor_MT_2010']
['generationeval']
['Meteor_MT_2010']
['fairseq']
['Meteor_MT_2010']
['fairseq']
['Meteor_MT_2010']
['Meteor_MT_2010']
['Meteor_MT_2010']
['Meteor_MT_2010']
['Meteor_MT_2010']
['Meteor_MT_2010']
['Meteor_MT_2010']
['Meteor_MT_2010']
['Meteor_MT_2010']
['Meteor_Automatic_MT_2005']
['Meteor_MT_2010']
['Meteor_MT_2010', 'Meteor_Automatic_MT_2007', 'Meteor_Automatic_MT_2005', 'Meteor_NEXT']
['Meteor_MT_2010']
['Meteor_MT_2010']
['Meteor_MT_2010']
['Meteor_MT_2010']
['Meteor_MT_2010']
['Meteor_MT_2010']
['Meteor_Automatic_MT_2005']
['Meteor_MT_2010']
['Meteor_MT_2010']
['generationeval', 'fairseq']
['fairseq']
['Meteor_MT_2010']
['Meteor_MT_2010']
['generationeval']
['fairseq']
['Meteor_Automatic_MT_2007', 'Meteor_Automatic_MT_2005']
['Meteor_MT_2010']
['Meteor_MT_2010', 'fairseq']
['Meteor_MT_2010']
['fairseq']
['Meteor_MT_2010']
['generationeval']
['generationeval']
['generationeval']
['generationeval']
['Meteor_Automatic_MT_2005']
['Meteor_MT_2010']
['Meteor_MT_2010']


In [22]:
filtered_df["paper_packages"].value_counts()

paper_packages
[Meteor_MT_2010]                                                                     102
[generationeval]                                                                      57
[fairseq]                                                                             47
[coco]                                                                                 7
[Meteor_Automatic_MT_2005]                                                             5
[Meteor_Automatic_MT_2007, Meteor_Automatic_MT_2005]                                   5
[generationeval, fairseq]                                                              2
[Meteor_MT_2010, fairseq]                                                              2
[Meteor_MT_2010, Meteor_Automatic_MT_2007, Meteor_Automatic_MT_2005]                   2
[Meteor_MT_2010, Meteor_Automatic_MT_2007, Meteor_Automatic_MT_2005, Meteor_NEXT]      1
[Meteor_MT_2010, coco]                                                                 1
[Meteo

#### Other METEOR

In [23]:
regex_packages = {
    "meteor_something" : r'\b(?:\w+meteor|meteor[\w.]+)\b',
    "nltk": r'nltk',
    "nltk_old": r'nltk\s*(==)?\s*(([0-2]\.\d+\.\d+)|3\.[0-3]\.\d+|3\.4\.0)',
    "nltk_341": r'nltk\s*(==)?\s*(3\.4\.[1-9]|3\.4\.[1-9]\d+|3\.[5]\.\d+|3\.6\.[0-2])',
    "nltk_363": r'nltk\s*(==)?\s*3\.6\.[3-4]',
    "nltk_365": r'nltk\s*(==)?\s*(3\.6\.[5-9]|[4-9]\.\d+\.\d+|3\.[7-9]\.\d+|3\.6\.\d{2,})', 
    "nltk_version": r'nltk\s*(\d+\.\d+(?:\.\d+)?)?'
    }

In [24]:
def extract_regex_pattern(text, regex_dict):
    results = []
    for pattern in regex_dict.values():
        # Search for the pattern in the entire text and extract matches
        matches = re.findall(pattern, text, re.IGNORECASE)
        results.extend(matches)
    return list(set(results))  # Return unique extracted patterns

In [25]:
df_temp = df_variants.copy()
# Applying the function to the DataFrame
df_temp['meteor_packages'] = df_temp[df_temp['paper_meteor_prelim'] == True]['paper_text'].apply(lambda x: extract_regex_pattern(x, regex_packages))

In [26]:
filtered_df = df_temp[(df_temp['paper_meteor_prelim'] == True) & (df_temp['meteor_packages'].astype(bool))]

# Displaying the results
meteor_packages_str = '\n'.join(filtered_df['meteor_packages'].astype(str))

# Print the string
print(meteor_packages_str)

['METEORand']
['', 'NLTK']
['', 'NLTK']
['', 'NLTK']
['', 'NLTK']
['Meteor1.5', 'METEORderived']
['HMeteor', 'HMETEOR']
['', 'NLTK']
['', 'NLTK']
['', 'NLTK']
['HMETEOR']
['', 'NLTK']
['', 'NLTK']
['', 'NLTK']
['meteor.com']
['', 'NLTK']
['METEORR']
['METEORR']
['', 'NLTK']
['', 'NLTK']
['', 'NLTK']
['', 'NLTK']
['CMeteor', 'VMeteor']
['', 'NLTK']
['', 'NLTK']
['', 'NLTK']
['', 'NLTK']
['MeteorNP']
['HMETEOR']
['', 'NLTK']
['METEOR.porter', 'METEOR.wn2', 'METEOR.wn1', 'METEOR.exact']
['', 'NLTK']
['METEORWSD']
['', 'NLTK']
['tenceMeteor']
['', 'NLTK']
['', 'NLTK']
['', 'NLTK']
['', 'NLTK']
['', 'NLTK']
['', 'NLTK']
['', 'NLTK']
['', 'NLTK']
['Meteorological', 'meteorologists']
['', 'NLTK']
['', 'NLTK']
['meteoroid']
['nltk', 'NLTK', '']
['', 'NLTK']
['meteorological', 'meteorologists', 'meteorologist']
['', 'NLTK']
['', 'NLTK']
['', 'NLTK']
['HMETEOR']
['', 'NLTK']
[('==', '3.4.5'), 'NLTK', 'meteors', '']
['', 'NLTK']
['nltk', '']
['', 'NLTK']
['', 'NLTK']
['', 'NLTK']
['', 'NLTK']
[''

## Code Review
### Get URLs from papers
- [ ] code_url	URL of code repository cited in paper.

In [77]:
df_url = df_packages.copy()

# Assuming df_url is your DataFrame and it contains the column 'paper_text'
# Define the regex pattern for URLs
regex_url = r'https?://(?:www\.)?(?:github\.com)[^\s)]*(?<!\.)'

# Function to extract URLs from a text
def extract_urls(text):
    return re.findall(regex_url, text)

# Filter the DataFrame to rows where 'paper_meteor_prelim' is True, then extract URLs
extracted_urls = df_url[df_url['paper_meteor_prelim'] == True]['paper_text'].apply(extract_urls).explode().dropna().unique().tolist()

urls_df = pd.DataFrame(extracted_urls, columns=['URLs'])

### URL check

In [78]:
import requests
# Function to check if a URL exists
def url_exists(url):
    try:
        response = requests.head(url, allow_redirects=True, timeout=5)
        return response.status_code == 200
    except requests.RequestException:
        return False

# Check if each URL is active and add the result to a new column in the DataFrame
urls_df['active'] = urls_df['URLs'].apply(url_exists)

In [80]:
# Remove all rows where 'active' is False
urls_df = urls_df[urls_df['active']].copy()

### Use Github API to search for meteor
- [ ] code_meteor_prelim	Does the code mention Meteor? Identified using GitHub API search.

In [82]:
# Function to extract the necessary part of the GitHub URL for the API request
def extract_github_repo_name(url):
    # Splitting the URL and extracting the repository part
    # Typical GitHub URL format: https://github.com/username/repository
    parts = url.split('/')
    if len(parts) > 4 and 'github.com' in parts:
        # Extracting the username and repository name
        return '/'.join(parts[3:5])
    else:
        return None

# Apply the function to the 'URLs' column
urls_df['GitHub_Repo'] = urls_df['URLs'].apply(extract_github_repo_name)

# Filtering out rows where the GitHub repository name couldn't be extracted
urls_df = urls_df[urls_df['GitHub_Repo'].notnull()]

In [84]:
github_token = "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"

In [85]:
import time

# Function to search a GitHub repository for a keyword
def search_github_repo(repo_name, keyword='meteor', token=github_token):
    headers = {
        'Authorization': f'token {token}',
        'Accept': 'application/vnd.github.v3+json'
    }
    search_url = f'https://api.github.com/search/code?q={keyword}+repo:{repo_name}'
    response = requests.get(search_url, headers=headers)
    if response.status_code == 200:
        return response.json()['total_count']
    else:
        print(f"Error {response.status_code}: {response.json().get('message', 'No message')}")
        return None

# Apply the search function to each GitHub repository, respecting the rate limit
search_results = []
for i, repo_name in enumerate(urls_df['GitHub_Repo']):
    count = search_github_repo(repo_name)
    search_results.append({
        'Repository': repo_name,
        'Meteor_Count': count
    })
    # Respect the rate limit
    if i % 9 == 8:  # After every 9th request
        time.sleep(60)
        
# Convert the results to a DataFrame
search_results_df = pd.DataFrame(search_results)

In [88]:
# Filter the search_results_df to keep only rows where 'Meteor_Count' is greater than 0
filtered_search_results_df = search_results_df[search_results_df['Meteor_Count'] > 0]
filtered_search_results_df.to_csv("out.csv")

### Manual Code Review
- [ ] code_meteor	Does the code perform reproducible Meteor evaluation? Manual static review, subjective assessment of Rouge evaluation reproducibility.

- [ ] code_packages	List of Meteor packages found in code. Identified using manual review.

## Reproducibility
R1: Paper cites METEOR package and parameters.

R2: Paper cites no-config4 METEOR package.

R3: Codebase has complete METEOR evaluation 

In [152]:
df_reproducible = df_packages.copy()
df_reproducible['reproducible'] = None  # Initialize the column with None
condition = df_reproducible['paper_packages'].notna() & df_reproducible['paper_params'].notna()
df_reproducible.loc[condition, 'reproducible'] = 'R1'

In [155]:
r1_count = df_reproducible['reproducible'].value_counts().get('R1', 0)

# Print the count
print(r1_count)

9


# Questions
- how to define code reproducibility
- is it enough if they say they used nlgeval? YES
- moving to github
- how to store github access token but not publish it

- go to thesis meetup?

# ToDo
- redo search for bitbucket etc
- merge code review with literature review
- redo text search for packages

dsd
- how do wrappers differ? put them in buckets

dsd
- comparability of scores
- influence of parameters