In [2]:
from scholarly import scholarly
import re

def parse_citation(citation_text):
    # Split the citation text at the first period, assuming the format "[num] authors. title."
    parts = citation_text.split('.')
    if len(parts) < 2:
        return None  # Unable to parse if there's no period separating authors and title
    
    # Extract the citation number and authors from the first part
    num_authors_part = parts[0].strip()
    # Assuming the authors are after the citation number and space
    authors = num_authors_part.split(' ', 1)[-1] if ' ' in num_authors_part else num_authors_part
    
    # The title is the second part, but we need to further process it to extract the year and possibly the venue
    title_part = parts[1].strip()
    title_part = title_part.split('[')[0]
    
    # For simplicity, we're not extracting the venue here, but you could extend this to handle more details
    return authors, title_part  # Assuming last name is the last word in the first author's name

def fetch_bibtex(query):
    search_query = scholarly.search_pubs(query)
    try:
        first_result = next(search_query)
        return scholarly.bibtex(first_result)
    except StopIteration:
        return None

def process_citations(input_file, output_file):
    with open(input_file, 'r') as infile, open(output_file, 'w') as outfile:
        for line in infile:
            parsed = parse_citation(line)
            if parsed:
                first_author_last_name, title = parsed
                query = f"{first_author_last_name}. {title}"
                print(query)
                bibtex_entry = fetch_bibtex(query)
                if bibtex_entry:
                    print(bibtex_entry)
                    outfile.write(bibtex_entry + "\n\n")
                else:
                    print("faile")
                    outfile.write(f"// BibTeX not found for query: {query}\n\n")
            else:
                print(line)

# Adjust the file paths accordingly
input_file_path = "cite.txt"  # Use the actual path to your file
output_file_path = "output_bibtex.txt"  # Choose where to save the output

# Since we can't execute this in the current environment, ensure to run it where you have scholarly installed and internet access.
process_citations(input_file_path, output_file_path)


KWIATKOWSKA M Z, NORMAN G, PARKER D. Probabilistic symbolic model checking with PRISM: a hybrid approach
@article{kwiatkowska2004probabilistic,
 abstract = {In this paper we present efficient symbolic techniques for probabilistic model checking. These have been implemented in PRISM, a tool for the analysis of probabilistic models such as discrete-time Markov chains, continuous-time Markov chains and Markov decision processes using specifications in the probabilistic temporal logics PCTL and CSL. Motivated by the success of model checkers such as SMV which use BDDs (binary decision diagrams), we have developed an implementation of PCTL and CSL model checking based on},
 author = {Kwiatkowska, Marta and Norman, Gethin and Parker, David},
 journal = {International journal on software tools for technology transfer},
 pages = {128--142},
 pub_year = {2004},
 publisher = {Springer},
 title = {Probabilistic symbolic model checking with PRISM: A hybrid approach},
 venue = {International journa

In [10]:
test= "Towards model checking quantum security protocols. 2007"
search_query = scholarly.search_pubs(test)
next(search_query)

StopIteration: 

In [10]:
def remove_newlines(file_path):
    # Open the file and read its contents
    with open(file_path, 'r') as file:
        content = file.read()
    
    # Remove all newline characters from the content
    modified_content = content.replace('\n', '')
    
    # Write the modified content back to the file
    with open(file_path, 'w') as file:
        file.write(modified_content)

# Example usage
file_path = "cite.txt"  # Replace with your file path
remove_newlines(file_path)


In [11]:
import re

def add_newline_before_numbers(input_file, output_file=None):
    """
    Adds a newline before patterns of the form [some number] in the text of the input file.
    If output_file is None, it writes the changes back to the input_file.
    """
    # If no output file is specified, overwrite the input file
    if output_file is None:
        output_file = input_file

    with open(input_file, 'r', encoding='utf-8') as infile:
        content = infile.read()
    
    # Regular expression to match patterns of the form [some number]
    modified_content = re.sub(r"(\[\d+\])", r"\n\1", content)
    
    with open(output_file, 'w', encoding='utf-8') as outfile:
        outfile.write(modified_content)

# Example usage:
input_file_path = "cite.txt"  # Update this path
# This will overwrite the input file. If you want to write to a different file, specify the output_file_path.
add_newline_before_numbers(input_file_path)
# If you want to write to a new file, uncomment the next line and provide a path for the output file.
# add


In [3]:
import re
from collections import defaultdict

def find_duplicate_titles(bibtex_file):
    # Regular expression to match BibTeX entries and titles
    entry_pattern = re.compile(r'@(\w+){([^,]+),')
    title_pattern = re.compile(r'\btitle\s*=\s*{([^}]+)}', re.IGNORECASE)
    
    # Store titles and their corresponding BibTeX entries
    titles = defaultdict(list)
    
    # Read the BibTeX file
    with open(bibtex_file, 'r') as file:
        content = file.read()
        
        # Find all BibTeX entries
        entries = entry_pattern.split(content)[1:]
        # Process entries in pairs: type and body
        for i in range(0, len(entries), 2):
            if i+1 == len(entries):
                continue
            entry_type, body = entries[i], entries[i + 1]
            title_match = title_pattern.search(body)
            if title_match:
                # Extract title and remove whitespace and lowercase for comparison
                title = title_match.group(1).strip().lower()
                titles[title].append((entry_type, body))
    
    # Find and print duplicates
    for title, entries in titles.items():
        if len(entries) > 1:
            print(f"Duplicate title found: {title}")
            for entry_type, body in entries:
                print(f"@{entry_type}{{\n{body}\n}}\n")

# Example usage
bibtex_file_path = "../Biblio/ref.bib"  # Update this path to your actual file path
find_duplicate_titles(bibtex_file_path)
