<a href="https://colab.research.google.com/github/bhaskatripathi/CitationFinder/blob/main/CitationFinder.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install pybtex pybtex-apa-style scholarly openai 

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pybtex
  Downloading pybtex-0.24.0-py2.py3-none-any.whl (561 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m561.4/561.4 KB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pybtex-apa-style
  Downloading pybtex_apa_style-1.3-py3-none-any.whl (6.4 kB)
Collecting scholarly
  Downloading scholarly-1.7.11-py3-none-any.whl (39 kB)
Collecting openai
  Downloading openai-0.27.2-py3-none-any.whl (70 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m70.1/70.1 KB[0m [31m13.4 MB/s[0m eta [36m0:00:00[0m
Collecting latexcodec>=1.0.4
  Downloading latexcodec-2.0.1-py2.py3-none-any.whl (18 kB)
Collecting sphinx-rtd-theme
  Downloading sphinx_rtd_theme-1.2.0-py2.py3-none-any.whl (2.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.8/2.8 MB[0m [31m39.2 MB/s[0m eta [36m0:00:00[0m
Collecting free-proxy
  Downloading f

In [None]:

import os
import re

import openai
import pandas as pd
import requests
from pybtex.database import BibliographyData, Entry
from pybtex.style.formatting import plain

# Passage for which supporting citations should be found.
# A shorter alternative for quick experiments: query_text = "Bitcoin liquidity"
query_text ="Liquidity, unlike other trade analysis indicators, has no fixed value. As a result, calculating the exact liquidity of the exchange or market is difficult. However, there are other signs that can be used as proxies for liquidity in cryptocurrencies."

# Prefer the OPENAI_API_KEY environment variable so a real key never has to be
# saved inside the notebook; the placeholder remains as a fallback for manual edits.
openai_api_key = os.environ.get("OPENAI_API_KEY", "Your_API_Key_Here")
openai.api_key = openai_api_key
openai_model = "text-davinci-003"

# Ask the completion model for keywords describing the passage.
openai_response = openai.Completion.create(
    engine=openai_model,
    prompt=f"Generate keywords for the following text:\n\n{query_text}\n\nKeywords:",
    max_tokens=200,
    n=1,
    stop=None,
    temperature=0.7,
)

# The model may answer with one keyword per line or with a single comma-separated
# line, so split on both separators, trim whitespace / trailing periods and drop
# empty fragments to end up with one keyword per list element.
raw_keywords = openai_response.choices[0].text.strip()
keywords = [
    fragment.strip().rstrip(".").strip()
    for fragment in re.split(r"[,\n]+", raw_keywords)
]
keywords = [keyword for keyword in keywords if keyword]

# Combine the passage and the extracted keywords into the search query.
new_query_text = f"{query_text} {' '.join(keywords)}"


# Crossref "works" endpoint; the search text is passed via `params` so that
# requests URL-encodes it (the passage contains commas, spaces and periods).
crossref_url = "https://api.crossref.org/works"


def _first(values, default=''):
    """Return the first element of a Crossref list field, or `default` if it is missing or empty."""
    return values[0] if values else default


def _author_names(authors):
    """Join author entries as 'Given Family', falling back to the 'name' field when both are absent."""
    names = []
    for author in authors:
        full_name = ' '.join(
            part for part in (author.get('given', ''), author.get('family', '')) if part
        )
        names.append(full_name or author.get('name', ''))
    return ', '.join(name for name in names if name)


def _publication_year(item):
    """Return the year from 'issued' (publication date), falling back to 'created'; None if neither has one."""
    for field in ('issued', 'created'):
        date_parts = item.get(field, {}).get('date-parts') or [[None]]
        year = date_parts[0][0] if date_parts[0] else None
        if year is not None:
            return year
    return None


# Fetch data from the Crossref API using the passage plus the extracted keywords.
crossref_response = requests.get(crossref_url, params={'query': new_query_text}, timeout=30)
crossref_response.raise_for_status()  # fail loudly instead of parsing an error page
response = crossref_response.json()
items = response.get('message', {}).get('items', [])

# Extract the desired fields once per item and build the DataFrame.
# Passing `columns` keeps the expected schema even when Crossref returns no items.
df = pd.DataFrame(
    [
        {
            'Title': _first(item.get('title')),
            'Author(s)': _author_names(item.get('author', [])),
            'Year': _publication_year(item),
            'Journal': _first(item.get('container-title')),
            'Volume': item.get('volume', ''),
            'Issue': item.get('issue', ''),
            'Page': item.get('page', ''),
            'DOI': item.get('DOI', ''),
            'Abstract': item.get('abstract', ''),
        }
        for item in items
    ],
    columns=['Title', 'Author(s)', 'Year', 'Journal', 'Volume', 'Issue', 'Page', 'DOI', 'Abstract'],
)

# Turn bare DOIs into resolvable https://doi.org/ links (empty DOIs stay empty).
df['DOI'] = df['DOI'].apply(lambda doi: 'https://doi.org/' + doi if doi else '')

# Sort the DataFrame from the latest year to the earliest year.
df = df.sort_values(by='Year', ascending=False)


# Ask the completion model to score how well each paper (title + abstract)
# matches the query passage. Papers without an abstract are skipped.
relevant_citations = []
for _, row in df.iterrows():
    title = row['Title']
    abstract = row['Abstract']
    year = row['Year']
    journal = row['Journal']
    doi = row['DOI']
    if not abstract:
        continue
    # Combine the title and abstract
    text = f"{title}.{abstract}"
    # A separate name is used so the Crossref `response` is not overwritten.
    score_response = openai.Completion.create(
        engine=openai_model,
        prompt=f"Match the following query with the text and return the relevance score:\n\nQuery: {query_text} [({title}, {year}, {journal}, {doi})]\n\nText: {text}\n\nRelevance Score:",
        # A single token cannot hold a decimal such as "0.85", so allow a few.
        max_tokens=5,
        n=1,
        stop=None,
        temperature=0.5,
    )
    relevance_score_str = score_response.choices[0].text.strip()
    # The model is free-form: pick the first number in the reply and skip the
    # paper when there is none, rather than letting float() abort the whole run.
    score_match = re.search(r"-?\d+(?:\.\d+)?", relevance_score_str)
    if score_match is None:
        continue
    relevance_score = float(score_match.group())
    # Tuple layout: (title, relevance score, year, journal, DOI)
    relevant_citations.append((title, relevance_score, year, journal, doi))

# Sort the relevant citations by relevance score in descending order
relevant_citations.sort(key=lambda citation: citation[1], reverse=True)

# Build one "title, year, journal, DOI" line for each of the five highest-scoring
# citations (relevant_citations is already ordered by relevance score).
bibliography = "".join(
    f"{title}, {year}, {journal}, {doi}\n"
    for title, _score, year, journal, doi in relevant_citations[:5]
)

# Print the original query followed by the most relevant citations.
print("Query: ", query_text)
print("Citations:")
if bibliography:
    # Each entry already ends with a newline.
    print(bibliography, end="")

# Create a DataFrame from relevant_citations
df1 = pd.DataFrame(relevant_citations, columns=['Title', 'Relevance Score', 'Year', 'Journal', 'DOI'])

# Sort the DataFrame by relevance score in descending order so the table shown
# as "most relevant citations" really lists the best matches first.
df1 = df1.sort_values(by='Relevance Score', ascending=False)
print("Execution complete !")

Query:  Liquidity, unlike other trade analysis indicators, has no fixed value. As a result, calculating the exact liquidity of the exchange or market is difficult. However, there are other signs that can be used as proxies for liquidity in cryptocurrencies.
Citations:


In [None]:
# Render the final report: the input passage, the extracted keywords, the top
# Crossref hits and the citations that received a relevance score.
separator = "*********************************************************************************************************************"
crossref_columns = ['Title', 'Author(s)', 'Year', 'Journal', 'DOI']
scored_columns = ['Title', 'Year', 'Journal', 'DOI']

print("******************************** Display Results ********************************")
print("Original Text:")
display(query_text)
print()

print("Keywords:")
print(keywords)
print()

# First five rows of the Crossref result table.
print("Citations for the given text based on title and keywords using Semantic Search:")
print(separator)
crossref_preview = df[crossref_columns].head()
print(crossref_preview.to_markdown(index=False))
print()
print(separator)
print()
print()

# Citations that were scored by the model, if any.
print("*****Most relevant citations based on AI search******:")
if relevant_citations:
    print(df1[scored_columns].to_markdown(index=False))
else:
    print("No relevant citations found.")
print()

******************************** Display Results ********************************
Original Text:


'Liquidity, unlike other trade analysis indicators, has no fixed value. As a result, calculating the exact liquidity of the exchange or market is difficult. However, there are other signs that can be used as proxies for liquidity in cryptocurrencies.'


Keywords:
['Liquidity, Trade Analysis, Indicators, Fixed Value, Calculating, Exchange, Market, Cryptocurrencies, Proxies.']

Citations for the given text based on title and keywords using Semantic Search:
*********************************************************************************************************************
| Title                                                                                                                                                                              | Author(s)                       |   Year | Journal                                         | DOI                                             |
|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:--------------------------------|-------:|:------------------------------------------------|:------------------------------------------------|
| ['How much liquidity w