In [None]:
import rispy

# Placeholder path to the RIS bibliography file; replace 'path.ris' with the
# location of your own .ris export before running the cells below.
ris_file_path = 'path.ris'

def read_ris_file(file_path):
    """Print the title and publication year of every record in a RIS file.

    Parameters
    ----------
    file_path : str or path-like
        Location of the ``.ris`` file; it is opened as UTF-8 text and parsed
        with ``rispy.load``.

    Returns
    -------
    None
        The function only writes to stdout: for each record a ``Title:`` line,
        a ``Year:`` line and a ``---`` separator. Records lacking a field get
        the placeholder text ``No title available`` / ``No year available``.
    """
    with open(file_path, 'r', encoding='utf-8') as ris_handle:
        records = rispy.load(ris_handle)

        for record in records:
            # Look up both fields first, falling back to the placeholder text
            title = record.get('title', 'No title available')
            year = record.get('year', 'No year available')
            # A single print call emits the same three lines per record
            print(f"Title: {title}", f"Year: {year}", "---", sep="\n")

# Print the title and year of every entry found in the file at ris_file_path
read_ris_file(ris_file_path)


Extract keywords from the RIS file

In [None]:
import pandas as pd
from wordcloud import WordCloud
import matplotlib.pyplot as plt
from collections import Counter


# Placeholder path to the RIS file (the same value is assigned in the first
# cell); replace 'path.ris' with the location of your own .ris export.
ris_file_path = 'path.ris'

# Function to extract keywords
def extract_keywords(file_path, field='keywords'):
    """Collect the keywords of every entry in a RIS file.

    The previous implementation read the ``'title'`` field, so it returned
    titles instead of keywords. rispy's default mapping exposes the RIS ``KW``
    tag under the key ``'keywords'``, which is now the default.

    Parameters
    ----------
    file_path : str or path-like
        Location of the ``.ris`` file; it is opened as UTF-8 text.
    field : str, optional
        Entry key to read. Defaults to ``'keywords'``; pass ``'title'`` to
        reproduce the old behaviour of splitting titles on semicolons.

    Returns
    -------
    list of str
        All values found, in file order, with duplicates kept so that they
        can be counted. Surrounding whitespace is stripped and empty
        fragments are dropped.
    """
    keywords = []
    with open(file_path, 'r', encoding='utf-8') as bibliography_file:
        entries = rispy.load(bibliography_file)
        for entry in entries:
            raw_value = entry.get(field)
            if not raw_value:
                # Field missing or empty for this entry: nothing to collect
                continue
            # The field may be a list of values or a single string; wrap a
            # lone string so both shapes share one code path
            items = raw_value if isinstance(raw_value, list) else [raw_value]
            for item in items:
                # Several keywords may be packed into one value separated by
                # semicolons; strip padding and skip empty fragments
                keywords.extend(
                    part.strip() for part in str(item).split(';') if part.strip()
                )
    return keywords

# Collect every keyword found in the RIS file
keywords_list = extract_keywords(ris_file_path)

# Sanity check: show the first ten extracted items
print(keywords_list[:10])

# Tally how often each item occurs and tabulate the tallies
keyword_counts = Counter(keywords_list)
df_keywords = pd.DataFrame(
    keyword_counts.items(),
    columns=['Keyword', 'Frequency'],
)

# Show the table with the most frequent items first
print(df_keywords.sort_values(by='Frequency', ascending=False))

# Build an 800x800 word cloud on a white background from the frequency tallies
wordcloud = WordCloud(
    width=800,
    height=800,
    background_color='white',
    min_font_size=10,
).generate_from_frequencies(keyword_counts)

# Draw the cloud on a square figure with the axes hidden and no padding
plt.figure(figsize=(8, 8), facecolor=None)
plt.imshow(wordcloud)
plt.axis("off")
plt.tight_layout(pad=0)

plt.show()

Bar plot: number of publications per year

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import rispy

# Function to extract publication years from the RIS file
def extract_publication_years(file_path):
    """Collect the publication year of every entry in a RIS file.

    rispy's default mapping translates RIS tags to descriptive keys, so the
    ``PY`` tag is exposed as ``'year'`` (and ``Y1`` as ``'publication_year'``).
    The previous lookup of the raw tag ``'PY'`` therefore never matched and
    the function always returned an empty list.

    Parameters
    ----------
    file_path : str or path-like
        Location of the ``.ris`` file; it is opened as UTF-8 text.

    Returns
    -------
    list of str
        One year per entry that carries year information, in file order.
        Date-like values such as ``'2021/03/15/'`` are reduced to the part
        before the first slash. Entries without a usable year are skipped.
    """
    # Keys tried in order: the mapped PY key, the mapped Y1 key, then the raw
    # tag in case the entries were parsed with a custom mapping
    year_keys = ('year', 'publication_year', 'PY')
    years = []
    with open(file_path, 'r', encoding='utf-8') as bibliography_file:
        entries = rispy.load(bibliography_file)
        for entry in entries:
            for key in year_keys:
                value = entry.get(key)
                if not value:
                    continue
                # Keep only the leading year of "YYYY/MM/DD/..." style values
                year = str(value).split('/')[0].strip()
                if year:
                    years.append(year)
                    break  # one year per entry is enough
    return years

# Location of the RIS file to analyse
ris_file_path = 'path'

# Gather the publication year recorded for each entry
publication_years = extract_publication_years(ris_file_path)

# Put the years into a single-column table
df_years = pd.DataFrame(publication_years, columns=['Year'])

# Tally the entries per year, ordered by year
year_counts = df_years['Year'].value_counts().sort_index()

# Turn the tally into a two-column frame (year, count), sorted by year
df_year_counts = year_counts.to_frame().reset_index()
df_year_counts.columns = ['Year', 'Frequency']
df_year_counts = df_year_counts.sort_values('Year')

# Bar chart of the number of publications in each year
plt.figure(figsize=(12, 6))
plt.bar(
    df_year_counts['Year'],
    df_year_counts['Frequency'],
    color='skyblue',
)
plt.title('Number of Publications per Year', fontsize=16)
plt.xlabel('Publication Year', fontsize=14)
plt.ylabel('Number of Publications', fontsize=14)
plt.xticks(rotation=45)
plt.tight_layout()  # keep the rotated tick labels inside the figure
plt.show()
