In [43]:
import rispy
import pandas as pd

def read_ris_to_dataframe(file_path):
    """Load a RIS bibliography file and return its entries as a DataFrame.

    Parameters
    ----------
    file_path : str or path-like
        Location of the RIS file; it is opened as UTF-8 text.

    Returns
    -------
    pandas.DataFrame
        Built directly from the entries that ``rispy.load`` parses out of
        the file.
    """
    with open(file_path, 'r', encoding='utf-8') as ris_handle:
        parsed_entries = rispy.load(ris_handle)
    # The DataFrame is constructed after the file has been closed; the parsed
    # entries are already fully in memory at this point.
    return pd.DataFrame(parsed_entries)

def preprocess_jstor_ris_file(input_file, output_file):
    """Copy only the RIS record lines of ``input_file`` into ``output_file``.

    Copying starts at a line beginning with ``"TY  -"`` and continues until a
    line beginning with ``"Provider:"`` is met. That line, and every following
    line up to the next ``"TY  -"`` line, is dropped. Lines before the first
    ``"TY  -"`` line are dropped as well.

    Parameters
    ----------
    input_file : str or path-like
        Source file, read as UTF-8 text.
    output_file : str or path-like
        Destination file, written as UTF-8 text (overwritten if it exists).

    Returns
    -------
    None
    """
    inside_record = False
    with open(input_file, 'r', encoding='utf-8') as source, open(output_file, 'w', encoding='utf-8') as sink:
        for raw_line in source:
            if inside_record:
                if raw_line.startswith("Provider:"):
                    # A provider header ends the current run of record lines;
                    # the header line itself is not copied.
                    inside_record = False
                else:
                    sink.write(raw_line)
            elif raw_line.startswith("TY  -"):
                # A RIS entry begins: copy this line and keep copying.
                inside_record = True
                sink.write(raw_line)


In [44]:
# Read all search results, starting with the EBSCOhost export.
df_mt1 = read_ris_to_dataframe(file_path='RIS/multipletext_ebscohost.ris')
df_mt1.head(n=2)


Unnamed: 0,type_of_reference,authors,primary_title,journal_name,alternate_title3,unknown_tag,year,publication_year,volume,number,...,name_of_database,id,accession_number,publisher,author_address,doi,secondary_title,alternate_title2,tertiary_title,place_published
0,JOUR,"[Davis, Dennis S., Huang, Becky, Yi, Tanisha]",Making Sense of Science Texts: A Mixed-Methods...,Reading Research Quarterly,Reading Research Quarterly,"{'J1': ['Reading Research Quarterly'], 'CP': [...",2017/04//Apr-Jun2017,2017/04//Apr-Jun2017,52.0,2.0,...,eue,,,,,,,,,
1,JOUR,"[Tarchi, Christian, Casado-Ledesma, Lidia, San...",The relationship between theory of mind and mu...,European Journal of Psychology of Education,European Journal of Psychology of Education,,,2023/11/04/,,,...,psyh,2024-25114-001,2024-25114-001,Springer,"Casado-Ledesma, Lidia",10.1007/s10212-023-00755-9,,,,


In [45]:
# One-time preparation (already done, hence left commented out): remove the
# extra description line from the raw JSTOR export.
# preprocess_jstor_ris_file('RIS/multipletext_jstor.ris','RIS/multipletext_jstor_processed.ris')

# The processed file was then edited by hand: the first result was rearranged
# so that its title field appears earlier.
# Every row's "name_of_database" is overwritten with "JSTOR", as it should be.

df_mt2 = read_ris_to_dataframe(
    file_path='RIS/multipletext_jstor_processed.ris'
).assign(name_of_database='JSTOR')
df_mt2.head(n=2)


Unnamed: 0,type_of_reference,authors,title,custom1,name_of_database,end_page,number,publisher,year,issn,...,urls,volume,access_date,abstract,secondary_authors,primary_title,tertiary_title,doi,edition,reviewed_item
0,JOUR,"[Boyd, Fenice B., Moore, David W.]",Serving Culturally and Linguistically Diverse ...,Full publication date: March 2011,JSTOR,457,6,"[Wiley, International Reading Association]",2011,10813004,...,[http://www.jstor.org/stable/41203382],54,2024/01/09/,,,,,,,
1,JOUR,"[Guzzetti, Barbara J., Foley, Leslie M., Lesle...","""Nomadic Knowledge"": MEN WRITING ZINES FOR CON...",Full publication date: April 2015,JSTOR,601,7,"[Wiley, International Literacy Association]",2015,10813004,...,[http://www.jstor.org/stable/44011183],58,2024/01/09/,,,,,,,


In [47]:
# Scopus export.
df_mt3 = read_ris_to_dataframe(file_path='RIS/multipletext_scopus.ris')
df_mt3.head(n=2)

Unnamed: 0,type_of_reference,authors,title,year,secondary_title,volume,custom7,doi,urls,abstract,keywords,language,type_of_work,name_of_database,notes,number,start_page,end_page,short_title
0,JOUR,"[Sirois, J.E.]",Comprehensive investigation evaluating the car...,2021,Regulatory Toxicology and Pharmacology,123.0,104944.0,10.1016/j.yrtph.2021.104944,[https://www.scopus.com/inward/record.uri?eid=...,"In 2019, the California Office of Environmenta...","[Acetaminophen, Carcinogenicity, Epidemiology,...",English,Article,Scopus,[Export Date: 10 January 2024; Cited By: 3],,,,
1,CONF,"[Sorensen, T.C., Pilger, E.J., Wood, M.S., Gre...",Development of the Mission Operations Support ...,2010,SpaceOps 2010 Conference,,,10.2514/6.2010-2230,[https://www.scopus.com/inward/record.uri?eid=...,The Hawaii Space Flight Laboratory (HSFL) was ...,"[Computer software, NASA, Network protocols, P...",English,Conference paper,Scopus,[Export Date: 10 January 2024; Cited By: 4],,,,


In [48]:
# Web of Science export; every row's "name_of_database" is forced to "WoS".
df_mt4 = read_ris_to_dataframe(
    file_path='RIS/multipletext_wos.ris'
).assign(name_of_database='WoS')
df_mt4.head(n=2)

Unnamed: 0,type_of_reference,authors,title,secondary_title,language,keywords,abstract,author_address,custom3,unknown_tag,...,start_page,end_page,doi,accession_number,notes,number,custom6,custom7,first_authors,name_of_database
0,JOUR,"[Rosman, T, Mayer, AK, Merk, S, Kerwer, M]",On the benefits of 'doing science': Does integ...,CONTEMPORARY EDUCATIONAL PSYCHOLOGY,English,"[Epistemic beliefs, Epistemic change, Multiple...",We examine the effects of writing tasks on epi...,"Leibniz Inst Psychol Informat ZPID, Univ Ring ...",Leibniz Institute for Psychology Information &...,{'FU': ['German Joint Initiative for Research ...,...,85,101,10.1016/j.cedpsych.2019.02.007,WOS:000483008600007,[Times Cited in Web of Science Core Collection...,,,,,WoS
1,JOUR,"[Bråten, I, Ferguson, LE, Stromso, HI, Anmarkr...",Justification beliefs and multiple-documents c...,EUROPEAN JOURNAL OF PSYCHOLOGY OF EDUCATION,English,"[Epistemic beliefs, Justification for knowing,...",Building on the multidimensional framework of ...,"Univ Oslo, Dept Educ Res, N-0317 Oslo, Norway",University of Oslo,"{'PU': ['SPRINGER'], 'PI': ['NEW YORK'], 'PA':...",...,879,902,10.1007/s10212-012-0145-2,WOS:000322912600015,[Times Cited in Web of Science Core Collection...,3.0,,,,WoS
