# Combining

In [36]:
import os
import re
import uuid

import pandas as pd

# Folder holding the per-issue CSV files. File names are expected to start
# with "v<volume>n<number>" (for example "v10n1.csv").
folder = 'RMRJ/Articles'

# os.listdir() returns entries in arbitrary order, so sort the names to make
# the row order of the combined file (and the warnings below) reproducible.
csv_files = sorted(f for f in os.listdir(folder) if f.endswith('.csv'))

all_dataframes = []  # one dataframe per successfully recognised issue file

for csv_file in csv_files:
    csv_file_name = os.path.splitext(csv_file)[0]  # file name without ".csv"

    # Validate the name *before* reading, so CSVs that do not belong to an
    # issue are skipped without being parsed at all.
    match = re.match(r'v(\d+)n(\d+)', csv_file_name)
    if not match:
        print(f"Unexpected filename format: {csv_file}")
        continue

    volume, number = match.groups()
    journal_name = f"Recoletos Multidisciplinary Research Journal Vol. {volume} No. {number}"

    # NOTE(review): the displayed result contains an 'Unnamed: 0' column, which
    # suggests the source CSVs were saved with their index; passing
    # index_col=0 here would drop it -- confirm before changing the schema.
    df = pd.read_csv(os.path.join(folder, csv_file))
    df['journal_name'] = journal_name  # tag every article with its issue
    all_dataframes.append(df)

# pd.concat([]) fails with an opaque "No objects to concatenate"; say what is
# actually wrong instead.
if not all_dataframes:
    raise ValueError(
        f"No CSV files named like 'v<volume>n<number>*.csv' were found in {folder!r}"
    )

# Stack all issues into one frame with a fresh 0..n-1 index.
combined_df = pd.concat(all_dataframes, ignore_index=True)

# Write the combined dataframe to a new csv file (index omitted).
combined_df.to_csv('combined-rmrj-articles.csv', index=False)


In [37]:
# Show the combined frame (bare last expression -> rich notebook display).
combined_df

Unnamed: 0,Title,Keywords,Author,DOI,Abstract,References,Published Date,Link,journal_name
0,Timeless Existence and Principle of Creation: ...,"John 1:1, Word, beginning, timeless existence,...",Emiliano C. De Catalina,https://doi.org/10.32871/rmrj2210.01.01,"St. John's Gospel begins with a prologue, serv...","Alvira, T., Clavell, L., & Melendo, T. (1991)....",2022-05-25,https://rmrj.usjr.edu.ph/rmrj/index.php/RMRJ/a...,Recoletos Multidisciplinary Research Journal V...
1,Ratooning Response of Lowland Rice (Oryza sati...,"approach, gross margin, lowland rice, manageme...","Dionesio M. BaÅˆoc, Victor B. Asio",https://doi.org/10.32871/rmrj2210.01.08,This study aimed to determine the ratooning ab...,"Asio, V. B. (1996). Characteristics, weatherin...",2022-06-14,https://rmrj.usjr.edu.ph/rmrj/index.php/RMRJ/a...,Recoletos Multidisciplinary Research Journal V...
2,Paternal Resilience in Time of Pandemic: A Phe...,"Special Education, fathers of children with au...","Sarah Therese P. Jardenil, Dennis V. Madrigal",https://doi.org/10.32871/rmrj2210.01.02,This phenomenological research was conducted t...,"Alhuzimi, T. (2021). Stress and emotional well...",2022-05-26,https://rmrj.usjr.edu.ph/rmrj/index.php/RMRJ/a...,Recoletos Multidisciplinary Research Journal V...
3,An Inquiry into the Problems Concerning Filipi...,"Filipino, values, norms, validity, efficacy, p...",Jiolito L. Benitez,https://doi.org/10.32871/rmrj2210.01.03,This paper inquires into the problems concerni...,"Aguas, J. J. S. (2016). The Filipino value of ...",2022-05-27,https://rmrj.usjr.edu.ph/rmrj/index.php/RMRJ/a...,Recoletos Multidisciplinary Research Journal V...
4,Correlating the Psychological and Spiritual We...,"Psychology, psycho-spiritual well-being, junio...","Chris Feli Joy P. Tajonera, Dennis V. Madrigal",https://doi.org/10.32871/rmrj2210.01.05,This descriptive-correlational study assesses ...,"Ahmadpoori, S. F., & Motaghi, M. (2020). The s...",2022-05-31,https://rmrj.usjr.edu.ph/rmrj/index.php/RMRJ/a...,Recoletos Multidisciplinary Research Journal V...
...,...,...,...,...,...,...,...,...,...
146,DSS Framework: A Proposal for Disaster Logisti...,"ICT-based logistic, logistics, decision tree m...","Rene H. Alipio, Francis F. Balahadia",https://doi.org/10.32871/rmrj2008.02.04,The Philippinesâ€™ location within the Pacific...,"A-Iryani, N. & Gassin, T. (2005). Logistic and...",2020-12-29,https://rmrj.usjr.edu.ph/rmrj/index.php/RMRJ/a...,Recoletos Multidisciplinary Research Journal V...
147,Ratooning Response of Lowland Rice NSIC Rc352 ...,"lowland rice, basal, fertilization, ratoon, to...",Dionesio Maglahus Banoc,https://doi.org/10.32871/rmrj2008.02.05,This study seeks to determine the effect of N ...,"Aulakh, M., Khera, T.S., Doran, J.W., Singh, K...",2020-12-31,https://rmrj.usjr.edu.ph/rmrj/index.php/RMRJ/a...,Recoletos Multidisciplinary Research Journal V...
148,Direct and Indirect Factors Affecting Teachers...,"education, perceived control, teachersâ€™ pers...",Andrian A. Dela Cruz,https://doi.org/10.32871/rmrj2008.02.06,The outbreak of the COVID-19 has brought about...,"Becker, E., GÃ¶tz, T., Morger, V., & Ranellucc...",2020-12-31,https://rmrj.usjr.edu.ph/rmrj/index.php/RMRJ/a...,Recoletos Multidisciplinary Research Journal V...
149,Graduate Education during COVID-19 Pandemic: I...,"challenges, COVID-19, Filipino, graduate educa...",Inero Ancho,https://doi.org/10.32871/rmrj2008.02.07,This qualitative research describes the experi...,"Aliyyah, R. R., Rachmadtullah, R., Samsudin, A...",2020-12-31,https://rmrj.usjr.edu.ph/rmrj/index.php/RMRJ/a...,Recoletos Multidisciplinary Research Journal V...


# Pre-embedding

### Summary

In [38]:
# Build the labelled, comma-separated text for a single article row.
def concat_columns(row):
    """Return one string of ``label: value`` pairs for the given row.

    The row is indexed by column name. Each of the eight columns listed
    below contributes ``"<label>: <value>"``; when the value is null
    (as judged by ``pd.notnull``) the literal text ``null`` is used instead.
    The pairs are joined with ``", "`` in the order listed.
    """
    # (label written into the text, column read from the row)
    labelled_columns = (
        ('title', 'Title'),
        ('keywords', 'Keywords'),
        ('author', 'Author'),
        ('doi', 'DOI'),
        ('abstract', 'Abstract'),
        ('published date', 'Published Date'),
        ('link', 'Link'),
        ('journal name', 'journal_name'),
    )

    parts = []
    for label, column in labelled_columns:
        value = row[column]
        shown = value if pd.notnull(value) else 'null'
        parts.append(f"{label}: {shown}")
    return ", ".join(parts)

# Add a 'summary' column: concat_columns is called once per row
# (axis="columns" is the named form of axis=1) and its string result
# becomes that row's value.
combined_df['summary'] = combined_df.apply(func=concat_columns, axis="columns")


In [39]:
# Show the frame again, now with the 'summary' column added.
combined_df

Unnamed: 0,Title,Keywords,Author,DOI,Abstract,References,Published Date,Link,journal_name,summary
0,Timeless Existence and Principle of Creation: ...,"John 1:1, Word, beginning, timeless existence,...",Emiliano C. De Catalina,https://doi.org/10.32871/rmrj2210.01.01,"St. John's Gospel begins with a prologue, serv...","Alvira, T., Clavell, L., & Melendo, T. (1991)....",2022-05-25,https://rmrj.usjr.edu.ph/rmrj/index.php/RMRJ/a...,Recoletos Multidisciplinary Research Journal V...,title: Timeless Existence and Principle of Cre...
1,Ratooning Response of Lowland Rice (Oryza sati...,"approach, gross margin, lowland rice, manageme...","Dionesio M. BaÅˆoc, Victor B. Asio",https://doi.org/10.32871/rmrj2210.01.08,This study aimed to determine the ratooning ab...,"Asio, V. B. (1996). Characteristics, weatherin...",2022-06-14,https://rmrj.usjr.edu.ph/rmrj/index.php/RMRJ/a...,Recoletos Multidisciplinary Research Journal V...,title: Ratooning Response of Lowland Rice (Ory...
2,Paternal Resilience in Time of Pandemic: A Phe...,"Special Education, fathers of children with au...","Sarah Therese P. Jardenil, Dennis V. Madrigal",https://doi.org/10.32871/rmrj2210.01.02,This phenomenological research was conducted t...,"Alhuzimi, T. (2021). Stress and emotional well...",2022-05-26,https://rmrj.usjr.edu.ph/rmrj/index.php/RMRJ/a...,Recoletos Multidisciplinary Research Journal V...,title: Paternal Resilience in Time of Pandemic...
3,An Inquiry into the Problems Concerning Filipi...,"Filipino, values, norms, validity, efficacy, p...",Jiolito L. Benitez,https://doi.org/10.32871/rmrj2210.01.03,This paper inquires into the problems concerni...,"Aguas, J. J. S. (2016). The Filipino value of ...",2022-05-27,https://rmrj.usjr.edu.ph/rmrj/index.php/RMRJ/a...,Recoletos Multidisciplinary Research Journal V...,title: An Inquiry into the Problems Concerning...
4,Correlating the Psychological and Spiritual We...,"Psychology, psycho-spiritual well-being, junio...","Chris Feli Joy P. Tajonera, Dennis V. Madrigal",https://doi.org/10.32871/rmrj2210.01.05,This descriptive-correlational study assesses ...,"Ahmadpoori, S. F., & Motaghi, M. (2020). The s...",2022-05-31,https://rmrj.usjr.edu.ph/rmrj/index.php/RMRJ/a...,Recoletos Multidisciplinary Research Journal V...,title: Correlating the Psychological and Spiri...
...,...,...,...,...,...,...,...,...,...,...
146,DSS Framework: A Proposal for Disaster Logisti...,"ICT-based logistic, logistics, decision tree m...","Rene H. Alipio, Francis F. Balahadia",https://doi.org/10.32871/rmrj2008.02.04,The Philippinesâ€™ location within the Pacific...,"A-Iryani, N. & Gassin, T. (2005). Logistic and...",2020-12-29,https://rmrj.usjr.edu.ph/rmrj/index.php/RMRJ/a...,Recoletos Multidisciplinary Research Journal V...,title: DSS Framework: A Proposal for Disaster ...
147,Ratooning Response of Lowland Rice NSIC Rc352 ...,"lowland rice, basal, fertilization, ratoon, to...",Dionesio Maglahus Banoc,https://doi.org/10.32871/rmrj2008.02.05,This study seeks to determine the effect of N ...,"Aulakh, M., Khera, T.S., Doran, J.W., Singh, K...",2020-12-31,https://rmrj.usjr.edu.ph/rmrj/index.php/RMRJ/a...,Recoletos Multidisciplinary Research Journal V...,title: Ratooning Response of Lowland Rice NSIC...
148,Direct and Indirect Factors Affecting Teachers...,"education, perceived control, teachersâ€™ pers...",Andrian A. Dela Cruz,https://doi.org/10.32871/rmrj2008.02.06,The outbreak of the COVID-19 has brought about...,"Becker, E., GÃ¶tz, T., Morger, V., & Ranellucc...",2020-12-31,https://rmrj.usjr.edu.ph/rmrj/index.php/RMRJ/a...,Recoletos Multidisciplinary Research Journal V...,title: Direct and Indirect Factors Affecting T...
149,Graduate Education during COVID-19 Pandemic: I...,"challenges, COVID-19, Filipino, graduate educa...",Inero Ancho,https://doi.org/10.32871/rmrj2008.02.07,This qualitative research describes the experi...,"Aliyyah, R. R., Rachmadtullah, R., Samsudin, A...",2020-12-31,https://rmrj.usjr.edu.ph/rmrj/index.php/RMRJ/a...,Recoletos Multidisciplinary Research Journal V...,title: Graduate Education during COVID-19 Pand...


### ID assignment

In [40]:
# Give every article a random (version 4) UUID string as its identifier.
# A comprehension sized by the row count replaces the row-wise
# DataFrame.apply: the row contents are never used, only one ID per row is
# needed, so there is no reason to materialise a Series for each row.
# (`uuid` is imported in the notebook's import cell.)
# NOTE(review): uuid4 values differ on every run; if consumers of the CSV need
# stable IDs, consider deriving them from the DOI (e.g. uuid.uuid5) -- confirm.
combined_df['id'] = [str(uuid.uuid4()) for _ in range(len(combined_df))]

In [41]:
# Show the frame again, now with the 'id' column added.
combined_df

Unnamed: 0,Title,Keywords,Author,DOI,Abstract,References,Published Date,Link,journal_name,summary,id
0,Timeless Existence and Principle of Creation: ...,"John 1:1, Word, beginning, timeless existence,...",Emiliano C. De Catalina,https://doi.org/10.32871/rmrj2210.01.01,"St. John's Gospel begins with a prologue, serv...","Alvira, T., Clavell, L., & Melendo, T. (1991)....",2022-05-25,https://rmrj.usjr.edu.ph/rmrj/index.php/RMRJ/a...,Recoletos Multidisciplinary Research Journal V...,title: Timeless Existence and Principle of Cre...,111553fe-23fc-45e4-ad46-0c56b61aee0e
1,Ratooning Response of Lowland Rice (Oryza sati...,"approach, gross margin, lowland rice, manageme...","Dionesio M. BaÅˆoc, Victor B. Asio",https://doi.org/10.32871/rmrj2210.01.08,This study aimed to determine the ratooning ab...,"Asio, V. B. (1996). Characteristics, weatherin...",2022-06-14,https://rmrj.usjr.edu.ph/rmrj/index.php/RMRJ/a...,Recoletos Multidisciplinary Research Journal V...,title: Ratooning Response of Lowland Rice (Ory...,737dc86a-c28d-4002-ab11-4e1ae1ae946d
2,Paternal Resilience in Time of Pandemic: A Phe...,"Special Education, fathers of children with au...","Sarah Therese P. Jardenil, Dennis V. Madrigal",https://doi.org/10.32871/rmrj2210.01.02,This phenomenological research was conducted t...,"Alhuzimi, T. (2021). Stress and emotional well...",2022-05-26,https://rmrj.usjr.edu.ph/rmrj/index.php/RMRJ/a...,Recoletos Multidisciplinary Research Journal V...,title: Paternal Resilience in Time of Pandemic...,c4d3893e-977e-42ab-904a-80a94a67b9b1
3,An Inquiry into the Problems Concerning Filipi...,"Filipino, values, norms, validity, efficacy, p...",Jiolito L. Benitez,https://doi.org/10.32871/rmrj2210.01.03,This paper inquires into the problems concerni...,"Aguas, J. J. S. (2016). The Filipino value of ...",2022-05-27,https://rmrj.usjr.edu.ph/rmrj/index.php/RMRJ/a...,Recoletos Multidisciplinary Research Journal V...,title: An Inquiry into the Problems Concerning...,449a1ec7-c2d8-4674-b714-a039305e9602
4,Correlating the Psychological and Spiritual We...,"Psychology, psycho-spiritual well-being, junio...","Chris Feli Joy P. Tajonera, Dennis V. Madrigal",https://doi.org/10.32871/rmrj2210.01.05,This descriptive-correlational study assesses ...,"Ahmadpoori, S. F., & Motaghi, M. (2020). The s...",2022-05-31,https://rmrj.usjr.edu.ph/rmrj/index.php/RMRJ/a...,Recoletos Multidisciplinary Research Journal V...,title: Correlating the Psychological and Spiri...,a9e24e6e-cfd0-4bc5-a30a-8786816b040e
...,...,...,...,...,...,...,...,...,...,...,...
146,DSS Framework: A Proposal for Disaster Logisti...,"ICT-based logistic, logistics, decision tree m...","Rene H. Alipio, Francis F. Balahadia",https://doi.org/10.32871/rmrj2008.02.04,The Philippinesâ€™ location within the Pacific...,"A-Iryani, N. & Gassin, T. (2005). Logistic and...",2020-12-29,https://rmrj.usjr.edu.ph/rmrj/index.php/RMRJ/a...,Recoletos Multidisciplinary Research Journal V...,title: DSS Framework: A Proposal for Disaster ...,259299d7-d843-4bd6-aecb-483df94d02bb
147,Ratooning Response of Lowland Rice NSIC Rc352 ...,"lowland rice, basal, fertilization, ratoon, to...",Dionesio Maglahus Banoc,https://doi.org/10.32871/rmrj2008.02.05,This study seeks to determine the effect of N ...,"Aulakh, M., Khera, T.S., Doran, J.W., Singh, K...",2020-12-31,https://rmrj.usjr.edu.ph/rmrj/index.php/RMRJ/a...,Recoletos Multidisciplinary Research Journal V...,title: Ratooning Response of Lowland Rice NSIC...,43eba709-e6de-4435-85a9-c9116bdfb7b1
148,Direct and Indirect Factors Affecting Teachers...,"education, perceived control, teachersâ€™ pers...",Andrian A. Dela Cruz,https://doi.org/10.32871/rmrj2008.02.06,The outbreak of the COVID-19 has brought about...,"Becker, E., GÃ¶tz, T., Morger, V., & Ranellucc...",2020-12-31,https://rmrj.usjr.edu.ph/rmrj/index.php/RMRJ/a...,Recoletos Multidisciplinary Research Journal V...,title: Direct and Indirect Factors Affecting T...,d940566f-1d0a-4981-a3cc-6069057e6c36
149,Graduate Education during COVID-19 Pandemic: I...,"challenges, COVID-19, Filipino, graduate educa...",Inero Ancho,https://doi.org/10.32871/rmrj2008.02.07,This qualitative research describes the experi...,"Aliyyah, R. R., Rachmadtullah, R., Samsudin, A...",2020-12-31,https://rmrj.usjr.edu.ph/rmrj/index.php/RMRJ/a...,Recoletos Multidisciplinary Research Journal V...,title: Graduate Education during COVID-19 Pand...,f88b3737-6d5f-492f-b6ad-a334edb07fb7


In [42]:
# Persist the enriched frame (including 'summary' and 'id') without the index.
# This is the same file name the combining cell wrote, so that file is overwritten.
combined_df.to_csv(path_or_buf='combined-rmrj-articles.csv', index=False)