In [1]:
import pandas as pd
import re
import os

# Function to extract name and eid data from the References string
def extract_name_eid(reference_text):
    # Use regex to find all patterns of name and eid
    matches = re.findall(r"<(.*?), \{eid: (\d+)\}>", reference_text)
    return matches

# Main function to create CSV files from the input references
def create_csv_files_from_references(input_csv_path):
    # Create the "paper" folder if it doesn't exist
    os.makedirs("paper", exist_ok=True)
    
    # Read the input CSV file
    df = pd.read_csv(input_csv_path)
    
    # Loop through each row in the CSV
    for _, row in df.iterrows():
        title = row['Title']
        references = row['References']
        
        # Extract name and eid pairs from the References text
        name_eid_pairs = extract_name_eid(references)
        
        # Create a DataFrame with the extracted results
        df_output = pd.DataFrame(name_eid_pairs, columns=['Name', 'EID'])
        
        # Replace invalid characters for file names in the title
        safe_title = "".join([c for c in title if c.isalnum() or c in (" ", "_")]).strip()
        
        # Save the DataFrame to a CSV file named after the Title value
        #output_path = f"{safe_title}.csv"
        #df_output.to_csv(output_path, index=False)
        #print(f"File created: {output_path}")
        
         # Define the output path within the "paper" folder
        output_path = os.path.join("paper", f"{safe_title}.csv")
        
        # Save the DataFrame to a CSV file named after the Title value
        df_output.to_csv(output_path, index=False)
        print(f"File created: {output_path}")

# Example usage
create_csv_files_from_references("SOTA_SECO_CPS_2024-11-27_16-9-26.csv")

File created: paper\Conceptual Framework of Information Flow Synchronization Throughout the Building Lifecycle.csv
File created: paper\Modelbased Trustworthiness Evaluation of Autonomous CyberPhysical Production Systems A Systematic Mapping Study.csv
File created: paper\Survey and Practice on Architecture and Deployment Method of Digital Twin System for Intelligent Substation.csv
File created: paper\Architecting Digital Twins.csv
File created: paper\How Can Digital Twins Support the Net Zero Vision.csv
File created: paper\A REVIEW OF DIGITAL TWIN APPLICATIONS IN CONSTRUCTION.csv
File created: paper\Service Computing for Industry 40 State of the Art Challenges and Research Opportunities.csv
File created: paper\A Computer Science Perspective on Digital Transformation in Production.csv
File created: paper\Survey on Cloud Robotics Architecture and ModelDriven Reference Architecture for Decentralized Multicloud HeterogeneousRobotics Platform.csv
File created: paper\A Bibliometric Analysis o

In [13]:
#### with EID
import pandas as pd
import re
import os

# Function to extract name and eid data from the References string, adding prefix to eid
def extract_name_eid(reference_text):
    # Use regex to find all patterns of name and eid, adding the prefix to eid
    matches = re.findall(r"<(.*?), \{eid: (\d+)\}>", reference_text)
    # Add '2-s2.0-' prefix to each eid
    return [(name, f"2-s2.0-{eid}") for name, eid in matches]  # 2-s2.0-

# Main function to create CSV files from the input references
def create_csv_files_from_references(input_csv_path):
    # Create the "paper" folder if it doesn't exist
    os.makedirs("paper", exist_ok=True)
    
    # Read the input CSV file
    df = pd.read_csv(input_csv_path)
    
    # Loop through each row in the CSV
    for _, row in df.iterrows():
        title = row['Title']
        references = row['References']
        
        # Extract name and eid pairs from the References text with prefix
        name_eid_pairs = extract_name_eid(references)
        
        # Create a DataFrame with the extracted results
        df_output = pd.DataFrame(name_eid_pairs, columns=['Name', 'EID'])
        
        # Replace invalid characters for file names in the title
        safe_title = "".join([c for c in title if c.isalnum() or c in (" ", "_")]).strip()
        
        # Define the output path within the "paper" folder
        output_path = os.path.join("paper", f"{safe_title}.csv")
        
        # Save the DataFrame to a CSV file named after the Title value
        df_output.to_csv(output_path, index=False)
        print(f"File created: {output_path}")

# Usage
create_csv_files_from_references("SOTA_SECO_CPS_2024-11-14_19-47-22.csv")

### Import Snowballed papers

In [29]:
# Import other libraries
import os
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sbs
import re

# Options for plots
plt.rcParams['figure.figsize'] = (10, 6)
sbs.set('paper')

# Import litstudy
path = os.path.abspath(os.path.join('..'))
if path not in sys.path:
    sys.path.append(path)

import litstudy

In [30]:
from datetime import datetime

currentDateAndTime = datetime.now()
filename = (f'SOTA_SECO_CPS_{currentDateAndTime.year}-{currentDateAndTime.month}-{currentDateAndTime.day}'
            f'_{currentDateAndTime.hour}-{currentDateAndTime.minute}-{currentDateAndTime.second}')
filename_xlsx = (f'SOTA_SECO_CPS_{currentDateAndTime.year}-{currentDateAndTime.month}-{currentDateAndTime.day}'
            f'_{currentDateAndTime.hour}-{currentDateAndTime.minute}-{currentDateAndTime.second}.xlsx')

print(filename)

SOTA_SECO_CPS_2024-11-28_17-38-53


In [123]:
############################## LOAD Cleaned papers ###################################

# with open('AAA/studies.txt', 'r') as file:
#     data = file.read().splitlines()
#     print(data)
    
# Load csv file
docs_bib = litstudy.load_scopus_csv('paper/Survey on Cloud Robotics Architecture and ModelDriven Reference Architecture for Decentralized Multicloud HeterogeneousRobotics Platform.csv')
print(len(docs_bib), 'papers loaded from selected papers list')

56 papers loaded from selected papers list


In [124]:
import logging
logging.getLogger().setLevel(logging.CRITICAL)

docs_found_scopus, docs_notfound_scopus = litstudy.refine_scopus(docs_bib, search_title=True)

100%|██████████| 56/56 [00:41<00:00,  1.36it/s]


In [125]:
print(len(docs_found_scopus), 'papers found on Scopus')
print(len(docs_notfound_scopus), 'papers were not found')
docs_bib_scopus = docs_found_scopus | docs_notfound_scopus
len(docs_bib_scopus)

29 papers found on Scopus
27 papers were not found


56

In [126]:
docs_bib = docs_bib_scopus # docs_bib_scopus | docs_bib_SemanticScholar | docs_bib_CrossRef
docs_filtered = docs_bib # .filter_docs(lambda d: d.publication_year >= 2019)

In [127]:
index = 0
data = []
while index < len(docs_filtered):
    # re.sub('[<\[\]>]', '', str(docs_filtered[index].authors))

    authorList = []
    for author in docs_filtered[index].authors or []:
        authorList.append(author.name)

    if type(docs_filtered[index].id.doi) == type(None):
        print(docs_filtered[index].title)
        doi_paper = ''
        doi_paper_custom = ''
        # print(doi_paper)
    else:
        doi_paper = str(docs_filtered[index].id.doi) # 'https://www.doi.org/' + 
        doi_paper_custom = 'https://www.doi.org/' + str(docs_filtered[index].id.doi)
        print(doi_paper)

    data.append({'Authors': '', 'Author full names': re.sub(r'[\[\'\]]', '', str(authorList)), 'Author(s) ID': '', 
    'Title': docs_filtered[index].title, 'Year': docs_filtered[index].publication_year, 'Source title': docs_filtered[index].publication_source, 
    'Volume': '', 'Issue': '', 'Art. No.': '', 'Page start': '', 'Page end': '', 'Page count': '', 'Cited by': docs_filtered[index].citation_count, 
    'DOI': doi_paper, 'Link': doi_paper_custom,  'Affiliations': re.sub(r'[\[\'\]]', '', str(docs_filtered[index].affiliations)), 
    'Authors with affiliations': '', 'Abstract': docs_filtered[index].abstract, 'Author Keywords': re.sub(r'[\[\'\]]', '', str(docs_filtered[index].keywords)), 
    'Index Keywords': '', 'Molecular Sequence Numbers': '',  'Chemicals/CAS': '', 'Tradenames': '', 'Manufacturers': '', 
    'Funding Details': '', 'Funding Texts': '', 'References': re.sub(r'[\[\'\]]', '', str(docs_filtered[index].references)), 'Correspondence Address': '', 'Editors': '', 'Publisher': docs_filtered[index].publisher, 
    'Sponsors': '', 'Conference name': '', 'Conference date': str(docs_filtered[index].publication_date), 'Conference location': '', 'Conference code': '', 'ISSN': '', 
    'ISBN': '', 'CODEN': '', 'PubMed ID': docs_filtered[index].id.pubmed, 'Language of Original Document': docs_filtered[index].language, 'Abbreviated Source Title': '', 'Document Type': docs_filtered[index].source_type, 
    'Publication Stage': '', 'Open Access': '', 'Source': '', 'EID': docs_filtered[index].id.scopusid})
    
    index += 1

# Saving first group of data to a single excel file
df = pd.DataFrame(data, columns=['Authors', 'Author full names', 'Author(s) ID', 
    'Title', 'Year', 'Source title', 
    'Volume', 'Issue', 'Art. No.', 'Page start', 'Page end', 'Page count', 'Cited by', 
    'DOI', 'Link',  'Affiliations', 'Authors with affiliations', 'Abstract', 'Author Keywords', 
    'Index Keywords', 'Molecular Sequence Numbers',  'Chemicals/CAS', 'Tradenames', 'Manufacturers', 
    'Funding Details', 'Funding Texts', 'References', 'Correspondence Address', 'Editors', 'Publisher', 
    'Sponsors', 'Conference name', 'Conference date', 'Conference location', 'Conference code', 'ISSN', 
    'ISBN', 'CODEN', 'PubMed ID', 'Language of Original Document', 'Abbreviated Source Title', 'Document Type', 
    'Publication Stage', 'Open Access', 'Source', 'EID'])

# Saving first group of data to a single csv file
df.to_csv('Snow/P12_' + filename + '.csv')

# Saving first group of data to a single excel file
# df.to_excel('Snow/P2_' + filename_xlsx, index=False)

10.3390/robotics7030047
10.1016/j.infsof.2011.11.009
10.5220/0005806101780185
10.1109/ACCESS.2020.3000437
10.1109/TASE.2014.2376492
10.1109/ISCC.2016.7543796
10.1109/DSAA.2019.00081
10.1145/2890784
10.1109/SysEng.2016.7753148
10.1109/IROS.2013.6697184
10.1109/ACCESS.2016.2574979
10.1109/TWC.2018.2813363
10.1109/ACCESS.2019.2929296
10.1007/978-3-030-36150-1_56
10.1109/CIOT.2016.7872914
10.1007/s00146-017-0792-6
10.1002/cpe.3708
10.1145/2245276.2232049
10.3390/app10072574
10.1109/MCOM.2018.1700131
10.1109/TASE.2013.2244883
10.1145/3326066
10.1109/TII.2016.2530404
10.1016/j.ijpe.2019.07.033
10.1109/MC.2018.3011041
10.1109/aCCESS.2020.2967218
10.1016/j.jnca.2017.09.002
10.1117/12.926523
10.1109/SYSOSE.2017.7994942
A distributed architecture for supervision of autonomous multi-robot missions: Application to air-sea scenarios, {eid: 84982299368, doi: 10.1007/s10514-016-9603-z}>, <Edge-enabled autonomous navigation and computer vision as a service: A study on mobile robots onboard energy cons

In [142]:
import pandas as pd
import re
from charset_normalizer import from_path

def sanitize_text(text):
    """Sanitize text to handle problematic characters."""
    replacements = {
        "ŌĆ‘": "'", "ŌĆ’": "'", "ŌĆœ": '"', "ŌĆ": '"', "┬®": "®"
    }
    for old, new in replacements.items():
        text = text.replace(old, new)
    return text

def filter_csv_by_or_condition(input_csv, output_csv, search_conditions, columns_to_search):
    """
    Filters rows from a CSV file based on OR conditions (case-insensitive).

    Parameters:
        input_csv (str): Path to the input CSV file.
        output_csv (str): Path to save the filtered CSV.
        search_conditions (list): List of strings for OR conditions.
        columns_to_search (list): List of column names to search within.

    Returns:
        None: The filtered rows are saved to the output CSV file.
    """
    # Detect the file's encoding using charset-normalizer
    detected = from_path(input_csv).best()
    detected_encoding = detected.encoding
    print(f"Detected encoding: {detected_encoding}")

    # Load the CSV file into a DataFrame using the detected encoding
    try:
        df = pd.read_csv(input_csv, encoding=detected_encoding)
        print(f"Loaded {len(df)} rows from the input CSV.")
    except Exception as e:
        print(f"Error loading CSV: {e}")
        return

    # Check if required columns exist
    missing_columns = [col for col in columns_to_search if col not in df.columns]
    if missing_columns:
        print(f"Missing columns in the input CSV: {missing_columns}")
        return

    # Sanitize text in relevant columns
    df[columns_to_search] = df[columns_to_search].fillna("").astype(str).applymap(sanitize_text)

    # Compile a regex pattern for OR conditions (case-insensitive)
    or_pattern = re.compile('|'.join(re.escape(term).replace(r'\*', '.*') for term in search_conditions), re.IGNORECASE)
    print(f"Compiled OR regex pattern: {or_pattern}")

    # Create a boolean mask: rows must match the OR pattern in any of the specified columns
    def row_matches(row):
        row_text = ' '.join(row.astype(str))  # Combine the text of the specified columns
        return bool(or_pattern.search(row_text))

    mask = df[columns_to_search].apply(row_matches, axis=1)
    print(f"Number of matching rows: {mask.sum()}")

    # Log skipped rows for debugging
    for idx, row in df[~mask][columns_to_search].iterrows():
        row_text = ' '.join(row.astype(str))
        print(f"Row {idx} skipped: {row_text}")

    # Filter the DataFrame using the mask
    filtered_df = df[mask]
    if filtered_df.empty:
        print("No rows matched the specified conditions.")
    else:
        filtered_df.to_csv(output_csv, index=False)
        print(f"Filtered rows saved to {output_csv}")

# Example usage
input_csv = "Snow/P12_SOTA_SECO_CPS_2024-11-28_17-38-53.csv"
output_csv = "filtered_output.csv"

# Define search conditions for OR logic
search_conditions = [
    "systematic", "Systematic", "literature", "Literature", "review", "Review",
    "Mapping", "mapping", "study", "Study", "SLR", "SMS", "survey", "Survey"
]

columns_to_search = ["Title", "Abstract", "Author Keywords"]

filter_csv_by_or_condition(input_csv, output_csv, search_conditions, columns_to_search)

Detected encoding: utf_8
Loaded 9 rows from the input CSV.
Compiled OR regex pattern: re.compile('systematic|Systematic|literature|Literature|review|Review|Mapping|mapping|study|Study|SLR|SMS|survey|Survey', re.IGNORECASE)
Number of matching rows: 7
Row 0 skipped: Automating big data analysis based on deep learning generation by automatic service composition © 2019 IEEE.Automation of Big Data Analysis (BDA) procedure gives us a great profit in the era of Big Data and Artificial Intelligence. BDA procedure can be efficiently automated by the automatic service composition concept efficiently. Our previous work for Auto-BDA shows a great future prospect in reducing turnaround time for data analysis. Moreover, it requires consideration of the automation with a well-geared combination of the data preparation and the optimal model (deep learning) generation. This paper shows the construction of automating BDA and model generation (here deep learning) together with data preparation and parame

In [143]:
############################## LOAD Cleaned papers ###################################

# with open('AAA/studies.txt', 'r') as file:
#     data = file.read().splitlines()
#     print(data)

In [144]:
# Load csv file
docs_bib_1 = litstudy.load_scopus_csv('P1_SOTA_SECO_CPS_2024-11-28_17-38-53_snowballed.csv')
print(len(docs_bib_1), 'papers loaded from selected papers list')

import logging
logging.getLogger().setLevel(logging.CRITICAL)

docs_found_scopus, docs_notfound_scopus = litstudy.refine_scopus(docs_bib_1, search_title=True)

print(len(docs_found_scopus), 'papers found on Scopus')
print(len(docs_notfound_scopus), 'papers were not found')
docs_bib_scopus_1 = docs_found_scopus | docs_notfound_scopus
len(docs_bib_scopus_1)

9 papers loaded from selected papers list


100%|██████████| 9/9 [00:00<00:00, 359.06it/s]

9 papers found on Scopus
0 papers were not found





9

In [145]:
# Load csv file
docs_bib_2 = litstudy.load_scopus_csv('P2_SOTA_SECO_CPS_2024-11-28_17-38-53_snowballed.csv')
print(len(docs_bib_2), 'papers loaded from selected papers list')

import logging
logging.getLogger().setLevel(logging.CRITICAL)

docs_found_scopus, docs_notfound_scopus = litstudy.refine_scopus(docs_bib_2, search_title=True)

print(len(docs_found_scopus), 'papers found on Scopus')
print(len(docs_notfound_scopus), 'papers were not found')
docs_bib_scopus_2 = docs_found_scopus | docs_notfound_scopus
len(docs_bib_scopus_2)

26 papers loaded from selected papers list


100%|██████████| 26/26 [00:10<00:00,  2.43it/s]

24 papers found on Scopus
2 papers were not found





26

In [146]:
# Load csv file
docs_bib_3 = litstudy.load_scopus_csv('P3_SOTA_SECO_CPS_2024-11-28_17-38-53_snowballed.csv')
print(len(docs_bib_3), 'papers loaded from selected papers list')

import logging
logging.getLogger().setLevel(logging.CRITICAL)

docs_found_scopus, docs_notfound_scopus = litstudy.refine_scopus(docs_bib_3, search_title=True)

print(len(docs_found_scopus), 'papers found on Scopus')
print(len(docs_notfound_scopus), 'papers were not found')
docs_bib_scopus_3 = docs_found_scopus | docs_notfound_scopus
len(docs_bib_scopus_3)

2 papers loaded from selected papers list


100%|██████████| 2/2 [00:00<00:00,  2.87it/s]

2 papers found on Scopus
0 papers were not found





2

In [147]:
# Load csv file
docs_bib_4 = litstudy.load_scopus_csv('P4_SOTA_SECO_CPS_2024-11-28_17-38-53_snowballed.csv')
print(len(docs_bib_4), 'papers loaded from selected papers list')

import logging
logging.getLogger().setLevel(logging.CRITICAL)

docs_found_scopus, docs_notfound_scopus = litstudy.refine_scopus(docs_bib_4, search_title=True)

print(len(docs_found_scopus), 'papers found on Scopus')
print(len(docs_notfound_scopus), 'papers were not found')
docs_bib_scopus_4 = docs_found_scopus | docs_notfound_scopus
len(docs_bib_scopus_4)

53 papers loaded from selected papers list


100%|██████████| 53/53 [00:17<00:00,  2.98it/s]

53 papers found on Scopus
0 papers were not found





53

In [149]:
# Load csv file
docs_bib_6 = litstudy.load_scopus_csv('P6_SOTA_SECO_CPS_2024-11-28_17-38-53_snowballed.csv')
print(len(docs_bib_6), 'papers loaded from selected papers list')

import logging
logging.getLogger().setLevel(logging.CRITICAL)

docs_found_scopus, docs_notfound_scopus = litstudy.refine_scopus(docs_bib_6, search_title=True)

print(len(docs_found_scopus), 'papers found on Scopus')
print(len(docs_notfound_scopus), 'papers were not found')
docs_bib_scopus_6 = docs_found_scopus | docs_notfound_scopus
len(docs_bib_scopus_6)

3 papers loaded from selected papers list


100%|██████████| 3/3 [00:03<00:00,  1.09s/it]

3 papers found on Scopus
0 papers were not found





3

In [150]:
# Load csv file
docs_bib_7 = litstudy.load_scopus_csv('P7_SOTA_SECO_CPS_2024-11-28_17-38-53_snowballed.csv')
print(len(docs_bib_7), 'papers loaded from selected papers list')

import logging
logging.getLogger().setLevel(logging.CRITICAL)

docs_found_scopus, docs_notfound_scopus = litstudy.refine_scopus(docs_bib_7, search_title=True)

print(len(docs_found_scopus), 'papers found on Scopus')
print(len(docs_notfound_scopus), 'papers were not found')
docs_bib_scopus_7 = docs_found_scopus | docs_notfound_scopus
len(docs_bib_scopus_7)

6 papers loaded from selected papers list


100%|██████████| 6/6 [00:02<00:00,  2.62it/s]

6 papers found on Scopus
0 papers were not found





6

In [151]:
# Load csv file
docs_bib_8 = litstudy.load_scopus_csv('P8_SOTA_SECO_CPS_2024-11-28_17-38-53_snowballed.csv')
print(len(docs_bib_8), 'papers loaded from selected papers list')

import logging
logging.getLogger().setLevel(logging.CRITICAL)

docs_found_scopus, docs_notfound_scopus = litstudy.refine_scopus(docs_bib_8, search_title=True)

print(len(docs_found_scopus), 'papers found on Scopus')
print(len(docs_notfound_scopus), 'papers were not found')
docs_bib_scopus_8 = docs_found_scopus | docs_notfound_scopus
len(docs_bib_scopus_8)

18 papers loaded from selected papers list


100%|██████████| 18/18 [00:06<00:00,  2.88it/s]

17 papers found on Scopus
1 papers were not found





18

In [152]:
# Load csv file
docs_bib_9 = litstudy.load_scopus_csv('P9_SOTA_SECO_CPS_2024-11-28_17-38-53_snowballed.csv')
print(len(docs_bib_9), 'papers loaded from selected papers list')

import logging
logging.getLogger().setLevel(logging.CRITICAL)

docs_found_scopus, docs_notfound_scopus = litstudy.refine_scopus(docs_bib_9, search_title=True)

print(len(docs_found_scopus), 'papers found on Scopus')
print(len(docs_notfound_scopus), 'papers were not found')
docs_bib_scopus_9 = docs_found_scopus | docs_notfound_scopus
len(docs_bib_scopus_9)

9 papers loaded from selected papers list


100%|██████████| 9/9 [00:03<00:00,  2.58it/s]

9 papers found on Scopus
0 papers were not found





9

In [153]:
# Load csv file
docs_bib_10 = litstudy.load_scopus_csv('P10_SOTA_SECO_CPS_2024-11-28_17-38-53_snowballed.csv')
print(len(docs_bib_10), 'papers loaded from selected papers list')

import logging
logging.getLogger().setLevel(logging.CRITICAL)

docs_found_scopus, docs_notfound_scopus = litstudy.refine_scopus(docs_bib_10, search_title=True)

print(len(docs_found_scopus), 'papers found on Scopus')
print(len(docs_notfound_scopus), 'papers were not found')
docs_bib_scopus_10 = docs_found_scopus | docs_notfound_scopus
len(docs_bib_scopus_10)

15 papers loaded from selected papers list


100%|██████████| 15/15 [00:05<00:00,  2.92it/s]

15 papers found on Scopus
0 papers were not found





15

In [154]:
# Load csv file
docs_bib_11 = litstudy.load_scopus_csv('P11_SOTA_SECO_CPS_2024-11-28_17-38-53_snowballed.csv')
print(len(docs_bib_11), 'papers loaded from selected papers list')

import logging
logging.getLogger().setLevel(logging.CRITICAL)

docs_found_scopus, docs_notfound_scopus = litstudy.refine_scopus(docs_bib_11, search_title=True)

print(len(docs_found_scopus), 'papers found on Scopus')
print(len(docs_notfound_scopus), 'papers were not found')
docs_bib_scopus_11 = docs_found_scopus | docs_notfound_scopus
len(docs_bib_scopus_11)

8 papers loaded from selected papers list


100%|██████████| 8/8 [00:02<00:00,  2.87it/s]

8 papers found on Scopus
0 papers were not found





8

In [155]:
# Load csv file
docs_bib_12 = litstudy.load_scopus_csv('P12_SOTA_SECO_CPS_2024-11-28_17-38-53_snowballed.csv')
print(len(docs_bib_12), 'papers loaded from selected papers list')

import logging
logging.getLogger().setLevel(logging.CRITICAL)

docs_found_scopus, docs_notfound_scopus = litstudy.refine_scopus(docs_bib_12, search_title=True)

print(len(docs_found_scopus), 'papers found on Scopus')
print(len(docs_notfound_scopus), 'papers were not found')
docs_bib_scopus_12 = docs_found_scopus | docs_notfound_scopus
len(docs_bib_scopus_12)

7 papers loaded from selected papers list


100%|██████████| 7/7 [00:03<00:00,  2.32it/s]

6 papers found on Scopus
1 papers were not found





7

In [156]:
docs_bib = docs_bib_scopus_1 | docs_bib_scopus_2 | docs_bib_scopus_3 | docs_bib_scopus_4 | docs_bib_scopus_6 | docs_bib_scopus_7 | docs_bib_scopus_8 | docs_bib_scopus_9 | docs_bib_scopus_10 | docs_bib_scopus_11 | docs_bib_scopus_12
docs_filtered = docs_bib # .filter_docs(lambda d: d.publication_year >= 2019)

In [157]:
index = 0
data = []
while index < len(docs_filtered):
    # re.sub('[<\[\]>]', '', str(docs_filtered[index].authors))

    authorList = []
    for author in docs_filtered[index].authors or []:
        authorList.append(author.name)

    if type(docs_filtered[index].id.doi) == type(None):
        print(docs_filtered[index].title)
        doi_paper = ''
        doi_paper_custom = ''
        # print(doi_paper)
    else:
        doi_paper = str(docs_filtered[index].id.doi) # 'https://www.doi.org/' + 
        doi_paper_custom = 'https://www.doi.org/' + str(docs_filtered[index].id.doi)
        print(doi_paper)

    data.append({'Authors': '', 'Author full names': re.sub(r'[\[\'\]]', '', str(authorList)), 'Author(s) ID': '', 
    'Title': docs_filtered[index].title, 'Year': docs_filtered[index].publication_year, 'Source title': docs_filtered[index].publication_source, 
    'Volume': '', 'Issue': '', 'Art. No.': '', 'Page start': '', 'Page end': '', 'Page count': '', 'Cited by': docs_filtered[index].citation_count, 
    'DOI': doi_paper, 'Link': doi_paper_custom,  'Affiliations': re.sub(r'[\[\'\]]', '', str(docs_filtered[index].affiliations)), 
    'Authors with affiliations': '', 'Abstract': docs_filtered[index].abstract, 'Author Keywords': re.sub(r'[\[\'\]]', '', str(docs_filtered[index].keywords)), 
    'Index Keywords': '', 'Molecular Sequence Numbers': '',  'Chemicals/CAS': '', 'Tradenames': '', 'Manufacturers': '', 
    'Funding Details': '', 'Funding Texts': '', 'References': re.sub(r'[\[\'\]]', '', str(docs_filtered[index].references)), 'Correspondence Address': '', 'Editors': '', 'Publisher': docs_filtered[index].publisher, 
    'Sponsors': '', 'Conference name': '', 'Conference date': str(docs_filtered[index].publication_date), 'Conference location': '', 'Conference code': '', 'ISSN': '', 
    'ISBN': '', 'CODEN': '', 'PubMed ID': docs_filtered[index].id.pubmed, 'Language of Original Document': docs_filtered[index].language, 'Abbreviated Source Title': '', 'Document Type': docs_filtered[index].source_type, 
    'Publication Stage': '', 'Open Access': '', 'Source': '', 'EID': docs_filtered[index].id.scopusid})
    
    index += 1

# Saving first group of data to a single excel file
df = pd.DataFrame(data, columns=['Authors', 'Author full names', 'Author(s) ID', 
    'Title', 'Year', 'Source title', 
    'Volume', 'Issue', 'Art. No.', 'Page start', 'Page end', 'Page count', 'Cited by', 
    'DOI', 'Link',  'Affiliations', 'Authors with affiliations', 'Abstract', 'Author Keywords', 
    'Index Keywords', 'Molecular Sequence Numbers',  'Chemicals/CAS', 'Tradenames', 'Manufacturers', 
    'Funding Details', 'Funding Texts', 'References', 'Correspondence Address', 'Editors', 'Publisher', 
    'Sponsors', 'Conference name', 'Conference date', 'Conference location', 'Conference code', 'ISSN', 
    'ISBN', 'CODEN', 'PubMed ID', 'Language of Original Document', 'Abbreviated Source Title', 'Document Type', 
    'Publication Stage', 'Open Access', 'Source', 'EID'])

# Saving first group of data to a single csv file
df.to_csv('snoballed_Final' + filename + '.csv')

# Saving first group of data to a single excel file
# df.to_excel('Snow/P2_' + filename_xlsx, index=False)

10.1109/TII.2018.2873186
10.1002/sys.21566
10.1109/ACCESS.2019.2953499
10.1109/JPROC.2020.2998530
10.1007/978-3-030-49435-3_6
10.1109/ISSE46696.2019.8984568
10.1109/SYSCON.2019.8836869
10.1177/0037549719829828
10.1080/0951192X.2019.1599433
10.1109/ICAASE51408.2020.9380125
10.3390/app11073014
10.1002/jmri.27448
10.4271/12-04-01-0003
A case for integrated data processing in large-scale cyber-physical systems
10.1016/j.cie.2019.106004
10.1007/s12525-019-00362-x
10.14488/BJOPM.2019.v16.n2.a8
Edge cloud as an enabler for distributed AI in industrial IoT applications: The experience of the iotwins project
10.1080/0144929X.2019.1581258
10.1145/3365438.3410934
10.1007/s10845-019-01512-w
10.1016/j.cirpj.2020.02.002
10.1145/3419394.3423666
10.1145/3365438.3410941
10.1007/s10270-019-00757-6
10.1109/ICCWorkshops49005.2020.9145100
10.3233/ATDE200188
10.1109/CNS48642.2020.9162337
10.1016/j.ipm.2021.102529
10.1007/s13347-021-00450-x
10.1515/pjbr-2021-0008
10.1109/JSYST.2020.2993323
10.1080/00207543.2