# Import WOS & Scopus to Infoscience

In [1]:
import sys
import os

from datetime import datetime
sys.path.append(os.path.abspath(".."))
from data_pipeline.main import main
from data_pipeline.reporting import GenerateReports

## Initialize working directory

In [2]:

# Each run writes into its own timestamped sub-folder of "harvested-data"
# (relative to the current working directory), named after the local time
# at second resolution, e.g. harvested-data/2025_02_07-14_30_00.
current_datetime = datetime.now().strftime("%Y_%m_%d-%H_%M_%S")
folder_path = "harvested-data"
path = os.path.join(folder_path, current_datetime)

# makedirs creates the parent folder as well, and exist_ok=True makes a
# re-run within the same second a no-op, replacing the separate
# exists()/mkdir() check-then-create sequence.
os.makedirs(path, exist_ok=True)

## Define parameters

In [3]:
# Date range handed to main() as start_date / end_date, written as YYYY-MM-DD.
start, end = "2025-02-05", "2025-02-07"

# Optional filters: each one stays None unless a value is supplied.
# An example value is kept next to each of them for reference.

# Author identifiers, forwarded to main() as authors_ids.
# Example: ["DYK-7080-2022", "23008979400"]
author_ids = None

# Custom WoS query, registered under the "wos" key when set.
# Example: "AI=(DYK-7080-2022) AND OG=(Ecole Polytechnique Federale de Lausanne) AND PY=2024"
wos_query = None

# Custom Scopus query, registered under the "scopus" key when set.
# Example: "AU-ID ( 23008979400 ) AND AFFIL ( ecole polytechnique federale de lausanne ) AND PUBYEAR > 2023"
scopus_query = None

## Run pipeline

In [None]:
# Keep only the custom queries that were actually provided, keyed by source.
custom_queries = {
    source: query
    for source, query in (("wos", wos_query), ("scopus", scopus_query))
    if query
}

# main() receives None rather than an empty dict when no custom query is set.
queries = custom_queries or None

# Run the pipeline for the configured date range and optional filters; it
# returns five dataframes that the following cells export and report on.
(
    df_metadata,
    df_authors,
    df_epfl_authors,
    df_unloaded,
    df_loaded,
) = main(
    start_date=start,
    end_date=end,
    queries=queries,
    authors_ids=author_ids,
)

## Create dataframes and export them to CSV

In [5]:
# Rows of df_metadata whose "row_id" does not appear in df_loaded.
is_loaded = df_metadata["row_id"].isin(df_loaded["row_id"])
df_rejected = df_metadata[~is_loaded]

# Output file name -> dataframe written under that name.
dataframes = dict(
    [
        ("ResearchOutput.csv", df_metadata),
        ("AddressesAndNames.csv", df_authors),
        ("EpflAuthors.csv", df_epfl_authors),
        ("UnloadedDuplicatedPublications.csv", df_unloaded),
        ("ImportedPublications.csv", df_loaded),
        ("RejectedPublications.csv", df_rejected),
    ]
)

# Write every dataframe into the run folder as UTF-8 CSV, without the index.
for filename, df in dataframes.items():
    df.to_csv(os.path.join(path, filename), index=False, encoding="utf-8")

## Generate the Excel report

In [None]:
# Build the report generator from the pipeline dataframes. The arguments are
# positional, so their order (metadata, unloaded, EPFL authors, loaded) matters.
report_generator = GenerateReports(
    df_metadata,
    df_unloaded,
    df_epfl_authors,
    df_loaded,
)

# Ask for the Excel report with the run folder as output directory; the
# returned value is kept in report_path for the email step.
report_path = report_generator.generate_excel_report(output_dir=path)
print(f"Excel report generated successfully: {report_path}")

## Send the Excel report by email

In [None]:
# NOTE(review): recipient_email, sender_email and smtp_server are not assigned
# in any cell shown in this notebook; define them (for example in a private
# cell) before running this one. Checking up front produces one message that
# names everything missing instead of a NameError on the first name only.
_required = ("recipient_email", "sender_email", "smtp_server")
_missing = [name for name in _required if name not in globals()]
if _missing:
    raise NameError(
        "Define " + ", ".join(_missing) + " before sending the report by email."
    )

# Send the generated Excel report, passing along the import date range
# configured in the parameters cell.
report_generator.send_report_by_email(
    recipient_email=recipient_email,
    sender_email=sender_email,
    smtp_server=smtp_server,
    import_start_date=start,
    import_end_date=end,
    file_path=report_path,
)