# Import WOS & Scopus to Infoscience

In [1]:
import sys
import os

from datetime import datetime
sys.path.append(os.path.abspath(".."))
from data_pipeline.main import main
from data_pipeline.reporting import GenerateReports

In [2]:
# Create a timestamped output folder for this run under "harvested-data".
current_datetime = datetime.now().strftime("%Y_%m_%d-%H_%M_%S")
folder_path = "harvested-data"
path = os.path.join(folder_path, current_datetime)

# makedirs also creates the parent folder, and exist_ok=True makes re-running
# the cell safe without a separate (race-prone) os.path.exists() check.
os.makedirs(path, exist_ok=True)

In [None]:
# Run the workflow for the date range below.
# NOTE(review): whether `end` is inclusive depends on data_pipeline.main — confirm.
start = "2025-02-02"
end = "2025-02-04"

# main() returns five result tables, unpacked here in the order it yields them.
results = main(start_date=start, end_date=end)
df_metadata, df_authors, df_epfl_authors, df_unloaded, df_loaded = results

In [4]:
# Rows of df_metadata whose row_id does not appear in df_loaded.
is_loaded = df_metadata["row_id"].isin(df_loaded["row_id"])
df_excluded = df_metadata[~is_loaded]

# Output file name -> table written under that name in the run folder.
dataframes = {
    "ResearchOutput.csv": df_metadata,
    "AddressesAndNames.csv": df_authors,
    "EpflAuthors.csv": df_epfl_authors,
    "UnloadedDuplicatedPublications.csv": df_unloaded,
    "ImportedPublications.csv": df_loaded,
    "ExcludedPublications.csv": df_excluded,
}

# Write each table as a UTF-8 CSV without the index column.
for csv_name, frame in dataframes.items():
    frame.to_csv(os.path.join(path, csv_name), index=False, encoding="utf-8")

In [None]:
# Build the report generator from the workflow outputs (arguments are positional,
# so their order must match the GenerateReports constructor).
report_generator = GenerateReports(
    df_metadata,
    df_unloaded,
    df_epfl_authors,
    df_loaded,
)

# Write the Excel report into this run's output folder and show where it landed.
report_path = report_generator.generate_excel_report(output_dir=path)
print(f"Excel report generated successfully: {report_path}")