# Import works from a specific researcher

In [1]:
import sys
import os

# Put the parent directory on the import path (presumably so that the
# `data_pipeline` package imported in the next cell can be found).
parent_dir = os.path.abspath(os.pardir)
sys.path.append(parent_dir)

In [None]:
from data_pipeline.main import main
from datetime import datetime
import shutil
from data_pipeline.loader import Loader
from email.mime.multipart import MIMEMultipart
from email.mime.base import MIMEBase
from email import encoders
from email.mime.text import MIMEText

In [3]:
# Identifiers of the researcher whose works are imported.
researcherid = "FYN-6937-2022"  # Web of Science ResearcherID (used in AI=)
scopusauthid = "35190508300"  # Scopus author ID (used in AU-ID)
start_year = "2016"  # first publication year to include

# Source-specific search queries handed to the pipeline, keyed by source name.
# Scopus `PUBYEAR > N` is a strict comparison, so N must be `start_year - 1`
# for publications from `start_year` itself to be kept, in line with the
# WoS range, which starts on {start_year}-01-01.
custom_queries = {
    "wos": f"OG=(Ecole Polytechnique Federale de Lausanne) AND AI={researcherid} AND DOP=({start_year}-01-01/2025-01-01)",
    "scopus": f"(AF-ID(60028186) OR AF-ID(60210159) OR AF-ID(60070536) OR AF-ID(60204330) OR AF-ID(60070531) OR AF-ID(60070534) OR AF-ID(60070538) OR AF-ID(60014951) OR AF-ID(60070529) OR AF-ID(60070532) OR AF-ID(60070535) OR AF-ID(60122563) OR AF-ID(60210160) OR AF-ID(60204331)) AND AU-ID('{scopusauthid}') AND PUBYEAR > {int(start_year) - 1}",
}

In [4]:
# Create the output folder for this run, named after the current local
# date and time, inside "harvested-data".
current_datetime = datetime.now().strftime("%Y_%m_%d-%H_%M_%S")
folder_path = "harvested-data"
path = os.path.join(folder_path, current_datetime)

# makedirs creates the parent folder as well and tolerates an existing
# directory, so no separate existence check is needed.
os.makedirs(path, exist_ok=True)

In [None]:
# Run the workflow for the given date window with the custom queries.
# `main` returns four objects, unpacked here in the order it yields them.
start = "2016-01-01"
end = "2025-01-01"
(
    df_metadata,
    df_authors,
    df_epfl_authors,
    df_unloaded,
) = main(queries=custom_queries, start_date=start, end_date=end)

In [6]:
# Save each result table as a UTF-8 CSV file (without the index column)
# in the run folder.
csv_exports = (
    ("ResearchOutput.csv", df_metadata),
    ("AddressesAndNames.csv", df_authors),
    ("EpflAuthors.csv", df_epfl_authors),
    ("UnloadedDuplicatedPublications.csv", df_unloaded),
)
for csv_name, frame in csv_exports:
    frame.to_csv(os.path.join(path, csv_name), index=False, encoding="utf-8")

In [None]:
### WIP: add a step to create the missing EPFL persons' authorities in DSpace #####################

In [None]:
### Upload data in DSpace #####################
# Build a Loader from the publication metadata and the EPFL authors tables,
# then call create_complete_publication() and keep its result in
# `loaded_items` (written to "ImportedItems.csv" in the next cell).
# NOTE(review): the actual DSpace calls happen inside Loader and are not
# visible here — presumably this creates records on the target server, so
# confirm the target environment before running this cell.
loader_instance = Loader(df_metadata, df_epfl_authors)
loaded_items = loader_instance.create_complete_publication()

In [6]:
### Generate the report of imported items #####################
# Only the CSV report is written here; no email is sent in this cell.
imported_items_csv = os.path.join(path, "ImportedItems.csv")
loaded_items.to_csv(imported_items_csv, index=False, encoding="utf-8")

In [None]:
# Pack the contents of the run folder into a zip archive named "<path>.zip".
zip_filename = path + ".zip"
shutil.make_archive(base_name=path, format="zip", root_dir=path)

***