## Search author information By Id function

In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import re
import time
import pandas as pd
import json

In [2]:
# Indiquer le chemin vers le ChromeDriver
chrome_driver_path = "chromedriver.exe" 
service = Service(chrome_driver_path)

# Initialiser le navigateur Chrome
driver = webdriver.Chrome(service=service)

In [3]:
def extract_author_metrics(driver, author_id):
    # Accéder à l'URL de l'auteur
    driver.get(f"https://www.scopus.com/authid/detail.uri?authorId={author_id}#tab=metrics")
    
    # Dictionnaire pour stocker les métriques
    author_data = {
        "Nom_Complet": "N/A",
        "Affiliation": "N/A",
        "Citations": 0,
        "Documents": 0,
        "h-index": 0,
        "FWCI": 0.0
    }
    
    wait = WebDriverWait(driver, 30)  # Attendre jusqu'à 30 secondes

    # Attendre que le nom de l'auteur soit visible
    try:
        author_data["Nom_Complet"] = wait.until(EC.visibility_of_element_located((By.CLASS_NAME, "Typography-module__lVnit.Typography-module__oFCaL"))).text
    except Exception:
        print("Nom de l'auteur indisponible")
        
    # Scraper l'affiliation (pays ou institution)
    try:
        elem = wait.until(EC.visibility_of_element_located((By.CLASS_NAME, "AuthorHeader-module__DRxsE")))
        raw_affiliation  = elem.find_elements(By.CLASS_NAME, "Typography-module__lVnit.Typography-module__Nfgvc")[-1].text
        author_data["Affiliation"] = raw_affiliation.lstrip(', ').replace(', ', ' - ')
    except Exception:
        print("Affiliation indisponible")

    # Attendre que la section des métriques soit visible
    try:
        _metrics = wait.until(EC.visibility_of_element_located((By.CLASS_NAME, "MetricSection-module__s8lWB")))
        
        # Extraire les métriques
        citations = re.search(r'(\d{1,3}(?:,\d{3})*)\s*citation', _metrics.text.lower())
        documents = re.search(r'(\d{1,3}(?:,\d{3})*)\s*Document', _metrics.text)
        h_index = re.search(r'(\d{1,3}(?:,\d{3})*)\s*h-index', _metrics.text.lower())

        # Nettoyage des valeurs extraites et conversion en nombres
        if citations:
            author_data["Citations"] = int(citations.group(1).replace(',', ''))
        
        if documents:
            author_data["Documents"] = int(documents.group(1).replace(',', ''))
        
        if h_index:
            author_data["h-index"] = int(h_index.group(1).replace(',', ''))
    
    except Exception:
        print("Erreur lors de l'extraction des métriques")

    # Attendre que le FWCI soit visible
    try:
        _fwci = wait.until(EC.visibility_of_element_located((By.ID, 'metrics-panel')))
        
        # Extraire le FWCI
        fwci = re.search(r'field-weighted citation impact\s+(\d+\.\d+)', _fwci.text.lower(), re.IGNORECASE)
        
        if fwci:
            author_data["FWCI"] = float(fwci.group(1))
    
    except Exception:
        print("FWCI indisponible ou erreur lors de l'extraction")

    return author_data

In [4]:
_id = 34868498800
extract_author_metrics(driver, _id)

{'Nom_Complet': 'Rodrigo, Manuel A.',
 'Affiliation': 'Ciudad Real - Spain',
 'Citations': 30295,
 'Documents': 639,
 'h-index': 83,
 'FWCI': 1.38}

## Search co-author Ids function

In [4]:
# Define the function
def fetch_co_authors(driver, author_id, nbr_co_auth):
    # Access the co-author page for the given author_id
    url = f"https://www.scopus.com/search/submit/coAuthorSearch.uri?authorId={author_id}&origin=AuthorProfile&sot=al&sdt=coaut&zone=coAuthorsTab"
    driver.get(url)
    driver.get(url)
    
    # Wait for the author results to be visible
    author_results = WebDriverWait(driver, 10).until(
        EC.visibility_of_element_located((By.ID, "srchResultsList"))
    )
    
    # Find all rows (tr) in the results table
    rows = author_results.find_elements(By.TAG_NAME, "tr")
    
    # Set to store unique co-author IDs
    author_ids_set = set()
    
    # Loop through each row and extract co-author IDs
    for row in rows:
        try:
            # Extract the href attribute from the anchor tag
            columns = row.find_element(By.TAG_NAME, "a").get_attribute('href')
            
            # Use regex to capture the authorId from the URL
            match = re.search(r'authorId=(\d+)', columns)
            
            if match:
                # Add the extracted authorId to the set
                author_ids_set.add(match.group(1))
        
        except:
            # Ignore rows where no valid link is found
            pass
    
    # Convert the set of author IDs to a list
    author_ids = list(author_ids_set)
    
    # Create the dictionary
    author_dict = {
        "author_id": author_id,
        "co-authors": author_ids[1:nbr_co_auth + 1]
    }
    
    return author_dict


In [6]:
print(json.dumps(fetch_co_authors(driver, author_id=_id, nbr_co_auth=2), indent=4))

{
    "author_id": 34868498800,
    "co-authors": [
        "6508022819",
        "7005314628"
    ]
}


In [5]:
def author_co_auth_co_auth(driver, author_id, nbr_co_authers):
    # Dictionnaire pour stocker les informations finales
    result = {}
    
    # Récupérer les informations de l'auteur initial
    author_info = extract_author_metrics(driver, author_id)
    co_authors_info = fetch_co_authors(driver, author_id, nbr_co_authers)
    
    # Ajouter les informations de l'auteur initial dans le dictionnaire de sortie
    result[author_id] = {
        'author_info': author_info,
        'co_authors': {}
    }
    
    # Parcourir chaque co-auteur de l'auteur initial
    for co_author_id in co_authors_info['co-authors']:
        # Extraire les informations du co-auteur
        co_author_info = extract_author_metrics(driver, co_author_id)
        
        # Récupérer la liste des co-auteurs du co-auteur
        co_authors_of_co_author = fetch_co_authors(driver, co_author_id, nbr_co_authers)
        
        # Ajouter les informations du co-auteur et de ses co-auteurs
        result[author_id]['co_authors'][co_author_id] = {
            'co_author_info': co_author_info,
            'co_authors': {}
        }
        
        # Parcourir les co-auteurs du co-auteur
        for co_auth_of_co_auth_id in co_authors_of_co_author['co-authors']:
            # Extraire les informations des co-auteurs du co-auteur
            co_auth_of_co_auth_info = extract_author_metrics(driver, co_auth_of_co_auth_id)
            
            # Ajouter les informations des co-auteurs du co-auteur
            result[author_id]['co_authors'][co_author_id]['co_authors'][co_auth_of_co_auth_id] = co_auth_of_co_auth_info
    
    return result


In [8]:
doc = author_co_auth_co_auth(driver, author_id= _id, nbr_co_authers=2)
pretty_data = json.dumps(doc, indent=4)
print(pretty_data)

{
    "34868498800": {
        "author_info": {
            "Nom_Complet": "Rodrigo, Manuel A.",
            "Affiliation": "Ciudad Real - Spain",
            "Citations": 30295,
            "Documents": 639,
            "h-index": 83,
            "FWCI": 1.38
        },
        "co_authors": {
            "6508022819": {
                "co_author_info": {
                    "Nom_Complet": "Sir\u00e9s, Ignasi",
                    "Affiliation": "Barcelona - Spain",
                    "Citations": 19076,
                    "Documents": 211,
                    "h-index": 66,
                    "FWCI": 2.48
                },
                "co_authors": {
                    "7402580405": {
                        "Nom_Complet": "Roberts, Edward P.l.",
                        "Affiliation": "Calgary - Canada",
                        "Citations": 4760,
                        "Documents": 161,
                        "h-index": 40,
                        "FWCI": 1.27
           

In [9]:
# Conversion de JSON en pandas DataFrame
def extract_author_data(author_id, author_data, co_author_ids=None, co_co_author_ids=None):
    # Si l'auteur n'a pas de clé "author_info", on suppose que les données sont directement dans "co_author_info"
    author_info = author_data.get("author_info", author_data.get("co_author_info"))
    return {
        "ID_Auteur": author_id,
        "Nom_Complet": author_info["Nom_Complet"],
        "Affiliation": author_info["Affiliation"],
        "Citations": author_info["Citations"],
        "Documents": author_info["Documents"],
        "h-index": author_info["h-index"],
        "FWCI": author_info["FWCI"],
        "Co-auteur-liste": co_author_ids if co_author_ids is not None else [],
        "Co-Co-auteur-liste": co_co_author_ids if co_co_author_ids is not None else []
    }

# Liste pour stocker les lignes de la DataFrame
rows = []

# Étape 1 : Extraire les données pour l'auteur principal et ses co-auteurs
for author_id, author_data in doc.items():
    co_authors = author_data["co_authors"]

    # Récupérer les ID des co-auteurs
    co_author_ids = list(co_authors.keys())
    
    # Récupérer les ID des co-co-auteurs (tous les co-co-auteurs de tous les co-auteurs)
    co_co_author_ids = []
    for co_author_id, co_author_data in co_authors.items():
        co_co_author_ids.extend(co_author_data["co_authors"].keys())

    # Ajouter la ligne pour l'auteur principal
    rows.append(extract_author_data(author_id, author_data, co_author_ids, co_co_author_ids))

    # Étape 2 : Extraire les données pour chaque co-auteur avec seulement une partie de leurs co-co-auteurs
    co_co_auth_split_1 = co_co_author_ids[:2]  # Pour le premier co-auteur
    co_co_auth_split_2 = co_co_author_ids[2:]  # Pour le deuxième co-auteur
    
    for idx, (co_author_id, co_author_data) in enumerate(co_authors.items()):
        if idx == 0:
            # Premier co-auteur (avec les deux premiers co-co-auteurs)
            rows.append(extract_author_data(co_author_id, co_author_data, co_co_auth_split_1, []))
        elif idx == 1:
            # Deuxième co-auteur (avec les deux derniers co-co-auteurs)
            rows.append(extract_author_data(co_author_id, co_author_data, co_co_auth_split_2, []))

        # Étape 3 : Ajouter une ligne pour chaque co-co-auteur (sans co-auteurs et co-co-auteurs)
        for co_co_author_id, co_co_author_data in co_author_data["co_authors"].items():
            rows.append(extract_author_data(co_co_author_id, {"author_info": co_co_author_data}))

# Création de la DataFrame
df = pd.DataFrame(rows)

# Afficher la DataFrame
df.head(6)


Unnamed: 0,ID_Auteur,Nom_Complet,Affiliation,Citations,Documents,h-index,FWCI,Co-auteur-liste,Co-Co-auteur-liste
0,34868498800,"Rodrigo, Manuel A.",Ciudad Real - Spain,30295,639,83,1.38,"[6508022819, 7005314628]","[7402580405, 6508022819, 6508022819, 7005986098]"
1,6508022819,"Sirés, Ignasi",Barcelona - Spain,19076,211,66,2.48,"[7402580405, 6508022819]",[]
2,7402580405,"Roberts, Edward P.l.",Calgary - Canada,4760,161,40,1.27,[],[]
3,6508022819,"Sirés, Ignasi",Barcelona - Spain,19076,211,66,0.0,[],[]
4,7005314628,"Brillas, Enric",Barcelona - Spain,40834,458,104,2.87,"[6508022819, 7005986098]",[]
5,6508022819,"Sirés, Ignasi",Barcelona - Spain,19076,211,66,2.48,[],[]


## Documents information

In [6]:
# Fonction pour faire défiler progressivement

def scroll_slowly(driver, scroll_pause_time, step_size):
    """Fait défiler la page progressivement."""
    last_height = driver.execute_script("return document.body.scrollHeight")
    
    while True:
        # Défile petit à petit (par step_size pixels)
        driver.execute_script(f"window.scrollBy(0, {step_size});")
        
        # Attendre un peu pour laisser la page se charger
        time.sleep(scroll_pause_time)
        
        # Calculer la nouvelle hauteur après défilement
        new_height = driver.execute_script("return document.body.scrollHeight")
        
        # Vérifier si la hauteur de la page n'a plus changé (fin du défilement)
        if new_height == last_height:
            break
        
        last_height = new_height

In [7]:
# Fonction pour récupérer les documents d'un auteur

def fetch_author_docs_links(driver, author_id):
    """Récupère les liens des documents d'un auteur sur Scopus."""
    # Charger l'URL de l'auteur
    url = f"https://www.scopus.com/authid/detail.uri?authorId={author_id}"
    driver.get(url)

    # Attendre jusqu'à ce que la page se charge
    wait = WebDriverWait(driver, 30)
    
    try:
        # Défilement léger vers le bas de la page
        scroll_slowly(driver, scroll_pause_time=0.2, step_size=1000)

        # Liste pour stocker les eids
        docs_links = []

        # Attendre que les éléments avec la classe spécifiée soient visibles
        res = wait.until(EC.visibility_of_all_elements_located((By.CLASS_NAME, "Typography-module__lVnit.Typography-module__Cv8mo.Typography-module__mZVLC.Typography-module__ETlt8")))

        # Parcourir chaque élément trouvé
        for element in res:
            try:
                # Trouver le lien <a> à l'intérieur de chaque élément
                target_link = element.find_element(By.TAG_NAME, "a")

                # Extraire la valeur de l'attribut 'href'
                href_value = target_link.get_attribute("href")

                # Ajouter le lien à la liste
                docs_links.append(href_value)
            except Exception as e:
                print(f"Erreur lors de l'extraction du lien : {e}")

#         # Afficher les liens trouvés
#         if docs_links:
#             print("Premier lien trouvé :", docs_links[0])

    except Exception as e:
        print(f"Erreur : {e}")

    # Retourner la liste des liens
    return docs_links


In [12]:
docs_links = fetch_author_docs_links(driver, author_id=34868498800)
docs_links

['https://www.scopus.com/record/display.uri?eid=2-s2.0-85198965233&origin=resultslist',
 'https://www.scopus.com/record/display.uri?eid=2-s2.0-85204952835&origin=resultslist',
 'https://www.scopus.com/record/display.uri?eid=2-s2.0-85205557922&origin=resultslist',
 'https://www.scopus.com/record/display.uri?eid=2-s2.0-85203411303&origin=resultslist',
 'https://www.scopus.com/record/display.uri?eid=2-s2.0-85201074819&origin=resultslist',
 'https://www.scopus.com/record/display.uri?eid=2-s2.0-85198962570&origin=resultslist',
 'https://www.scopus.com/record/display.uri?eid=2-s2.0-85198976141&origin=resultslist',
 'https://www.scopus.com/record/display.uri?eid=2-s2.0-85199688748&origin=resultslist',
 'https://www.scopus.com/record/display.uri?eid=2-s2.0-85198353041&origin=resultslist',
 'https://www.scopus.com/record/display.uri?eid=2-s2.0-85196419306&origin=resultslist']

In [8]:
# Fonction pour extraire les éléments avec gestion d'exception
def extract_element(driver, by, locator, timeout=10, multiple=False):
    try:
        if multiple:
            elements = WebDriverWait(driver, timeout).until(EC.presence_of_all_elements_located((by, locator)))
            return [element.text.strip() for element in elements]
        else:
            element = WebDriverWait(driver, timeout).until(EC.presence_of_element_located((by, locator)))
            return element.text.strip()
    except:
        return None if not multiple else []


In [9]:
# Fonction pour récupérer les données des documents

def fetch_documents_data(driver, docs_links):
    documents_data = []

    for link in docs_links:
        # Accéder à l'URL du document
        driver.get(link)

        # Attendre que la page charge complètement
        wait = WebDriverWait(driver, 30)

        # Scraper les données du document
        data = {
            "authors": extract_element(driver, By.CLASS_NAME, 'Button-module__nc6_8.Button-module__rphhF.Button-module__VBKvn.Button-module__MlsfC.Button-module__Y0far.Button-module__hK_LA.Button-module__qDdAl', multiple=True),
            "issn": extract_element(driver, By.XPATH, "//dl[@data-testid='source-info-entry-issn']/dd") or '',
            "doi": extract_element(driver, By.XPATH, "//dl[@data-testid='source-info-entry-doi']/dd") or '',
            "title": extract_element(driver, By.CSS_SELECTOR, 'h2.Typography-module__lVnit') or '',
            "type_doc": extract_element(driver, By.XPATH, "//dl[@data-testid='source-info-entry-document-type']/dd") or '',
            "source_type": extract_element(driver, By.XPATH, "//dl[@data-testid='source-info-entry-source-type']/dd") or '',
            "citation": extract_element(driver, By.CLASS_NAME, "panel-title").split()[-2] if extract_element(driver, By.CLASS_NAME, "panel-title") else '',
            "date_pub": extract_element(driver, By.XPATH, "(//div[contains(@class, 'PublicationInformationBar-module')]//span)[4]") or '',
            "abstract": extract_element(driver, By.CLASS_NAME, "Typography-module__lVnit.Typography-module__ETlt8.Typography-module__GK8Sg") or '',
            "key_words": extract_element(driver, By.CLASS_NAME, "Typography-module__lVnit.Typography-module__ETlt8.AuthorKeywords-module__tuDgJ", multiple=True) or ''
        }

        # Ajouter les données du document à la liste
        documents_data.append(data)

    return documents_data


In [16]:
documents_data = fetch_documents_data(driver, docs_links)
print(json.dumps(documents_data, indent=3))

[
   {
      "authors": [
         "S. S. Castro, Ra\u00edra",
         "O. S. Santos, G\u00e9ssica",
         "V. Lanza, Marcos Roberto",
         "R. Salazar-Banda, Giancarlo",
         "I. B. Eguiluz, Katlin",
         "S\u00e1ez, Cristina",
         "Rodrigo, Manuel Andr\u00e9s"
      ],
      "issn": "13835866",
      "doi": "10.1016/j.seppur.2024.128828",
      "title": "Towards an electrochemically-based circular economy: Electro-refinery for valorizing phenolic wastewater",
      "type_doc": "Article",
      "source_type": "Journal",
      "citation": "0",
      "date_pub": "19 February 2025",
      "abstract": "This study introduces an innovative approach to wastewater treatment that combines two electrochemical technologies: electrolysis and electrochemical separation facilitated by anionic exchange membranes. This integrated technology converts pollutants into carboxylates, which are valuable intermediates for further electrosynthesis and fuel production. The efficacy of thi

# Get Journal Info

In [35]:
# Extraire seulement les valeurs "issn" de chaque document
issn_list = [doc.get("issn", "ISSN not found") for doc in documents_data]
print(issn_list)

['13835866', '03043894', '03603199', '03043894', '09601481', '00134686', '13858947', '00456535', '13858947', '00456535']


In [10]:
def Search_journal_info(driver, issn):
    
    # Accéder à la page avec le formulaire de recherche
    driver.get("https://www.scimagojr.com/")

    # Attendre que l'input de recherche soit présent
    try:               
        search_input = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.ID, "searchinput"))
        )
        # Entrer l'ISSN et simuler un appui sur la touche "Entrée"
        search_input.clear()
        search_input.send_keys(issn, Keys.RETURN)

        # Attendre que les résultats s'affichent et cliquer
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CLASS_NAME, "search_results"))
        ).click()

    except Exception as e:
        print(f"An error occurred: {e}")


In [21]:
len(issn_list)

10

- Fonctions auxiliaires pour extraire le quartile, SJR et Facteur d'Impact:

In [11]:
def extract_quartile(soup):
    # Extraction de la métrique Quartile
    quartile_data = {}
    dash = soup.findAll('div', class_='dashboard')
    if dash:
        quart_dash = dash[0].findAll('div', class_="cellslide")
        if quart_dash:
            last_quartile_row = quart_dash[1].find('tbody').find_all('tr')[-1].find_all('td')
            if len(last_quartile_row) == 3:
                quartile_data = {
#                     "category": last_quartile_row[0].text,
                    "year": last_quartile_row[1].text,
                    "quartile_value": last_quartile_row[2].text
                }
    return quartile_data

In [12]:
def extract_sjr(soup):
    # Extraction de la métrique SJR
    sjr_data = {}
    dash = soup.findAll('div', class_='dashboard')
    if len(dash) > 1:
        sjr_dash = dash[1].findAll('div', class_="cellslide")[1]
        sjr_row = sjr_dash.find('tbody').find_all('tr')[-1].find_all('td')
        if len(sjr_row) == 2:
            sjr_data = {
                "year": sjr_row[0].text,
                "sjr_value": sjr_row[1].text
            }
    return sjr_data

In [13]:
def extract_impact_factor(soup):
    # Extraction de la métrique Facteur d'Impact
    impact_data = {}
    dash = soup.findAll('div', class_='dashboard')
    if len(dash) > 1:
        impact_dash = dash[1].findAll('div', class_="cellslide")[5]
        impact_row = impact_dash.find('tbody').find_all('tr')[-1].find_all('td')
        if len(impact_row) == 3:
            impact_data = {
                "year": impact_row[1].text,
                "impact_factor_value": impact_row[2].text
            }
    return impact_data

-  Fonction principale pour extraire les informations d'une revue en utilisant l'ISSN:

In [14]:
from bs4 import BeautifulSoup

def extract_journal_metrics(driver, issn_list):
    journals_info = []    
    
    for issn in issn_list:
        # Utiliser Search_journal_info pour obtenir les metriques
        Search_journal_info(driver, issn)
        
        # Scraper le h-index
        hindexnumber = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CLASS_NAME, "hindexnumber"))
        ).text

        # Scraper le scope en retirant la dernière ligne non souhaitée
        scope_pt = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CLASS_NAME, "fullwidth"))
        ).text.split("\n")[1]

        # Obtenir le code HTML de la page et l'analyser avec BeautifulSoup
        page_source = driver.page_source
        soup = BeautifulSoup(page_source, 'html.parser')

        # Extraire les autres informations de la revue
        journal_data = {
            "issn": issn,
            "scope": scope_pt or "N/A",
            "index": "Scopus",
            "h_index": hindexnumber or "N/A",
            "quartile": extract_quartile(soup) or "N/A",
            "sjr": extract_sjr(soup) or {"sjr_value": "N/A", "year": "N/A"},
            "impact_factor": extract_impact_factor(soup) or {"impact_factor_value": "N/A", "year": "N/A"}
        }

        # Ajouter les données de la revue à la liste des informations de revues
        journals_info.append(journal_data)
        
        print("Get Journal info for issn ====> ", issn)
#         print(journal_data)

    return journals_info

In [58]:
# Utilisation de la fonction pour obtenir les informations des revues
journals_data = extract_journal_metrics(driver, issn_list)

Get Journal info for issn ====>  13835866
{'issn': '13835866', 'scope': 'Separation and Purification Technology is a publication dedicated to the dissemination of separation research results and findings. Its coverage extends to all aspects of separation and purification of homogeneous solutions and heterogeneous mixtures. Separation and Purification Technology welcomes, from investigators worldwide, contributions focused on the experimental studies and theoretical analyses of phenomena associated with and arising from separation and purification as well as process development and simulation, equipment design and fabrication and materials preparation and modification used in separation operations.', 'index': 'Scopus', 'h_index': '202', 'quartile': {'year': '2023', 'quartile_value': 'Q1'}, 'sjr': {'year': '2023', 'sjr_value': '1.533'}, 'impact_factor': {'year': '2023', 'impact_factor_value': '8.857'}}
Get Journal info for issn ====>  03043894
{'issn': '03043894', 'scope': 'The Journal o

In [33]:
# # Affichage des informations de chaque journal
# for journal in documents_data:
#     print(journal)

# Main Function

In [15]:
import json
from selenium import webdriver

def generate_author_json(driver, author_id, nbr_co_auth):
    # Initialiser le dictionnaire principal pour stocker toutes les données de l'auteur
    result = {
        "author_id": author_id,
        "author_info": {},
        "co_authors": [],
        "publications": []
    }
    
    # Extraire les informations de l'auteur principal
    result["author_info"] = extract_author_metrics(driver, author_id)
    
    # Extraire les informations des co-auteurs
    co_authors_data = fetch_co_authors(driver, author_id, nbr_co_auth)
    for co_author_id in co_authors_data["co-authors"]:
        # Extraire les informations de chaque co-auteur
        co_author_info = extract_author_metrics(driver, co_author_id)
        result["co_authors"].append({
            "co_author_id": co_author_id,
            "co_author_info": co_author_info
        })
    
    # Extraire les documents publiés par l'auteur principal
    docs_links = fetch_author_docs_links(driver, author_id)
    publications_data = fetch_documents_data(driver, docs_links)
    
    for pub_data in publications_data:
        # Extraire les informations de la revue en fonction de l'ISSN
        journal_info = {}
        if pub_data.get("issn"):
            journal_info = extract_journal_metrics(driver, [pub_data["issn"]])
        
        # Ajouter les informations du document au dictionnaire
        result["publications"].append({
            "authors": pub_data["authors"],
            "title": pub_data["title"],
            "publication_year": pub_data["date_pub"],
            "source_title": pub_data["source_type"],
            "citation_count": pub_data["citation"],
            "doi": pub_data["doi"],
            "abstract": pub_data["abstract"],
            "keywords": pub_data["key_words"],
            "document_type": pub_data["type_doc"],
            "journal_info": journal_info
        })

    return result


In [16]:

# # Créer le driver Selenium
# driver = webdriver.Chrome()

# Exemple d'utilisation
author_id = "57203014555"  
nbr_co_auth = 1 # Nombre de co-auteurs à récupérer

# Générer le dictionnaire des données d'auteur
author_data = generate_author_json(driver, author_id, nbr_co_auth)




Get Journal info for issn ====>  23129794
Get Journal info for issn ====>  23530626
Get Journal info for issn ====>  15313492
Get Journal info for issn ====>  02181274
Get Journal info for issn ====>  02181274


In [17]:
author_data

{'author_id': '57203014555',
 'author_info': {'Nom_Complet': 'Aghriche, Ahmed',
  'Affiliation': 'Kenitra - Morocco',
  'Citations': 48,
  'Documents': 5,
  'h-index': 3,
  'FWCI': 0.79},
 'co_authors': [{'co_author_id': '6507649903',
   'co_author_info': {'Nom_Complet': 'Tridane, Abdessamad',
    'Affiliation': 'Al Ain - United Arab Emirates',
    'Citations': 819,
    'Documents': 73,
    'h-index': 14,
    'FWCI': 0.89}}],
 'publications': [{'authors': ['Najm F.',
    'Yafia R.',
    'Aziz Alaoui M.A.',
    'Aghriche A.',
    'Moussaoui A.'],
   'title': 'A survey on constructing Lyapunov functions for reaction-diffusion systems with delay and their application in biology',
   'publication_year': '2023',
   'source_title': 'Journal',
   'citation_count': '0',
   'doi': '10.23939/mmc2023.03.965',
   'abstract': 'Motivated by some biological and ecological problems given by reaction-diffusion systems with delays and boundary conditions of Neumann type and knowing their associated Lyap

In [18]:
import json 
print(json.dumps(author_data, indent=4))

{
    "author_id": "57203014555",
    "author_info": {
        "Nom_Complet": "Aghriche, Ahmed",
        "Affiliation": "Kenitra - Morocco",
        "Citations": 48,
        "Documents": 5,
        "h-index": 3,
        "FWCI": 0.79
    },
    "co_authors": [
        {
            "co_author_id": "6507649903",
            "co_author_info": {
                "Nom_Complet": "Tridane, Abdessamad",
                "Affiliation": "Al Ain - United Arab Emirates",
                "Citations": 819,
                "Documents": 73,
                "h-index": 14,
                "FWCI": 0.89
            }
        }
    ],
    "publications": [
        {
            "authors": [
                "Najm F.",
                "Yafia R.",
                "Aziz Alaoui M.A.",
                "Aghriche A.",
                "Moussaoui A."
            ],
            "title": "A survey on constructing Lyapunov functions for reaction-diffusion systems with delay and their application in biology",
            

In [None]:
# Sauvegarder le dictionnaire en format JSON
with open("author_data.json", "w", encoding="utf-8") as json_file:
    json.dump(author_data, json_file, ensure_ascii=False, indent=4)

print("Fichier JSON créé avec succès : author_data.json")
# driver.quit()

In [None]:
Authors_list = [57203014555, 6504491745, 57210231748, 55497471000, 57190491921, 56168902600, 46161208600, 15753326700, 57215420513, 6504822757, 57202849162, 57194380165, 46061374500, 57202156076, 57638090100, 57193550788]