In [10]:
import pandas as pd
import networkx as nx
from helpers import get_refs_and_cites, deduplicate_edges
from tqdm import tqdm
from IPython.display import clear_output
from habanero import Crossref
import pickle

Теперь давайте сделаем граф. На этом этапе я решил не делить данные по годам, поскольку пока мы не изучаем динамику и данных в принципе немного.

Для этого я воспользуюсь библиотекой networkx. 

In [2]:
# Shared Crossref client; recursive_edges passes it to get_refs_and_cites on every lookup.
cr = Crossref()
def recursive_edges(base_doi: str, edges: list, doi: str = "", depth = 3, curr_depth = 3):
    """
    Recursively collect citation-graph edges around a publication.

    The current publication is ``doi`` when it is non-empty, otherwise
    ``base_doi`` (i.e. the very first call). Its references and citations are
    looked up with ``get_refs_and_cites`` and appended to ``edges`` in place as
    ``(current, reference)`` and ``(citation, current)`` tuples.

    Args:
        base_doi: DOI the whole traversal started from.
        edges: list that receives the edge tuples; mutated in place. It may
            contain duplicates, so deduplicate it afterwards.
        doi: DOI to expand on this call; empty string means "use base_doi".
        depth: total depth of the traversal (the value used for the root call).
        curr_depth: remaining depth; the recursion stops when it reaches 0.

    Returns:
        None. Results are delivered through ``edges``.

    Notes:
        * References are followed at every level. Citations are followed only
          below the root level (``curr_depth < depth``), and a citation equal
          to ``base_doi`` is never expanded again.
        * A failed lookup is reported on stdout and that branch is skipped, so
          a single network error does not abort the whole crawl.
        * The hope is that a large enough depth yields a single connected
          component for multiple starting DOIs.
    """
    if curr_depth == 0:
        return
    # No throttling is applied between requests (a time.sleep(0.5) was tried here and disabled).
    d = doi if len(doi) else base_doi
    try:
        refs, cites = get_refs_and_cites(d, cr)
    except Exception as e:
        print(e)
        print(type(e))
        # Without this return `refs`/`cites` would be unbound below and the
        # function would die with UnboundLocalError instead of skipping `d`.
        return
    # Outgoing edges: current publication -> each of its references.
    edges.extend((d, ref) for ref in refs)
    # Incoming edges: each citing publication -> current publication.
    edges.extend((cite, d) for cite in cites)
    for ref in refs:
        recursive_edges(base_doi, edges, ref, depth, curr_depth-1)
    # Citations of the root itself are recorded above but not expanded.
    if curr_depth < depth:
        for cite in cites:
            if cite != base_doi:
                recursive_edges(base_doi, edges, cite, depth, curr_depth-1)

In [3]:
def build_graph(g: nx.DiGraph, df: pd.DataFrame, depth: int = 5):
    """
    Grow a citation graph from every publication listed in a DataFrame.

    Args:
        g: base graph; edges are added to it in place.
        df: DataFrame with a ``doi`` column holding the DOIs of the
            publications to start from.
        depth: traversal depth handed to ``recursive_edges`` for every
            starting publication.

    Returns:
        None. The result is the mutated ``g``.

    Algorithm: for each starting DOI, gather references and citations up to
    the fixed depth as a list of edges, deduplicate that list and add it to
    the graph. Edges are added per publication, so an interrupted run keeps
    everything collected so far.
    """
    # Iterate over the column itself rather than df.iterrows(): no per-row
    # Series is built, and tqdm gets a known length, so it can show a real
    # progress bar with an ETA instead of a bare iteration counter.
    for base_doi in tqdm(df["doi"], total=len(df)):
        # A missing DOI (NaN/None/empty) cannot be looked up; skip it.
        if not isinstance(base_doi, str) or not base_doi:
            continue
        edges = []
        recursive_edges(base_doi, edges, depth=depth, curr_depth=depth)
        g.add_edges_from(deduplicate_edges(edges))
    

In [40]:
# Load the cleaned publications table. The path is relative, so it is resolved
# against the notebook's current working directory.
pubs = pd.read_csv("../data/clean_pubs.csv")

Перед тем как пытаться собрать данные в одну связную компоненту путём рекурсивного поиска цитирований, я собираюсь кластеризовать наши данные, чтобы немного ускорить процесс поиска.

In [41]:
import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer

# Fetch the NLTK data packages needed below; packages that are already
# present are simply reported as up-to-date.
for nltk_resource in ('punkt_tab', 'stopwords'):
    nltk.download(nltk_resource)

# Module-level helpers used by preprocess_text: the English stop-word set
# and a Porter stemmer.
stop_words = set(stopwords.words('english'))
stemmer = PorterStemmer()

def preprocess_text(text):
    """
    Normalize one abstract into a space-separated string of stemmed tokens.

    The text is lower-cased and tokenized with ``nltk.word_tokenize``; tokens
    that are not purely alphanumeric or that appear in the module-level
    ``stop_words`` set are dropped, and the remaining ones are stemmed with
    the module-level ``stemmer``.

    Args:
        text: the raw abstract. Anything that is not a ``str`` (e.g. ``None``
            or a NaN coming from a missing CSV cell) is treated as empty.

    Returns:
        The processed tokens joined by single spaces; ``""`` for missing input.
    """
    # Missing abstracts arrive as None/NaN (float), which have no .lower();
    # return an empty document instead of aborting the whole .apply().
    if not isinstance(text, str):
        return ''
    # Tokenize and remove stop words
    tokens = nltk.word_tokenize(text.lower())
    tokens = [stemmer.stem(word) for word in tokens if word.isalnum() and word not in stop_words]
    return ' '.join(tokens)

# Apply preprocessing to the abstracts; the result is stored in a new
# 'processed_abstract' column, i.e. `pubs` itself is modified.
pubs['processed_abstract'] = pubs['abstract'].apply(preprocess_text)


[nltk_data] Downloading package punkt_tab to /home/vunz/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package stopwords to /home/vunz/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [42]:
# Initialize TF-IDF Vectorizer (all parameters left at the library defaults)
vectorizer = TfidfVectorizer()

# Fit the vocabulary on the processed abstracts and transform them in one call;
# `tfidf_matrix` is what the clustering step below is fitted on.
tfidf_matrix = vectorizer.fit_transform(pubs['processed_abstract'])

In [43]:
from sklearn.cluster import KMeans

# Number of K-Means clusters (a tunable value worth experimenting with).
num_clusters = 5

# Create the estimator with a fixed seed and fit it on the TF-IDF matrix in a
# single expression; `fit` returns the fitted estimator itself.
kmeans = KMeans(n_clusters=num_clusters, random_state=42).fit(tfidf_matrix)

# Store the cluster label of every publication as a new DataFrame column.
pubs['cluster'] = kmeans.labels_

In [44]:
# Publications per cluster: count the non-null DOIs of the `doi` column only,
# instead of counting every column per group and then discarding all but one.
pubs.groupby("cluster")["doi"].count().to_frame("count")

Unnamed: 0_level_0,count
cluster,Unnamed: 1_level_1
0,1548
1,5023
2,14465
3,24885
4,18425


In [45]:
# Preview the DataFrame with the added columns (Jupyter renders a truncated view).
pubs

Unnamed: 0.1,Unnamed: 0,title,doi,year,abstract,keywords,refs,authors,processed_abstract,cluster
0,0,Studies of Zγ production in association with a...,10.1007/JHEP07(2017)107,2017,The production of a Z boson and a photon in as...,"(""'Electroweak interaction'"", "" 'Hadron-Hadron...","('Eboli O.J.P., Gonzalez-Garcia M.C., Lietti S...","(""'Ahmadov F.'"", "" 'Aleksandrov I.N.'"", "" 'Bed...",product z boson photon associ dijet system stu...,1
1,1,Towards the detection of light and heavy relic...,10.1016/j.ppnp.2011.01.050,2011,The standard Big Bang cosmology predicts that ...,"(""'Neutrino capture'"", "" 'Relic neutrinos'"", ""...","('Giunti C., Kim C.W., Fundamentals of Neutrin...","(""'Šimkovic F.'"",)",standard big bang cosmolog predict univers abu...,3
2,3,Measurement of the underlying event in jet eve...,10.1140/epjc/s10052-014-2965-5,2014,Distributions sensitive to the underlying even...,"('',)",('The underlying event in hard interactions at...,"(""'Ahmadov F.'"", "" 'Aleksandrov I.N.'"", "" 'Bed...",distribut sensit underli event qcd jet event m...,2
3,4,Bubble and kink solitons in the φ6-model of no...,10.1016/0375-9601(93)91074-F,1993,We have studied the φ6-model in the parameter ...,"('',)","('Kosevich, Et al., Sov. J. Low Temp. Phys., 2...","(""'Agüero Granados M.A.'"",)",studi paramet domain 1 relev paramet model cas...,4
4,5,Standard complex for quantum lie algebras,10.1134/1.1432906,2001,"For a quantum Lie algebra Γ, let Γ∧ be its ext...","('',)","('Woronowicz S.L., Publ. RIMS Kyoto, 23, (1987...","(""'Burdik C.'"", "" 'Isaev A.P.'"", "" 'Ogievetsky...",quantum lie algebra γ let exterior extens alge...,4
...,...,...,...,...,...,...,...,...,...,...
64341,72011,Molecular markers development for studying of ...,10.1063/5.0063872,2021,['Here we report about molecular genetic marke...,"('',)","('Philippe H., Et al., Phylogenomics Revives T...","(""'Yakhnenko A.'"", "" 'Yushin N.'"", "" 'Zinicovs...",report molecular genet marker develop primer p...,3
64342,72012,Measurement of the anomalous precession freque...,10.1103/PhysRevD.103.072002,2021,['The Muon g-2 Experiment at Fermi National Ac...,"('',)","('Abi B., Letter companion, Measurement of the...","(""'Baranov V.A.'"", "" 'Duginov V.N.'"", "" 'Khomu...",muon experi fermi nation acceler laboratori fn...,2
64343,72013,ATLAS results on quarkonia and heavy flavor pr...,10.1142/S0217751X20440030,2020,['The associated production a vector boson wit...,"(""'associated production'"", "" 'atlas'"", "" 'hea...","('J. Instrum, 3, (2008)', 'J. High Energy Phys...","(""'Lyubushkina T.'"",)",associ product vector boson key observ underst...,2
64344,72014,Analytical study of light bullets stabilizatio...,10.1016/j.chaos.2022.111799,2022,"[""Dynamics of light bullets in the Raman activ...","(""'diffraction'"", "" 'light bullet'"", "" 'lyapun...","('Couairon A., Mysyrowicz A., Femtosecond fila...","(""'Bugay A.N.'"", "" 'Khalyapin V.A.'"")",dynam light bullet raman activ ioniz medium an...,4


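Выберем, к примеру, второй кластер. Позже алгоритм нужно будет применить к каждому кластеру, но сейчас для проверки я просто возьму кластер со сравнительно небольшим количеством публикаций — но и не слишком маленьким, как первый кластер. (NB: код ниже фильтрует по `cluster == 0` и берёт первые 450 публикаций; номер кластера в тексте и в коде стоит согласовать.)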

In [46]:
# Select the publications whose K-Means label is 0.
cluster0 = pubs.loc[pubs.cluster == 0]
# Keep only the first 450 rows of that cluster as a smaller sample for a trial run.
samp = cluster0.iloc[:450]

In [13]:
# Empty directed graph; build_graph() adds edges in place to the graph it is given.
graph = nx.DiGraph()

In [None]:
# build_graph(graph, samp, depth=2)

19it [06:13, 16.67s/it]

HTTP error: 400 Client Error: Bad Request for url: https://opencitations.net/index/coci/api/v1/citations/10.1016/0029-554X(67)91363-8


21it [06:24, 10.57s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)


58it [12:38, 11.96s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(h

67it [19:48, 10.97s/it] 

In crossref got an exception Redirect response '301 Moved Permanently' for url 'https://api.crossref.org/works/10.1007/s100520050217'
Redirect location: '/works/10.1007/s100529800895'
For more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/301


70it [20:20,  9.87s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)


72it [21:07, 14.96s/it]

In crossref got an exception Client error '404 Not Found' for url 'https://api.crossref.org/works/10.5516/j.physletb.2015.09.001'
For more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/404


82it [22:16,  5.36s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)


85it [22:45,  7.40s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)


92it [23:34,  6.56s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Max retries exceeded with url: /index/coci/api/v1/citations/10.1139/p56-005 (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x72fd2348fe00>, 'Connection to opencitations.net timed out. (connect timeout=10)'))
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Max retries exceeded with url: /index/coci/api/v1/citations/10.1103/RevModPhys.27.339 (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x72fd0d12caa0>, 'Connection to opencitations.net timed out. (connect timeout=10)'))
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Max retries exceeded with url: /index/coci/api/v1/citations/10.1016/0029-5582(60)90038-9 (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x72fe4157b860>, 'Connection to opencitations.net timed out. (connect timeout=10)'))


94it [25:21, 25.84s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Max retries exceeded with url: /index/coci/api/v1/citations/10.1002/ctpp.19810210505 (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x72fddf826a50>, 'Connection to opencitations.net timed out. (connect timeout=10)'))


95it [25:43, 24.65s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(h

98it [29:27, 42.25s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)


101it [29:43, 17.25s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)


103it [30:03, 13.10s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)


105it [32:08, 36.09s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)


108it [32:34, 17.13s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)


111it [33:56, 19.83s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)


112it [34:49, 29.61s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)


115it [35:32, 17.05s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Max retries exceeded with url: /index/coci/api/v1/citations/10.1063/1.1716052 (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x72fd550a4950>, 'Connection to opencitations.net timed out. (connect timeout=10)'))
In crossref got an exception [Errno 113] No route to host
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Max retries exceeded with url: /index/coci/api/v1/citations/10.1109/TNS2.1958.4315653 (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x72fd545ec2c0>, 'Connection to opencitations.net timed out. (connect timeout=10)'))


116it [36:14, 24.79s/it]

In crossref got an exception timed out


119it [37:02, 16.63s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)


121it [37:38, 16.11s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)


122it [38:37, 28.95s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Max retries exceeded with url: /index/coci/api/v1/citations/10.1016/j.nuclphysa.2006.12.055 (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x72fe4155baa0>, 'Connection to opencitations.net timed out. (connect timeout=10)'))
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)


126it [42:17, 40.90s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)


130it [43:11, 16.89s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Max retries exceeded with url: /index/coci/api/v1/citations/10.1007/BF01459410 (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x72fde38d1ac0>, 'Connection to opencitations.net timed out. (connect timeout=10)'))
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Max retries exceeded with url: /index/coci/api/v1/citations/10.2307/1968929 (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x72fd54530a70>, 'Connection to 

132it [51:44, 117.01s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)


133it [52:15, 91.27s/it] 

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)


145it [53:37,  6.18s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)


146it [55:03, 30.10s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)


147it [55:38, 31.39s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)


148it [56:11, 31.84s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Max retries exceeded with url: /index/coci/api/v1/citations/10.1103/PhysRev.157.317 (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x72fd3b9cb0e0>, 'Connection to opencitations.net timed out. (connect timeout=10)'))
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Max retries exceeded with url: /index/coci/api/v1/citations/10.1103/PhysRevB.6.832 (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x72fd3abe3c80>, 'Connection to opencitations.net timed out. (connect timeout=10)'))
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Max retries exceeded with url: /index/coci/api/v1/citations/10.1063/1.1662830 (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x72fd97a3bf80>, 'Connection to opencitations.net timed out. (connect timeout=10)'))
Request failed: HTTPSConnectionPool(host='opencita

149it [57:36, 47.79s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Max retries exceeded with url: /index/coci/api/v1/citations/10.1103/PhysRev.107.1729 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x72fd234ecd10>: Failed to establish a new connection: [Errno 113] No route to host'))
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Max retries exceeded with url: /index/coci/api/v1/citations/10.1103/PhysRev.106.386 (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x72fcb81c7890>, 'Connection to opencitations.net timed out. (connect timeout=10)'))
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Max retries exceeded with url: /index/coci/api/v1/citations/10.1103/PhysRev.109.223 (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x72fcb81c61b0>, 'Connection to opencitations.net timed out. (connect timeout=10)'))
Request failed: HTTPSConnectionPool(host

162it [1:04:19,  8.02s/it] 

In crossref got an exception timed out
In crossref got an exception timed out
HTTP error: 400 Client Error: Bad Request for url: https://opencitations.net/index/coci/api/v1/citations/10.1103/PhysRevB.6.4370


177it [1:07:06,  7.94s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
In crossref got an exception Redirect response '301 Moved Permanently' for url 'https://api.crossref.org/works/10.1002/andp.19730290204'
Redirect location: '/works/10.1002/andp.19734840204'
For more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/301
In crossref got an exception timed out


178it [1:10:27, 65.84s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Max retries exceeded with url: /index/coci/api/v1/citations/10.1002/pssb.2221030259 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x72fddf8204d0>: Failed to establish a new connection: [Errno 113] No route to host'))


179it [1:10:50, 53.05s/it]

In crossref got an exception [Errno -3] Temporary failure in name resolution
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Max retries exceeded with url: /index/coci/api/v1/citations/10.2753/RSP1061-1967150425 (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x72fd545eeba0>: Failed to resolve 'opencitations.net' ([Errno -3] Temporary failure in name resolution)"))


180it [1:11:30, 49.18s/it]

In crossref got an exception [Errno -3] Temporary failure in name resolution


181it [1:11:44, 38.43s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)


183it [1:13:21, 42.26s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)


184it [1:14:11, 44.57s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)


190it [1:15:50, 18.86s/it]

In crossref got an exception [Errno 113] No route to host


191it [1:16:02, 16.65s/it]

In crossref got an exception _ssl.c:983: The handshake operation timed out


195it [1:16:26,  9.91s/it]

In crossref got an exception _ssl.c:983: The handshake operation timed out
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Max retries exceeded with url: /index/coci/api/v1/citations/10.1103/PhysRevC.93.019802 (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x72fddf844c50>: Failed to resolve 'opencitations.net' ([Errno -3] Temporary failure in name resolution)"))


197it [1:17:30, 18.44s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Max retries exceeded with url: /index/coci/api/v1/citations/10.1002/pssb.2220770108 (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x72fe4157b680>, 'Connection to opencitations.net timed out. (connect timeout=10)'))
Request failed: HTTPSConnectionPool(hos

198it [1:25:00, 148.13s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Max retries exceeded with url: /index/coci/api/v1/citations/10.1007/BF02748866 (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x72fc055947d0>, 'Connection to opencitations.net timed out. (connect timeout=10)'))


199it [1:25:11, 106.89s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)


201it [1:26:24, 68.16s/it] 

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)


203it [1:27:09, 44.19s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)


204it [1:27:29, 36.80s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)


209it [1:29:45, 23.74s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)


214it [1:32:06, 19.42s/it]

In crossref got an exception Client error '404 Not Found' for url 'https://api.crossref.org/works/10.1045/january2003-kalinichenko'
For more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/404
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)


217it [1:32:43, 12.62s/it]

In crossref got an exception Client error '404 Not Found' for url 'https://api.crossref.org/works/10.3204/DESY-PROC-2010-01/lykasov'
For more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/404


218it [1:32:49, 10.58s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)


226it [1:33:51,  7.19s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)


229it [1:34:31, 10.18s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Max retries exceeded with url: /index/coci/api/v1/citations/10.1103/PhysRevD.18.3890 (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x72fd3ab9ce30>, 'Connection to opencitations.net timed out. (connect timeout=10)'))
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Max retries exceeded with url: /index/coci/api/v1/citations/10.1103/PhysRevD.10.2445 (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x72fc21e39100>, 'Connection to opencitations.net timed out. (connect timeout=10)'))


231it [1:36:03, 24.44s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Max retries exceeded with url: /index/coci/api/v1/citations/10.1016/S0375-9474(00)00125-1 (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x72fe29a2b890>, 'Connection to opencitations.net timed out. (connect timeout=10)'))
In crossref got an exception timed out
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Max retries exceeded with url: /index/coci/api/v1/citations/10.1016/S0375-9474(00)00462-0 (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x72fd56447830>: Failed to resolve 'opencitations.net' ([Errno -3] Temporary failure in name resolution)"))
In crossref got an exception [Errno -3] Temporary failure in name resolution
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Max retries exceeded with url: /index/coci/api/v1/citations/10.1016/S0375-9474(00)00613-8 (Caused by NameResolutionError("<urllib3.connec

234it [2:01:21, 234.37s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)


235it [2:02:14, 179.79s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)


237it [2:02:26, 90.79s/it] 

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Max retries exceeded with url: /index/coci/api/v1/citations/10.1007/BF02827775 (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x72fcb819a510>, 'Connection to opencitations.net timed out. (connect timeout=10)'))


240it [2:02:46, 34.35s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Max retries exceeded with url: /index/coci/api/v1/citations/10.1016/0969-8043(93)90112-N (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x72fd23434740>, 'Connection to opencitations.net timed out. (connect timeout=10)'))
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Max retries exceeded with url: /index/coci/api/v1/citations/10.1016/0969-8043(94)90123-6 (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x72fd234354f0>, 'Connection to opencitations.net timed out. (connect timeout=10)'))


241it [2:03:18, 33.60s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Max retries exceeded with url: /index/coci/api/v1/citations/10.1103/RevModPhys.35.335 (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x72fd234377d0>, 'Connection to opencitations.net timed out. (connect timeout=10)'))


243it [2:03:36, 20.41s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Max retries exceeded with url: /index/coci/api/v1/citations/10.1063/1.1150301 (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x72fd234358b0>, 'Connection to opencitations.net timed out. (connect timeout=10)'))
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Max retries exceeded with url: /index/coci/api/v1/citations/10.1063/1.1427032 (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x72fd21f4e660>, 'Connection to opencitations.net timed out. (connect timeout=10)'))


245it [2:04:11, 17.89s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Max retries exceeded with url: /index/coci/api/v1/citations/10.1002/pssb.2221500144 (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x72fd21f4f800>, 'Connection to opencitations.net timed out. (connect timeout=10)'))
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Max retries exceeded with url: /index/coci/api/v1/citations/10.1063/1.435259 (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x72fd21f4e030>, 'Connection to opencitations.net timed out. (connect timeout=10)'))


246it [2:04:43, 22.35s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)


250it [2:05:45, 14.18s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
HTTP error: 400 Client Error: Bad Request for url: https://opencitations.net/index/coci/api/v1/citations/10.1103/PhysicsPhysiqueFizika.1.195
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)


266it [2:10:06,  6.68s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)


267it [2:11:44, 34.04s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)


272it [2:12:37, 14.04s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)


273it [2:12:54, 15.21s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)


286it [2:15:09,  8.64s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)


289it [2:15:54, 10.17s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Max retries exceeded with url: /index/coci/api/v1/citations/10.1146/annurev.aa.05.090167.002341 (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x72fd5666c890>, 'Connection to opencitations.net timed out. (connect timeout=10)'))
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Max retries exceeded with url: /index/coci/api/v1/citations/10.1103/PhysRev.78.22 (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x72fde0679070>, 'Connection to opencitations.net timed out. (connect timeout=10)'))
HTTP error: 400 Client Error: Bad Request for url: https://opencitations.net/index/coci/api/v1/citations/10.1103/PhysRev.108.1175
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Max retries exceeded with url: /index/coci

291it [2:18:51, 42.47s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)


292it [2:19:55, 48.96s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Max retries exceeded with url: /index/coci/api/v1/citations/10.1016/0010-4655(82)90060-1 (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x72fd3b9b5a00>, 'Connection to opencitations.net timed out. (connect timeout=10)'))


294it [2:25:01, 89.25s/it] 

HTTP error: 400 Client Error: Bad Request for url: https://opencitations.net/index/coci/api/v1/citations/10.1103/PhysRevLett.8.142


296it [2:25:59, 57.97s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)


297it [2:26:10, 43.89s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)


304it [2:29:01, 30.80s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Max retries exceeded with url: /index/coci/api/v1/citations/10.1002/pssb.2221230251 (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x72fd21f1f4d0>, 'Connection to opencitations.net timed out. (connect timeout=10)'))
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Max retries exceeded with url: /index/coci/api/v1/citations/10.1016/0038-1098(79)91051-2 (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x72fd21f1e960>, 'Connection to opencitations.net timed out. (connect timeout=10)'))


306it [2:29:40, 23.51s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)


315it [2:31:15,  7.93s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(h

320it [2:39:35, 51.83s/it] 

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Max retries exceeded with url: /index/coci/api/v1/citations/10.1016/0165-7992(81)90051-8 (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x72fde0730080>, 'Connection to opencitations.net timed out. (connect timeout=10)'))
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Max retries exceeded with url: /index/coci/api/v1/citations/10.1007/BF00352529 (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x72fd545c1880>, 'Connection to opencitations.net timed out. (connect timeout=10)'))
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Max retries exceeded with url: /index/coci/api/v1/citations/10.1016/0027-5107(69)90072-4 (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x72fd545c1430>, 'Connection to opencitations.net timed out. (connect timeout=10)'))
Request failed: HTTPSConnectionPool(ho

327it [2:41:35, 12.97s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Max retries exceeded with url: /index/coci/api/v1/citations/10.1007/3-540-10290-6 (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x72fd24481130>, 'Connection to opencitations.net timed out. (connect timeout=10)'))


330it [2:41:54,  7.66s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Max retries exceeded with url: /index/coci/api/v1/citations/10.1103/PhysRevLett.29.500 (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x72fd3bca8260>, 'Connection to opencitations.net timed out. (connect timeout=10)'))
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Max retries exceeded with url: /index/coci/api/v1/citations/10.1016/0370-2693(87)90555-7 (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x72fd3bcaabd0>, 'Connection to opencitations.net timed out. (connect timeout=10)'))


333it [2:42:34,  9.40s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Max retries exceeded with url: /index/coci/api/v1/citations/10.1002/pssb.2221490145 (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x72fd54399850>, 'Connection to opencitations.net timed out. (connect timeout=10)'))


334it [2:43:00, 14.40s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Max retries exceeded with url: /index/coci/api/v1/citations/10.1143/JPSJ.51.213 (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x72fd54398e60>, 'Connection to opencitations.net timed out. (connect timeout=10)'))


343it [2:44:30, 10.58s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)


344it [2:44:42, 10.77s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)


345it [2:44:53, 11.00s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Max retries exceeded with url: /index/coci/api/v1/citations/10.1088/0954-3899/30/8/008 (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x72fe12f59d90>, 'Connection to opencitations.net timed out. (connect timeout=10)'))
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)


346it [2:47:56, 62.62s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)


349it [2:49:59, 47.32s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(h

350it [2:54:58, 122.71s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)


351it [2:55:14, 90.78s/it] 

In crossref got an exception The read operation timed out


352it [2:55:24, 66.68s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Max retries exceeded with url: /index/coci/api/v1/citations/10.1016/0370-2693(82)90967-4 (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x72fe29f3c410>, 'Connection to opencitations.net timed out. (connect timeout=10)'))
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)


353it [2:56:38, 68.85s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)


354it [2:57:20, 60.61s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)


355it [2:58:16, 59.45s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Max retries exceeded with url: /index/coci/api/v1/citations/10.1103/PhysRevLett.26.1190 (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x72fd569f2300>, 'Connection to opencitations.net timed out. (connect timeout=10)'))
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)


357it [2:59:09, 40.50s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)


358it [3:00:11, 47.00s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
In crossref got an exception The read operation timed out
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Max retries exceeded with url: /index/coci/api/v1/citations/10.1016/0029-5582(63)90505-4 (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x72fd3986c200>, 'Connection to opencitations.net timed out. (connect timeout=10)'))
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Max retries exceeded with url: /index/coci/api/v1/citations/10.1016/0031-9163(62)90069-0 (Caused by ConnectTimeoutErr

360it [3:03:00, 62.02s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)


361it [3:03:11, 46.95s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)


362it [3:03:23, 36.42s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(h

363it [3:05:26, 62.45s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)


365it [3:06:03, 38.96s/it]

In crossref got an exception timed out
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Max retries exceeded with url: /index/coci/api/v1/citations/10.1103/PhysRevB.74.094513 (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x72fe415cd2b0>, 'Connection to opencitations.net timed out. (connect timeout=10)'))


380it [3:08:31,  5.00s/it]

In crossref got an exception Client error '404 Not Found' for url 'https://api.crossref.org/works/10.4032/9789814613972'
For more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/404
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)


393it [3:10:13,  8.84s/it]

HTTP error: 400 Client Error: Bad Request for url: https://opencitations.net/index/coci/api/v1/citations/10.1007/BF01303701
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)


399it [3:11:43, 12.67s/it]

In crossref got an exception timed out


401it [3:12:22, 16.32s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)


424it [3:17:13,  8.31s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)


425it [3:17:55, 18.57s/it]

HTTP error: 400 Client Error: Bad Request for url: https://opencitations.net/index/coci/api/v1/citations/10.1007/BF01303701
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)
Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)


427it [3:18:52, 21.33s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)


435it [3:19:43,  5.96s/it]

In crossref got an exception _ssl.c:983: The handshake operation timed out


439it [3:20:46,  9.63s/it]

Request failed: HTTPSConnectionPool(host='opencitations.net', port=443): Read timed out. (read timeout=10)


441it [3:21:31, 14.83s/it]

In crossref got an exception timed out


450it [3:24:22, 27.25s/it]


In [6]:
def quick_info(g):
    """
    Print a short size and connectivity summary of a directed graph.
    g - networkx directed graph (anything accepted by nx.is_strongly_connected
        and nx.is_weakly_connected).
    Prints the number of nodes and edges, then exactly one of:
        * the graph is strongly connected;
        * the graph is weakly connected;
        * the numbers of strongly and weakly connected components.
    Returns None.
    """
    n_nodes = len(g.nodes)
    n_edges = len(g.edges)
    print(f"Number of resulting nodes: {n_nodes}")
    print(f"Number of edges in resulting graph: {n_edges}")
    if n_nodes == 0:
        # networkx raises NetworkXPointlessConcept when connectivity is queried
        # on a graph without nodes, so report the situation and stop here.
        print("The graph is empty, connectivity is undefined.")
        return
    if nx.is_strongly_connected(g):
        print("The graph is strongly connected!")
    elif nx.is_weakly_connected(g):
        print("The graph is weakly connected.")
    else:
        print(f"The graph is not connected, having {nx.number_strongly_connected_components(g)} strongly connected components")
        print(f"And {nx.number_weakly_connected_components(g)} weakly connected components")

In [7]:
# Report node/edge counts and connectivity of the whole citation graph.
quick_info(graph)

Number of resulting nodes: 309823
Number of edges in resulting graph: 503819
The graph is not connected, having 309789 strongly connected components
And 39 weakly connected components


In [None]:
# Save the graph to ../data/graph.pickle. The context manager closes (and
# flushes) the file even if pickling fails part-way through.
with open("../data/graph.pickle", "wb") as graph_file:
    pickle.dump(graph, graph_file)

In [4]:
# Reload the previously saved graph from disk.
# NOTE: pickle.load can execute arbitrary code - only load files you created yourself.
with open("../data/graph.pickle", "rb") as graph_file:
    graph = pickle.load(graph_file)

In [9]:
# Keep only the largest weakly connected component.
# key=len is required: without it max() compares the node sets with ">" (proper
# superset), which is never true for disjoint components, so the first component
# yielded would be returned regardless of its size.
largest_cc = max(nx.weakly_connected_components(graph), key=len)
subgraph = nx.subgraph(graph, largest_cc)

In [10]:
# Same summary for the weakly connected component extracted into `subgraph`.
quick_info(subgraph)

Number of resulting nodes: 307888
Number of edges in resulting graph: 501433
The graph is weakly connected.


Теперь посчитаем метрики центральности.

In [11]:
def calc_centralities(g):
    """
    Compute five centrality measures for graph g with networkx default parameters.
    g - networkx graph.
    Returns a dict with the keys "Degree", "Betweenness", "Eigen", "Closeness"
    and "PageRank"; each value is whatever the corresponding networkx function
    returns for g.
    """
    # (result key, networkx function) pairs, listed in evaluation order.
    measures = (
        ("Degree", nx.degree_centrality),
        ("Betweenness", nx.betweenness_centrality),
        ("Eigen", nx.eigenvector_centrality),
        ("Closeness", nx.closeness_centrality),
        ("PageRank", nx.pagerank),
    )
    return {label: measure(g) for label, measure in measures}

In [None]:
# Compute every centrality measure for `subgraph`.
# NOTE: this run was interrupted (see the KeyboardInterrupt in the output); the
# metrics used further down are loaded from JSON files instead.
results = calc_centralities(subgraph)

Exception ignored in: <bound method IPythonKernel._clean_thread_parent_frames of <ipykernel.ipkernel.IPythonKernel object at 0x7df8b2dc8f50>>
Traceback (most recent call last):
  File "/home/vunz/Projects/Diploma/jup/lib/python3.12/site-packages/ipykernel/ipkernel.py", line 775, in _clean_thread_parent_frames
    def _clean_thread_parent_frames(

KeyboardInterrupt: 


Для подсчёта метрик я попросил запустить вычисления на сервере, поэтому дальше обрабатываю результаты в формате JSON.

In [16]:
import json
import collections

In [6]:
def _load_metric(name):
    """Read ../data/<name>.json and return the parsed JSON content."""
    # The context manager closes the file right after parsing;
    # json.load(open(...)) would leave the handle open until garbage collection.
    with open(f"../data/{name}.json", encoding="utf-8") as metric_file:
        return json.load(metric_file)


degree = _load_metric("degree")
eigen = _load_metric("eigen")
closeness = _load_metric("closeness")
betweenness = _load_metric("betweenness")
pagerank = _load_metric("pagerank")

In [17]:
def _sorted_by_key(mapping):
    """Return an OrderedDict with the items of `mapping` in sorted order."""
    return collections.OrderedDict(sorted(mapping.items()))


# Re-order every metric mapping so that all of them share one deterministic key order.
degree = _sorted_by_key(degree)
eigen = _sorted_by_key(eigen)
closeness = _sorted_by_key(closeness)
betweenness = _sorted_by_key(betweenness)
pagerank = _sorted_by_key(pagerank)

In [32]:
# Start the metrics table: index = keys of `degree`, single column = its values,
# then give the auto-generated column 0 a readable name.
degree_frame = pd.DataFrame(degree.values(), index=degree.keys())
df_metrics = degree_frame.rename(columns={0: "degree"})

In [33]:
# Attach the remaining metrics aligned by index label rather than by position:
# assigning a Series matches its index against df_metrics.index, so the result
# does not depend on every dict having exactly the same key order.
remaining_metrics = {
    "eigen": eigen,
    "closeness": closeness,
    "betweenness": betweenness,
    "pagerank": pagerank,
}
for metric_name, metric_values in remaining_metrics.items():
    # A differing key set would otherwise show up only as silent NaNs or dropped rows.
    assert set(metric_values) == set(df_metrics.index), (
        f"{metric_name}: keys differ from the df_metrics index"
    )
    df_metrics[metric_name] = pd.Series(metric_values)

In [35]:
# Ten rows with the highest "degree" values.
df_metrics.nlargest(n=10, columns="degree")

Unnamed: 0,degree,eigen,closeness,betweenness,pagerank
10.1017/CBO9780511524646,0.01853,5.301566e-22,0.016578,0.0,0.005365
10.1103/PhysRev.93.99,0.018427,0.0008474252,0.026207,3.522018e-06,0.005208
10.1103/RevModPhys.57.287,0.018175,1.696595e-14,0.014253,5.865808e-05,0.004879
10.1103/PhysRev.131.2766,0.01803,0.001267654,0.02793,7.359271e-06,0.004439
10.1103/PhysRev.115.485,0.017426,0.01172719,0.046483,5.679642e-06,0.004692
10.1016/0550-3213(79)90022-1,0.01335,0.002890676,0.026376,0.0001263169,0.002438
10.1006/adnd.1995.1002,0.011003,4.709557e-16,0.011959,0.0,0.002037
10.1143/JJAP.27.L209,0.010771,8.102113e-26,0.008158,2.996363e-06,0.003158
10.1103/PhysRev.130.2529,0.010367,0.001902232,0.028841,4.395669e-06,0.002767
10.1016/0031-9163(62)91369-0,0.010267,0.002542276,0.02597,8.714625e-07,0.002733


In [36]:
# Ten rows with the highest "eigen" values.
df_metrics.nlargest(n=10, columns="eigen")

Unnamed: 0,degree,eigen,closeness,betweenness,pagerank
10.1103/PhysRev.83.688,2.6e-05,0.21672,0.042345,0.0,3.5e-05
10.1103/PhysRev.93.233,2.3e-05,0.154497,0.043223,0.0,0.000159
10.1103/PhysRev.87.693,2.9e-05,0.152102,0.042732,0.0,3.3e-05
10.1103/PhysRev.87.1100,2.9e-05,0.151775,0.037266,0.0,2.9e-05
10.1103/PhysRev.88.1142,2.6e-05,0.147518,0.039551,0.0,2.8e-05
10.1103/PhysRev.90.497,0.000265,0.139797,0.039791,3e-06,7.1e-05
10.1103/PhysRev.104.254,0.00592,0.136172,0.051663,1.6e-05,0.001699
10.1103/RevModPhys.13.203,0.001504,0.134626,0.070009,3.1e-05,0.000611
10.1103/PhysRev.95.1669,0.000403,0.127787,0.042116,1.6e-05,0.000102
10.1103/PhysRev.75.1736,1.3e-05,0.11459,0.036354,0.0,4.5e-05


In [37]:
# Ten rows with the highest "closeness" values.
df_metrics.nlargest(n=10, columns="closeness")

Unnamed: 0,degree,eigen,closeness,betweenness,pagerank
10.1103/RevModPhys.13.203,0.001504,0.134626,0.070009,3.1e-05,0.000611
10.1103/PhysRev.76.790,2.3e-05,0.109866,0.06402,0.0,0.000443
10.1103/PhysRev.109.193,0.006223,0.049664,0.063183,7.4e-05,0.001911
10.1103/RevModPhys.8.82,2.3e-05,0.038946,0.062279,0.0,0.000595
10.1016/S0031-8914(40)90099-4,1.6e-05,0.072516,0.061988,0.0,0.000408
10.1103/PhysRev.75.486,1.6e-05,0.04951,0.06123,0.0,0.000305
10.1007/BF01339504,0.003721,0.06955,0.060623,0.0,0.001081
10.1016/S0031-8914(39)90090-X,0.001023,0.067188,0.060182,9e-06,0.000424
10.1038/143201a0,6e-06,0.099708,0.059525,0.0,7.1e-05
10.1016/S0031-8914(39)90089-3,1e-05,0.067187,0.059116,0.0,9e-05


In [38]:
# Ten rows with the highest "betweenness" values.
df_metrics.nlargest(n=10, columns="betweenness")

Unnamed: 0,degree,eigen,closeness,betweenness,pagerank
10.1016/0370-2693(71)90665-4,0.000817,0.0005517147,0.032316,0.00029,0.00015
10.1002/prop.19610091102,0.001194,0.0007558833,0.025299,0.000286,5.3e-05
10.1016/S0370-1573(87)80002-9,0.007572,1.473134e-08,0.017955,0.000272,0.001692
10.1016/0550-3213(69)90216-8,1e-05,0.0003029627,0.02746,0.000237,6e-06
10.1016/0550-3213(79)90023-3,0.006881,0.002890644,0.024697,0.000223,0.001117
10.1103/RevModPhys.44.284,0.001052,0.0005418362,0.01637,0.000221,0.000137
10.1007/BF02728133,0.002524,0.002167925,0.02249,0.000208,0.000668
10.1016/0370-2693(72)90420-0,0.000804,0.0006623604,0.03398,0.000205,0.00032
10.1016/0550-3213(72)90190-3,1e-05,0.00108387,0.018439,0.0002,0.000118
10.1016/0370-1573(81)90059-4,0.004864,4.02598e-09,0.011487,0.000184,0.000915


In [39]:
# Ten rows with the highest "pagerank" values.
df_metrics.nlargest(n=10, columns="pagerank")

Unnamed: 0,degree,eigen,closeness,betweenness,pagerank
10.1017/CBO9780511524646,0.01853,5.301566e-22,0.016578,0.0,0.005365
10.1103/PhysRev.93.99,0.018427,0.0008474252,0.026207,3.522018e-06,0.005208
10.1103/RevModPhys.57.287,0.018175,1.696595e-14,0.014253,5.865808e-05,0.004879
10.1103/PhysRev.115.485,0.017426,0.01172719,0.046483,5.679642e-06,0.004692
10.1103/PhysRev.131.2766,0.01803,0.001267654,0.02793,7.359271e-06,0.004439
10.1016/0370-2693(75)90162-8,2.6e-05,0.01681489,0.033549,0.0,0.003271
10.1143/JJAP.27.L209,0.010771,8.102113e-26,0.008158,2.996363e-06,0.003158
10.1016/0370-2693(75)90163-X,0.007504,0.02132962,0.041031,1.593069e-07,0.002855
10.1088/0022-3719/4/14/022,0.010048,2.49875e-25,0.008344,4.143266e-06,0.002795
10.1103/PhysRev.130.2529,0.010367,0.001902232,0.028841,4.395669e-06,0.002767


In [50]:
# Keep only the rows whose index label appears in samp["doi"].
# NOTE(review): `samp` is not defined in the visible part of the notebook -
# confirm it is created before this cell on a fresh kernel run.
in_sample = df_metrics.index.isin(samp["doi"])
jinr_subdf = df_metrics[in_sample]

In [51]:
# Ten rows of the filtered table with the highest "pagerank" values.
jinr_subdf.nlargest(n=10, columns="pagerank")

Unnamed: 0,degree,eigen,closeness,betweenness,pagerank
10.1016/0031-9163(62)90221-4,0.0001,1.497168e-07,0.012742,9.965129e-06,0.000224
10.1016/0031-9163(65)90885-1,0.000939,6.1559960000000007e-33,0.000913,9.747595e-07,0.000215
10.1007/BF02777988,0.000261,0.001083894,0.018499,5.936431e-06,0.000167
10.1007/BF02742679,0.000381,5.372915e-12,0.003531,4.970233e-07,0.000129
10.1080/00150198008009006,0.000394,2.56705e-33,0.000381,0.0,0.000103
10.1016/0370-2693(68)90437-1,0.000355,4.3106e-08,0.006791,1.2695e-06,7.1e-05
10.1007/BF02746567,0.000336,5.506903e-09,0.008671,7.710729e-06,6e-05
10.1007/BF02891914,0.000297,1.119957e-09,0.010095,1.215018e-06,4.9e-05
10.1103/PhysRevLett.93.142001,0.000239,5.886459e-26,0.000208,2.851118e-08,4.8e-05
10.1070/RM2001v056n01ABEH000374,0.000242,1.5012420000000001e-33,0.000223,0.0,4.7e-05


На этом этапе стоит разобраться в корректности агрегирования метрик для получения одной более интуитивной и уникальной оценки вершины.

In [6]:
def agg_centralities():
    """
    Placeholder for combining the centrality metrics into a single score per node.
    Not implemented yet: takes no arguments and returns None.
    """
    # TODO: decide on a sound aggregation scheme before implementing.
    return None

Время визуализации. Постараемся нарисовать как можно более понятный рисунок — с кластеризацией и без неё.