In [26]:
import pandas as pd

subdomains = pd.read_excel('./dataset/subdomains.xlsx', sheet_name='英文领域库')
# 把所有列合并成一个 Series，然后去掉 NaN，再转成 list
all_values = subdomains.values.ravel()              # 拉平成一维数组
all_values = pd.Series(all_values).dropna() # 去掉 NaN
result_list = all_values.tolist()           # 转换成 Python list
result_list

['Algebra And Geometry',
 'Synthetic Chemistry',
 'Microbiology',
 'Geographical Science',
 'Metal Material',
 'Electronics And Information Systems',
 'Management Science And Engineering',
 'Respiratory System',
 'Analytics',
 'Catalysis And Surface Interface Chemistry',
 'Botany',
 'Geology',
 'Inorganic Nonmetallic Materials',
 'Computer Science',
 'Business Administration',
 'Circulatory System',
 'Differential Equations And Dynamical Systems',
 'Chemical Theory And Mechanism',
 'Ecology',
 'Geochemistry',
 'Organic Polymer Materials',
 'Automation',
 'Economic Science',
 'Digestive System',
 'Statistics And Operations',
 'Chemometrics',
 'Zoology',
 'Geophysics And Space Physics',
 'Mining And Metallurgical Engineering',
 'Semiconductor Science And Information Devices',
 'Macro Management And Policy',
 'Reproductive System, Perinatal Medicine, And Neonatology',
 'Computational Mathematics',
 'Materials Chemistry',
 'Biophysics And Biochemistry',
 'Atmospheric Sciences',
 'Machinery

In [27]:
import pandas as pd
from sentence_transformers import SentenceTransformer

# 准备学科列表
subdomains_list = result_list

# 加载模型
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

# 提取向量
subdomain_vectors = model.encode(subdomains_list, show_progress_bar=True)

# 构建 DataFrame
df_subdomains = pd.DataFrame({
    'subdomains': subdomains_list,
    'vector': list(subdomain_vectors)  # 每行是一个 384 维向量
})

df_subdomains


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Unnamed: 0,subdomains,vector
0,Algebra And Geometry,"[-0.29379645, -0.094037816, -0.26413912, -0.02..."
1,Synthetic Chemistry,"[-1.1863799, -0.060999773, -0.53667724, 0.3660..."
2,Microbiology,"[-0.23142917, -0.18817297, -0.6428789, -0.0516..."
3,Geographical Science,"[0.69880164, 0.13903742, 0.24993387, -0.104750..."
4,Metal Material,"[-0.54685813, 0.5090537, -0.24068758, 0.654598..."
...,...,...
121,Traditional Chinese Medical Science,"[-0.37594286, 0.6822629, -0.36338052, 0.270276..."
122,Traditional Chinese Pharmacology,"[-0.85987806, 0.5020133, -0.31204212, -0.13344..."
123,Integrated Traditional Chinese And Western Med...,"[-0.21536969, 0.704277, -0.08507674, 0.3233005..."
124,Materia Medica,"[0.41797128, 0.0544053, -0.29772034, 0.1013729..."


In [28]:
df_subdomains.to_parquet('./dataset/subdomain_vector.parquet')

In [29]:
import pandas as pd

data1 = pd.read_parquet('./dataset/vector.parquet')
corpus = data1[data1['term']=='geology']
refs = data1[data1['term']=='thisisref']

In [30]:
corpus.shape

(39868, 7)

In [31]:
refs.shape

(675565, 7)

In [32]:
domains = pd.read_parquet('./dataset/subdomain_vector.parquet')
domains

Unnamed: 0,subdomains,vector
0,Algebra And Geometry,"[-0.29379645, -0.094037816, -0.26413912, -0.02..."
1,Synthetic Chemistry,"[-1.1863799, -0.060999773, -0.53667724, 0.3660..."
2,Microbiology,"[-0.23142917, -0.18817297, -0.6428789, -0.0516..."
3,Geographical Science,"[0.69880164, 0.13903742, 0.24993387, -0.104750..."
4,Metal Material,"[-0.54685813, 0.5090537, -0.24068758, 0.654598..."
...,...,...
121,Traditional Chinese Medical Science,"[-0.37594286, 0.6822629, -0.36338052, 0.270276..."
122,Traditional Chinese Pharmacology,"[-0.85987806, 0.5020133, -0.31204212, -0.13344..."
123,Integrated Traditional Chinese And Western Med...,"[-0.21536969, 0.704277, -0.08507674, 0.3233005..."
124,Materia Medica,"[0.41797128, 0.0544053, -0.29772034, 0.1013729..."


In [33]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

ref_vectors = np.stack(refs['vector'])
domain_vectors = np.stack(domains['vector'])

print(1)
sim_matrix = cosine_similarity(ref_vectors, domain_vectors)
print(2)
# 取最大值匹配的领域
best_match_idx = sim_matrix.argmax(axis=1)
refs['assigned_domain'] = [domains['subdomains'][i] for i in best_match_idx]
refs['similarity_score'] = sim_matrix.max(axis=1)



1
2


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  refs['assigned_domain'] = [domains['subdomains'][i] for i in best_match_idx]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  refs['similarity_score'] = sim_matrix.max(axis=1)


In [34]:
cite = pd.read_csv('./dataset/clean_cite.csv')
cite

Unnamed: 0,eid,ref_eid
0,2-s2.0-85111011520,2-s2.0-0028405041
1,2-s2.0-85111011520,2-s2.0-0027331194
2,2-s2.0-85111011520,2-s2.0-85083822098
3,2-s2.0-85111011520,2-s2.0-79961139006
4,2-s2.0-85111011520,2-s2.0-85067258849
...,...,...
1875868,2-s2.0-84921959907,2-s2.0-0023477638
1875869,2-s2.0-84921959907,2-s2.0-84864556957
1875870,2-s2.0-84921959907,2-s2.0-0021405378
1875871,2-s2.0-84921959907,2-s2.0-0023119813


In [35]:
merged_df1 = pd.merge(cite, corpus[['id', 'time']], left_on='eid', right_on='id', how='inner')
merged_df1

Unnamed: 0,eid,ref_eid,id,time
0,2-s2.0-85111011520,2-s2.0-0028405041,2-s2.0-85111011520,2021
1,2-s2.0-85111011520,2-s2.0-0027331194,2-s2.0-85111011520,2021
2,2-s2.0-85111011520,2-s2.0-85083822098,2-s2.0-85111011520,2021
3,2-s2.0-85111011520,2-s2.0-79961139006,2-s2.0-85111011520,2021
4,2-s2.0-85111011520,2-s2.0-85067258849,2-s2.0-85111011520,2021
...,...,...,...,...
1671397,2-s2.0-80955136511,2-s2.0-0002388860,2-s2.0-80955136511,2011
1671398,2-s2.0-80955136511,2-s2.0-0033886791,2-s2.0-80955136511,2011
1671399,2-s2.0-80955136511,2-s2.0-33845589304,2-s2.0-80955136511,2011
1671400,2-s2.0-80955136511,2-s2.0-33745728148,2-s2.0-80955136511,2011


In [36]:
merged_df = pd.merge(merged_df1, refs[['id', 'assigned_domain', 'journal', 'abstract','term']], left_on='ref_eid', right_on='id', how='inner')
merged_df

Unnamed: 0,eid,ref_eid,id_x,time,id_y,assigned_domain,journal,abstract,term
0,2-s2.0-85111011520,2-s2.0-0028405041,2-s2.0-85111011520,2021,2-s2.0-0028405041,Astronomical Techniques And Methods,Radiation Measurements,"In previous work, we have discussed the way in...",thisisref
1,2-s2.0-85111011520,2-s2.0-0027331194,2-s2.0-85111011520,2021,2-s2.0-0027331194,Environmental Geoscience,Quaternary Science Reviews,Spatial and temporal patterns in lake-level da...,thisisref
2,2-s2.0-85111011520,2-s2.0-85083822098,2-s2.0-85111011520,2021,2-s2.0-85083822098,Forestry And Grass Science,Quaternary Science Reviews,High-resolution pollen and charcoal records fr...,thisisref
3,2-s2.0-85111011520,2-s2.0-79961139006,2-s2.0-85111011520,2021,2-s2.0-79961139006,Geographical Science,Australian Journal of Earth Sciences,"By comparison with the Northern Hemisphere, th...",thisisref
4,2-s2.0-85111011520,2-s2.0-85067258849,2-s2.0-85111011520,2021,2-s2.0-85067258849,Geology,Scientific Drilling,A 70m long continental sediment record was rec...,thisisref
...,...,...,...,...,...,...,...,...,...
1543051,2-s2.0-80955136511,2-s2.0-0002388860,2-s2.0-80955136511,2011,2-s2.0-0002388860,Statistics And Operations,Psychometrika,A distinction is drawn between redundancy meas...,thisisref
1543052,2-s2.0-80955136511,2-s2.0-0033886791,2-s2.0-80955136511,2011,2-s2.0-0033886791,Analytics,IEEE Transactions on Image Processing,This paper discusses the interest of binary pa...,thisisref
1543053,2-s2.0-80955136511,2-s2.0-33845589304,2-s2.0-80955136511,2011,2-s2.0-33845589304,Geographical Science,Proceedings of the IEEE Computer Society Confe...,"In this paper we propose diffusion distance, a...",thisisref
1543054,2-s2.0-80955136511,2-s2.0-33745728148,2-s2.0-80955136511,2011,2-s2.0-33745728148,Artificial Intelligence,International Geoscience and Remote Sensing Sy...,The hierarchical image segmentation (HSEG) alg...,thisisref


In [37]:
merged_df.to_parquet('sub_database.parquet')

In [38]:
import pandas as pd

data = pd.read_parquet('sub_database.parquet')
data

Unnamed: 0,eid,ref_eid,id_x,time,id_y,assigned_domain,journal,abstract,term
0,2-s2.0-85111011520,2-s2.0-0028405041,2-s2.0-85111011520,2021,2-s2.0-0028405041,Astronomical Techniques And Methods,Radiation Measurements,"In previous work, we have discussed the way in...",thisisref
1,2-s2.0-85111011520,2-s2.0-0027331194,2-s2.0-85111011520,2021,2-s2.0-0027331194,Environmental Geoscience,Quaternary Science Reviews,Spatial and temporal patterns in lake-level da...,thisisref
2,2-s2.0-85111011520,2-s2.0-85083822098,2-s2.0-85111011520,2021,2-s2.0-85083822098,Forestry And Grass Science,Quaternary Science Reviews,High-resolution pollen and charcoal records fr...,thisisref
3,2-s2.0-85111011520,2-s2.0-79961139006,2-s2.0-85111011520,2021,2-s2.0-79961139006,Geographical Science,Australian Journal of Earth Sciences,"By comparison with the Northern Hemisphere, th...",thisisref
4,2-s2.0-85111011520,2-s2.0-85067258849,2-s2.0-85111011520,2021,2-s2.0-85067258849,Geology,Scientific Drilling,A 70m long continental sediment record was rec...,thisisref
...,...,...,...,...,...,...,...,...,...
1543051,2-s2.0-80955136511,2-s2.0-0002388860,2-s2.0-80955136511,2011,2-s2.0-0002388860,Statistics And Operations,Psychometrika,A distinction is drawn between redundancy meas...,thisisref
1543052,2-s2.0-80955136511,2-s2.0-0033886791,2-s2.0-80955136511,2011,2-s2.0-0033886791,Analytics,IEEE Transactions on Image Processing,This paper discusses the interest of binary pa...,thisisref
1543053,2-s2.0-80955136511,2-s2.0-33845589304,2-s2.0-80955136511,2011,2-s2.0-33845589304,Geographical Science,Proceedings of the IEEE Computer Society Confe...,"In this paper we propose diffusion distance, a...",thisisref
1543054,2-s2.0-80955136511,2-s2.0-33745728148,2-s2.0-80955136511,2011,2-s2.0-33745728148,Artificial Intelligence,International Geoscience and Remote Sensing Sy...,The hierarchical image segmentation (HSEG) alg...,thisisref


In [39]:
data['assigned_domain'].value_counts()

assigned_domain
Geology                             447372
Geochemistry                        286680
Environmental Geoscience            110222
Atmospheric Sciences                 70290
Environmental Chemistry              49878
                                     ...  
Traditional Chinese Pharmacology       100
Veterinary Medicine                    100
Medical Immunology                      77
Materia Medica                          73
Special Medicine                        56
Name: count, Length: 126, dtype: int64

In [40]:
import pandas as pd

# 假设你的 DataFrame 名叫 df
# 示例代码
df_grouped = data.groupby('eid').agg({
    'ref_eid': list,
    'time': 'first',
    'assigned_domain': list,
}).reset_index()

In [41]:
df_grouped

Unnamed: 0,eid,ref_eid,time,assigned_domain
0,2-s2.0-0000076956,"[2-s2.0-0026614945, 2-s2.0-0024197991, 2-s2.0-...",2000,"[Geochemistry, Stars And The Interstellar Medi..."
1,2-s2.0-0000154187,"[2-s2.0-84879889857, 2-s2.0-0346870530, 2-s2.0...",2001,"[Geology, Geology, Geochemistry, Geochemistry,..."
2,2-s2.0-0000213397,"[2-s2.0-0041382198, 2-s2.0-0024221297, 2-s2.0-...",2001,"[Statistics And Operations, Geology, The Inter..."
3,2-s2.0-0000251905,"[2-s2.0-0019390193, 2-s2.0-84879880120, 2-s2.0...",2000,"[Marine Science, Geology, Environmental Geosci..."
4,2-s2.0-0000292382,"[2-s2.0-0024850051, 2-s2.0-0030471753, 2-s2.0-...",2001,"[Atmospheric Sciences, Geochemistry, Statistic..."
...,...,...,...,...
39378,2-s2.0-9744287123,"[2-s2.0-0027061958, 2-s2.0-0028563248]",2000,"[Geology, Geology]"
39379,2-s2.0-9944222633,"[2-s2.0-9944241388, 2-s2.0-9944263029]",2000,"[Environmental Geoscience, Geographical Science]"
39380,2-s2.0-9944231880,"[2-s2.0-0035056250, 2-s2.0-0033025887, 2-s2.0-...",2005,"[Computational Mathematics, Environmental Geos..."
39381,2-s2.0-9944242258,"[2-s2.0-0035989619, 2-s2.0-0037357802, 2-s2.0-...",2004,"[Microbiology, Food Science, Microbiology, Med..."


In [42]:
# 展开 assigned_domain 为单独行
df_exploded = df_grouped.explode('assigned_domain')

# 按年份和领域统计数量
domain_counts = df_exploded.groupby(['time', 'assigned_domain']).size().reset_index(name='count')

# 构造成每行是年份，每列是领域的格式
pivot_df = domain_counts.pivot(index='time', columns='assigned_domain', values='count').fillna(0).astype(int)
pivot_df


assigned_domain,"Accelerators, Reactors And Detectors",Acoustics,Acute And Critical Medicine,Algebra And Geometry,Analytics,Animal Science,Aquatic Products Science,Architecture And Civil Engineering,Artificial Intelligence,Astronomical Techniques And Methods,...,Synthetic Chemistry,The Intersection Of Mathematics And Other Disciplines,Traditional Chinese Medical Science,Traditional Chinese Pharmacology,Transportation And Delivery Engineering,Trauma Burn Plastic Surgery,Urinary System,Veterinary Medicine,Water Conservancy Project,Zoology
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1996,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1997,0,2,0,0,2,0,0,0,0,2,...,0,0,0,0,0,0,0,0,1,0
1998,0,0,0,0,0,1,0,0,0,1,...,0,1,0,0,0,0,0,0,0,0
1999,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2000,149,121,2,37,103,68,223,62,66,590,...,25,61,11,0,11,4,6,2,313,142
2001,137,128,28,42,142,37,348,80,51,628,...,38,59,6,1,28,6,9,1,431,257
2002,134,110,12,52,103,51,300,71,34,668,...,25,67,9,1,17,3,1,0,470,158
2003,148,100,15,67,114,43,338,84,72,622,...,24,56,12,2,15,5,2,4,586,164
2004,160,129,7,59,176,59,413,81,109,578,...,40,58,7,1,35,10,2,0,642,325
2005,174,131,1,64,213,98,394,129,74,829,...,27,66,15,3,27,11,6,2,793,378


In [43]:
pivot_df.sum()

assigned_domain
Accelerators, Reactors And Detectors     4575
Acoustics                                4353
Acute And Critical Medicine               250
Algebra And Geometry                     1063
Analytics                                8274
                                        ...  
Trauma Burn Plastic Surgery               181
Urinary System                            154
Veterinary Medicine                       100
Water Conservancy Project               27539
Zoology                                  9024
Length: 126, dtype: int64

In [44]:
lv = pivot_df.pct_change()

n = 5
lv.tail(n).mean().sort_values(ascending=False)

assigned_domain
Materia Medica                                         inf
Nervous System                                         inf
Medical Immunology                                2.141667
Preventive Medicine                               1.201026
Rehabilitation Medicine                           1.109524
                                                    ...   
Genetics And Bioinformatics                      -0.062435
Developmental Biology And Reproductive Biology   -0.071189
Forensic Medicine                                -0.076046
Pharmacology                                     -0.076726
Biophysics And Biochemistry                      -0.088180
Length: 126, dtype: float64

In [45]:
pivot_df.to_csv('子学科年份表sub_database.csv', index=False)

In [46]:
pivot_df

assigned_domain,"Accelerators, Reactors And Detectors",Acoustics,Acute And Critical Medicine,Algebra And Geometry,Analytics,Animal Science,Aquatic Products Science,Architecture And Civil Engineering,Artificial Intelligence,Astronomical Techniques And Methods,...,Synthetic Chemistry,The Intersection Of Mathematics And Other Disciplines,Traditional Chinese Medical Science,Traditional Chinese Pharmacology,Transportation And Delivery Engineering,Trauma Burn Plastic Surgery,Urinary System,Veterinary Medicine,Water Conservancy Project,Zoology
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1996,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1997,0,2,0,0,2,0,0,0,0,2,...,0,0,0,0,0,0,0,0,1,0
1998,0,0,0,0,0,1,0,0,0,1,...,0,1,0,0,0,0,0,0,0,0
1999,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2000,149,121,2,37,103,68,223,62,66,590,...,25,61,11,0,11,4,6,2,313,142
2001,137,128,28,42,142,37,348,80,51,628,...,38,59,6,1,28,6,9,1,431,257
2002,134,110,12,52,103,51,300,71,34,668,...,25,67,9,1,17,3,1,0,470,158
2003,148,100,15,67,114,43,338,84,72,622,...,24,56,12,2,15,5,2,4,586,164
2004,160,129,7,59,176,59,413,81,109,578,...,40,58,7,1,35,10,2,0,642,325
2005,174,131,1,64,213,98,394,129,74,829,...,27,66,15,3,27,11,6,2,793,378


In [47]:
pivot_df.sum().sort_values(ascending=False)

assigned_domain
Geology                             447372
Geochemistry                        286680
Environmental Geoscience            110222
Atmospheric Sciences                 70290
Environmental Chemistry              49878
                                     ...  
Traditional Chinese Pharmacology       100
Veterinary Medicine                    100
Medical Immunology                      77
Materia Medica                          73
Special Medicine                        56
Length: 126, dtype: int64

In [48]:
subdomains = pd.read_excel('./dataset/subdomains.xlsx', sheet_name='英文领域库')
subdomains

Unnamed: 0,Mathematical And Physical Sciences,Chemical Sciences,Life Sciences,Earth Sciences,Engineering And Materials Sciences,Information Sciences,Management Sciences,Health Sciences
0,Algebra And Geometry,Synthetic Chemistry,Microbiology,Geographical Science,Metal Material,Electronics And Information Systems,Management Science And Engineering,Respiratory System
1,Analytics,Catalysis And Surface Interface Chemistry,Botany,Geology,Inorganic Nonmetallic Materials,Computer Science,Business Administration,Circulatory System
2,Differential Equations And Dynamical Systems,Chemical Theory And Mechanism,Ecology,Geochemistry,Organic Polymer Materials,Automation,Economic Science,Digestive System
3,Statistics And Operations,Chemometrics,Zoology,Geophysics And Space Physics,Mining And Metallurgical Engineering,Semiconductor Science And Information Devices,Macro Management And Policy,"Reproductive System, Perinatal Medicine, And N..."
4,Computational Mathematics,Materials Chemistry,Biophysics And Biochemistry,Atmospheric Sciences,Machinery Design And Manufacture,Photonic And Optoelectronic Devices,,Urinary System
5,The Intersection Of Mathematics And Other Disc...,Environmental Chemistry,Genetics And Bioinformatics,Marine Science,Engineering Thermophysics And Energy Utilization,Artificial Intelligence,,Motor System
6,Dynamics And Control,Chemical Biology,Cell Biology,Environmental Geoscience,Electrical Science And Engineering,Information Science In Interdisciplinary Disci...,,Endocrine System Metabolism And Nutritional Su...
7,Solid Mechanics,Chemical Engineering And Industrial Chemistry,Immunology,,Architecture And Civil Engineering,,,Blood System
8,Hydromechanics,Energy Chemistry,Neuroscience And Psychology,,Water Conservancy Project,,,Nervous System
9,Biomechanics,,"Biomaterials, Imaging And Tissue Engineering",,Environmental Engineering,,,Mental Health And Psychological Health


In [49]:
import pandas as pd

# counts: 子领域 -> 引用次数 的 Series
# subdomains: 大领域 -> 子领域 的表格
counts = pivot_df.sum().sort_values(ascending=False)

# 展开大领域-子领域对照
mapping = subdomains.melt(var_name='major', value_name='subdomain').dropna()

# 合并引用次数
df = mapping.merge(counts.rename('citations'), left_on='subdomain', right_index=True, how='left').fillna(0)

# 在每个大领域内排序并加名次
df = df.sort_values(['major','citations'], ascending=[True,False])
df['rank'] = df.groupby('major')['citations'].rank(method='first', ascending=False).astype(int)

df


Unnamed: 0,major,subdomain,citations,rank
40,Chemical Sciences,Environmental Chemistry,49878,1
37,Chemical Sciences,Chemical Theory And Mechanism,24631,2
39,Chemical Sciences,Materials Chemistry,7875,3
41,Chemical Sciences,Chemical Biology,4683,4
38,Chemical Sciences,Chemometrics,4607,5
...,...,...,...,...
28,Mathematical And Physical Sciences,Plasma Physics,890,26
9,Mathematical And Physical Sciences,Biomechanics,861,27
13,Mathematical And Physical Sciences,Cosmology And Galaxies,735,28
23,Mathematical And Physical Sciences,Quantum Mechanics,627,29


In [51]:
df

Unnamed: 0,major,subdomain,citations,rank
40,Chemical Sciences,Environmental Chemistry,49878,1
37,Chemical Sciences,Chemical Theory And Mechanism,24631,2
39,Chemical Sciences,Materials Chemistry,7875,3
41,Chemical Sciences,Chemical Biology,4683,4
38,Chemical Sciences,Chemometrics,4607,5
...,...,...,...,...
28,Mathematical And Physical Sciences,Plasma Physics,890,26
9,Mathematical And Physical Sciences,Biomechanics,861,27
13,Mathematical And Physical Sciences,Cosmology And Galaxies,735,28
23,Mathematical And Physical Sciences,Quantum Mechanics,627,29
