In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import networkx as nx
from networkx.algorithms import bipartite
from matplotlib.ticker import AutoMinorLocator
from matplotlib.ticker import MultipleLocator

In [2]:
# Load the book list and bucket the '时间' column into decades
# (presumably '时间' holds the publication year -- TODO confirm against the sheet).
book = pd.read_excel('your path\\booklist_1.0.xlsx', header = 0)
book['decade'] = 10*(book['时间']//10)

# Keep the rows whose '学科' equals '哲学', then count rows per
# ('出版地', decade) pair; the count becomes the edge weight.
book1 = book[book.学科=='哲学']
book2 = book1[['出版地', 'decade']].copy()
book3 = book2.groupby('出版地')['decade'].value_counts().rename('weight').reset_index()

# Bipartite graph: '出版地' values are tagged bipartite=0, decades bipartite=1,
# and every row of book3 contributes one weighted edge.
B = nx.Graph()
B.add_nodes_from(book3['出版地'].to_numpy(), bipartite=0)
B.add_nodes_from(book3['decade'].to_numpy(), bipartite=1)
B.add_weighted_edges_from(book3.to_numpy())
city_nodes = {n for n, d in B.nodes(data=True) if d['bipartite'] == 0}
year_nodes = set(B) - city_nodes

# Weighted degrees of both node sets, unpacked as (city side, decade side).
degcity, degyear = bipartite.degrees(B, year_nodes, weight='weight')


def _degree_counts(degree_view):
    """Return a one-column frame (column label 1) counting how often each degree occurs.

    The column label is pinned to 1 explicitly: older pandas names the
    value_counts() result after the source column (1), while pandas >= 2.0
    names it 'count', which would make the ``frame[1]`` lookups below fail.
    """
    counts = pd.DataFrame(degree_view)[1].value_counts()
    return counts.rename(1).rename_axis(None).to_frame()


# Degree histograms plus relative frequency ('频率'), ordered by degree value.
degcity1 = _degree_counts(degcity)
degyear1 = _degree_counts(degyear)
degcity1['频率'] = degcity1[1]/degcity1[1].sum()
degyear1['频率'] = degyear1[1]/degyear1[1].sum()
degcity1.sort_index(inplace=True)
degyear1.sort_index(inplace=True)

In [3]:
# Number the cities: position in the sorted node list -> city node.
city_nodes1 = sorted(city_nodes)
citylist = np.array(city_nodes1)
citynum = list(np.arange(len(city_nodes)))
city_dict = dict(zip(citynum, city_nodes1))

# Number the decades the same way.
year_nodes1 = sorted(year_nodes)
yearlist = np.array(year_nodes1)
yearnum = list(np.arange(len(year_nodes)))
year_dict = dict(zip(yearnum, year_nodes1))

# RCA[j, i] = (share of decade i within city j) / (share of decade i overall).
# The grand total and the per-decade shares do not depend on the city, so they
# are computed once instead of once per loop iteration.
goodsum = book3['weight'].sum()
decade_share = np.array(
    [book3.loc[book3['decade'] == decade, 'weight'].sum() for decade in yearlist]
) / goodsum

RCA = np.zeros((len(city_nodes), len(year_nodes)))
for j, city in enumerate(citylist):
    # Rows and total weight of this city are reused for every decade.
    city_rows = book3[book3['出版地'] == city]
    city_total = city_rows['weight'].sum()
    for i, decade in enumerate(yearlist):
        city_decade_total = city_rows.loc[city_rows['decade'] == decade, 'weight'].sum()
        RCA[j, i] = (city_decade_total / city_total) / decade_share[i]

In [4]:
# Binarise RCA into a NEW array (1 where RCA >= 1, else 0). The previous
# `RCA_ = RCA` was only an alias, so thresholding overwrote the continuous
# RCA values computed in the cell above.
RCA_ = np.where(RCA >= 1, 1.0, 0.0)

# specialised[k]       : number of cities with RCA_ == 1 for decade k
# co_specialised[i, j] : number of cities with RCA_ == 1 for both decades i and j
specialised = RCA_.sum(axis=0)
co_specialised = RCA_.T @ RCA_

# proximity[i, j] = min(co/specialised[j], co/specialised[i])
#                 = co / max(specialised[i], specialised[j]).
# When neither decade has a specialised city the ratio is 0/0; such pairs are
# left at 0 instead of producing NaN (the element-wise min() used before
# returned NaN or 0 depending on argument order, giving an asymmetric matrix).
larger_count = np.maximum.outer(specialised, specialised)
proximity = np.divide(
    co_specialised,
    larger_count,
    out=np.zeros((len(year_nodes), len(year_nodes))),
    where=larger_count > 0,
)
# A decade has no proximity to itself.
np.fill_diagonal(proximity, 0)

In [5]:
# Build the decade-decade graph from the proximity matrix; non-zero entries
# become weighted edges between positional node ids 0..n-1.
# from_numpy_array replaces from_numpy_matrix, which was removed in networkx 3.0.
G = nx.from_numpy_array(proximity)
G_edges = nx.to_pandas_edgelist(G)

# Translate positional ids back to decade labels. The result is assigned back
# to the column: `df[col].replace(..., inplace=True)` acts on a temporary
# Series and is not guaranteed to update the frame.
G_edges['source'] = G_edges['source'].replace(year_dict)
G_edges['target'] = G_edges['target'].replace(year_dict)

# Rebuild the graph on decade labels and attach the weights.
G = nx.from_pandas_edgelist(G_edges)
G.add_weighted_edges_from(G_edges.to_numpy())

In [6]:
# Maximum spanning tree of the decade graph, using the edge 'weight' attribute.
T1 = nx.maximum_spanning_tree(G, weight='weight')

# Store each decade's weighted degree on its node.
# NOTE(review): the attribute key is spelled 'dergee' (sic); it is kept as-is
# because the same key is used again further down and ends up in the exports.
decade_degree = dict(degyear)
nx.set_node_attributes(T1, values=decade_degree, name='dergee')

# Edge list of the tree, one row per edge.
T1_edges = nx.to_pandas_edgelist(T1)

In [7]:
# Export the spanning tree T1 as a GEXF file.
tree_gexf_path = 'your path\\product_space_tree_phil.gexf'
nx.write_gexf(T1, tree_gexf_path)

In [8]:
from itertools import chain

In [9]:
# Flatten the proximity matrix row by row into a plain list of its entries.
proximityarray = [entry for entry in chain(*proximity)]

In [10]:
# Drop the falsy entries (i.e. the zeros) so they do not enter the quantile.
proximityarray = [entry for entry in proximityarray if entry]

In [11]:
# Upper-quartile cut-off of the remaining proximity values.
upper_quartile = 0.75
proximityquantile = np.quantile(proximityarray, upper_quartile)

In [12]:
# Work on a copy so the full proximity matrix stays intact, and zero out every
# entry below the quantile cut-off.
proximity_ = proximity.copy()
proximity_[proximity_ < proximityquantile] = 0

# Graph of the surviving links; non-zero entries become weighted edges.
# from_numpy_array replaces from_numpy_matrix, which was removed in networkx 3.0.
T2 = nx.from_numpy_array(proximity_)
T2_edges = nx.to_pandas_edgelist(T2)

# Translate positional ids back to decade labels. The result is assigned back
# to the column: `df[col].replace(..., inplace=True)` acts on a temporary
# Series and is not guaranteed to update the frame.
T2_edges['source'] = T2_edges['source'].replace(year_dict)
T2_edges['target'] = T2_edges['target'].replace(year_dict)

In [13]:
# Stack the spanning-tree edges and the thresholded edges, drop exact duplicate
# rows, and renumber the rows 0..n-1.
combined_edges = pd.concat([T1_edges, T2_edges]).drop_duplicates()
combined_edges.index = np.arange(len(combined_edges.index))
T_edges = combined_edges

# Build the combined graph, then attach the weights from the edge rows.
T = nx.from_pandas_edgelist(T_edges)
T.add_weighted_edges_from(T_edges.to_numpy())

# Store each decade's weighted degree on its node (key spelled 'dergee', sic).
nx.set_node_attributes(T, values=dict(degyear), name='dergee')

In [14]:
# Weighted betweenness centrality of every node of T, as a one-column frame
# indexed by node and ordered by that index.
node_betweenness = nx.betweenness_centrality(T, weight='weight')
betweenness_centrality_math = pd.DataFrame.from_dict(
    node_betweenness, orient='index'
).sort_index()

In [15]:
# Display the betweenness table built in the previous cell (index already sorted).
betweenness_centrality_math

Unnamed: 0,0
1680,0.002374
1690,0.023219
1700,0.003894
1710,0.037349
1720,0.008642
1730,0.041406
1740,0.0
1750,0.0
1760,0.005413
1770,0.060684


In [16]:
# Export the combined graph T (spanning tree plus above-quantile links) as GEXF.
combined_gexf_path = 'your path\\product_space_phil_plus_quantile.gexf'
nx.write_gexf(T, combined_gexf_path)

In [17]:
# Load the city -> country table and keep only the United States rows,
# renumbered 0..n-1.
citylist_country = pd.read_excel('your path\\citylist.xlsx', header = 0)
citylist_country = citylist_country[citylist_country['country'] == 'United States']
citylist_country.index = np.arange(len(citylist_country.index))

# For every US city name, find its position(s) in `citylist`.
# All rows are scanned instead of a hard-coded range(28), so the cell no longer
# breaks (or silently skips cities) when the number of US rows changes.
US_x = [np.where(citylist == name)[0] for name in citylist_country['name']]

# Keep only the names that actually occur in `citylist`. An explicit size check
# replaces `x != None`, which compared arrays element-wise and relied on the
# deprecated truth value of an empty array.
US_x = [hits for hits in US_x if hits.size > 0]

# First matching position per city, then the corresponding rows of RCA_.
US_list = [hits[0] for hits in US_x]
RCA_US = RCA_[US_list,:]

  US_x = [x for x in US_x if x != None]


In [18]:
# Map every selected row position back to its city name.
US_listname = [city_dict[position] for position in US_list]

In [19]:
# Display the city names looked up in the previous cell.
US_listname

['New York',
 'Chicago',
 'Boston',
 'New Haven',
 'Cambridge, USA',
 'Berkeley',
 'La Salle, IL, USA']