In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from matplotlib import cm

In [None]:
## load data
# Read only the columns used below and order the rows so that, within each
# demand, rows with the highest n_gbb (ties broken by log_n_pat) come first.
df = (
    pd.read_parquet(
        "dfcomplement_small.parquet",
        columns=["demand", "supplytype", "n_gbb", "log_n_pat", "copatgreen"],
    )
    .sort_values(by=["demand", "n_gbb", "log_n_pat"], ascending=False)
    .assign(copatgreen=lambda frame: frame["copatgreen"].astype(bool))
)
# rnk: 1-based position of each row inside its demand group, in the sort order above
df["rnk"] = df.groupby("demand").cumcount().add(1)
df.head()

Unnamed: 0,demand,supplytype,n_gbb,log_n_pat,copatgreen,rnk
521660,39713288,COMPANY,9.0,9.733944,False,1
532511,39713288,COMPANY,9.0,8.514389,False,2
580254,39713288,COMPANY,9.0,7.17549,False,3
665088,39713288,COMPANY,9.0,6.242223,False,4
744310,39713288,COMPANY,8.0,8.580356,False,5


In [None]:
## aggregate by co-patenting
# Summary statistics of n_gbb, split by whether the pair has a green co-patent.
n_gbb_by_copat = df.groupby('copatgreen')['n_gbb']
dfagg = n_gbb_by_copat.agg(['count', 'mean', 'std', 'sem']).reset_index()
dfagg

Unnamed: 0,copatgreen,count,mean,std,sem
0,False,42733405,1.39758,2.151689,0.000329
1,True,25525,3.295122,3.680871,0.023039


In [None]:
# Error-bar half-width used in fig4a: 1.96 standard errors
# (1.96 is the usual normal z-value for a 95% interval).
dfagg['sem'].mul(1.96)

0    0.000645
1    0.045157
Name: sem, dtype: float64

In [None]:
## fig4a
# Bar chart of the mean n_gbb for pairs without / with a green co-patent,
# with black error bars of 1.96 standard errors drawn on top of the bars.
fig4a, ax = plt.subplots(figsize=(4, 3))
sns.barplot(dfagg, x='copatgreen', y='mean', palette=["grey", "green"], ax=ax)
ax.errorbar(
    x=dfagg['copatgreen'],
    y=dfagg['mean'],
    yerr=1.96 * dfagg['sem'],
    fmt='none',  # error bars only, no connecting line or markers
    c='black',
)
ax.set_ylabel('GBB-complementarity')
ax.set_xlabel('green technological alliance')
fig4a.savefig('fig4a.pdf', bbox_inches='tight')

In [None]:
## create group and agg
# Bucket each row's within-demand rank into six groups. The last edge is
# open-ended (np.inf) instead of df.rnk.max(): pd.cut needs strictly increasing
# edges, so using the observed maximum raises ValueError whenever the largest
# rank in the data is 100 or less.
rank_edges = [0, 2, 5, 10, 50, 100, np.inf]
rank_labels = ['[1-2]', '[3-5]', '[6-10]', '[11-50]', '[51-100]', '>100']
df['rnkgroup'] = pd.cut(df.rnk, bins=rank_edges)
# observed=False keeps one output row per bucket even when a bucket is empty,
# so the six labels assigned below always line up with the six rows
# (and the result does not depend on the pandas default for `observed`).
dfagg2 = (
    df.groupby('rnkgroup', observed=False)['copatgreen']
    .agg(['mean', 'std', 'count', 'sem'])
    .reset_index()
)
# Replace the interval categories with readable labels, in bucket order.
dfagg2['rnkgroup'] = rank_labels
dfagg2

In [None]:
# fig 4b
# Share of rows with a green co-patent in each rank bucket, with
# error bars of 1.96 standard errors.
fig4b, ax = plt.subplots(figsize=(5, 3))
sns.barplot(dfagg2, x='rnkgroup', y='mean', color='green', ax=ax)
ax.errorbar(
    x=dfagg2['rnkgroup'],
    y=dfagg2['mean'],
    yerr=1.96 * dfagg2['sem'],
    fmt='none',  # error bars only, no connecting line or markers
    c='black',
)
ax.set_ylabel('Probability of green tech. alliance')
ax.set_xlabel('GBB-complementarity rank')
fig4b.savefig('fig4b.pdf', bbox_inches='tight')

In [None]:
# complementarity between types of org
# For the single demand 39713288: count its rows per supplytype, split
# space-separated multi-type values so each type receives the full count,
# fold HOSPITAL and NON-PROFIT into OTHER, and re-total per type.
typecomplement = (
    df[df['demand'].eq(39713288)]
    .groupby('supplytype')['demand']
    .count()
    .reset_index()
    .assign(supplytype=lambda t: t['supplytype'].str.split(' '))
    .explode('supplytype')
    .assign(
        supplytype=lambda t: np.where(
            t['supplytype'].isin(['HOSPITAL', 'NON-PROFIT']), 'OTHER', t['supplytype']
        )
    )
    .groupby('supplytype')['demand']
    .sum()
    .reset_index()
)
typecomplement

Unnamed: 0,supplytype,demand
0,COMPANY,5947
1,GOV,366
2,OTHER,388
3,UNIVERSITY,929


In [None]:
# Spell out the abbreviated 'GOV' label for the pie chart.
# Matching on the value (instead of the chained `[...][1] = ...` assignment)
# avoids pandas' chained-assignment warning, still works when Copy-on-Write is
# enabled, and does not depend on GOV happening to sit at row label 1.
typecomplement['supplytype'] = typecomplement['supplytype'].replace({'GOV': 'GOVERNMENT'})

In [None]:
# fig4c
# Pie chart of the per-type counts in typecomplement; slices are annotated
# with whole-number percentages.
fig4c, ax = plt.subplots(figsize=(4, 3))
ax.pie(typecomplement['demand'], labels=typecomplement['supplytype'], autopct='%.0f%%')
fig4c.savefig('fig4c.pdf', bbox_inches='tight')

In [27]:
# Country-pair table (demand country x supply country) used for the Sankey diagram.
cntrycomplement = pd.read_parquet(path='cntrycomplement.parquet')
cntrycomplement.head(5)

Unnamed: 0,demandctry,supplyctry,supply,n_gbb,log_n_pat,ratio
0,ZA,US,2,25.5,8.153242,0.666667
1,ZA,KR,1,21.0,6.901737,0.333333
2,US,JP,424,13.90566,8.62706,0.385805
3,US,CN,290,16.131034,8.472486,0.263876
4,US,US,171,18.076023,7.916348,0.155596


In [None]:
## filter to focus on major countries (ignore for appendix ver)
# Any country outside the five listed below is relabelled 'other' on both
# sides, then supply is re-totalled per (demandctry, supplyctry) pair.
major_countries = ['JP', 'CN', 'US', 'DE', 'KR']
for ctry_col in ('demandctry', 'supplyctry'):
    is_major = cntrycomplement[ctry_col].isin(major_countries)
    cntrycomplement[ctry_col] = np.where(is_major, cntrycomplement[ctry_col], 'other')
cntrycomplement = (
    cntrycomplement
    .groupby(['demandctry', 'supplyctry'])['supply']
    .sum()
    .reset_index()
)
cntrycomplement.head()

Unnamed: 0,demandctry,supplyctry,supply
0,CN,CN,74
1,CN,DE,123
2,CN,JP,502
3,CN,KR,53
4,CN,US,106


In [None]:
## nodes in sankey diagram
# One node per (country, side): '<country>_s' on the supply side and
# '<country>_t' on the demand side.
supply_ids = (cntrycomplement['supplyctry'] + '_s').tolist()
demand_ids = (cntrycomplement['demandctry'] + '_t').tolist()
nodes = (
    pd.DataFrame({'Id': supply_ids + demand_ids})
    .drop_duplicates()
    .sort_values(by='Id', ascending=False)
    .reset_index(drop=True)
    .reset_index()  # turns the fresh 0..n-1 position into an 'index' column
)
# label: the country part of the Id (text before the first underscore)
nodes['label'] = nodes['Id'].str.split('_').str[0]
nodes.head()

Unnamed: 0,index,Id,label
0,0,other_t,other
1,1,other_s,other
2,2,US_t,US
3,3,US_s,US
4,4,KR_t,KR


In [None]:
# assign color
# Each supply-side country label gets a colour sampled from tab20 at evenly
# spaced positions running from 1 down to 0; labels without a mapping fall
# back to light grey.
colormap = cm.tab20
unique_labels = nodes.loc[nodes['Id'].str.endswith('_s'), 'label'].unique()
color_positions = np.linspace(1, 0, len(unique_labels))


def rgba_formatter(rgba):
    """Format a 0-1 float RGBA tuple as an 'rgba(r, g, b, a)' string with 0-255 integer channels."""
    red, green, blue = (int(channel * 255) for channel in rgba[:3])
    return f"rgba({red}, {green}, {blue}, {rgba[3]})"


color_map = dict(
    zip(unique_labels, (rgba_formatter(colormap(position)) for position in color_positions))
)
nodes['color'] = nodes['label'].map(color_map).fillna("rgba(211,211,211,1.0)")
nodes.head()

Unnamed: 0,index,Id,label,color
0,0,other_t,other,"rgba(158, 218, 229, 1.0)"
1,1,other_s,other,"rgba(158, 218, 229, 1.0)"
2,2,US_t,US,"rgba(188, 189, 34, 1.0)"
3,3,US_s,US,"rgba(188, 189, 34, 1.0)"
4,4,KR_t,KR,"rgba(227, 119, 194, 1.0)"


In [None]:
## edge in sankey diagram
# Translate each country pair into (source node index, target node index),
# carrying the flow size and the colour of the supply-side node.
source_lookup = nodes[['index', 'Id', 'color']].rename(
    columns={'Id': 'supplyctry', 'index': 'sourceidx'}
)
target_lookup = nodes[['index', 'Id']].rename(
    columns={'Id': 'demandctry', 'index': 'targetidx'}
)
edges2 = (
    cntrycomplement
    .assign(
        supplyctry=cntrycomplement['supplyctry'] + '_s',
        demandctry=cntrycomplement['demandctry'] + '_t',
    )
    # the only column shared with each lookup is the key named here
    .merge(source_lookup, on='supplyctry')
    .merge(target_lookup, on='demandctry')
    [['sourceidx', 'targetidx', 'supply', 'color']]
    .rename(columns={'sourceidx': 'source', 'targetidx': 'target', 'supply': 'value'})
)
edges2.head()

Unnamed: 0,source,target,value,color
0,11,10,74,"rgba(31, 119, 180, 1.0)"
1,9,10,123,"rgba(255, 187, 120, 1.0)"
2,7,10,502,"rgba(255, 152, 150, 1.0)"
3,5,10,53,"rgba(227, 119, 194, 1.0)"
4,3,10,106,"rgba(188, 189, 34, 1.0)"


In [None]:
## use plotly to create figure
# Links come straight from edges2 (source / target / value / color columns);
# node labels and colours are taken from `nodes` in row order, which is the
# same order the link indices refer to.
sankey_links = edges2.to_dict(orient='list')
sankey_nodes = dict(label=nodes['label'].tolist(), color=nodes['color'].tolist())
chart = go.Sankey(link=sankey_links, node=sankey_nodes, arrangement="snap")
fig = go.Figure(chart, layout=dict(height=1000, width=1000))

In [33]:
# Save the Sankey figure built in the previous cell as an HTML file.
fig.write_html('cntrysankey_small.html')