In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib

## visualize source fields, GBBs, targets, their examples, and associations

In [None]:
## load umap reduction of cpc2vec embedding to 2d
layout = pd.read_parquet('data/external/umap2d.parquet')
layout.head()

Unnamed: 0,cpc,cpc1d,cpc4d,title,x,y
0,A01B1/00,A,A01B,Hand tools,3.285313,-2.539227
1,A01B1/02,A,A01B,Hand tools -Spades; Shovels,3.300158,-2.519649
2,A01B1/022,A,A01B,Hand tools -Spades; Shovels -Collapsible; exte...,3.284784,-2.528554
3,A01B1/024,A,A01B,Hand tools -Spades; Shovels -Foot protectors a...,0.93429,0.966105
4,A01B1/026,A,A01B,Hand tools -Spades; Shovels -with auxiliary ha...,3.243993,-2.510049


In [3]:
layout.shape

(253017, 6)

In [None]:
## load background labels
labels = pd.read_csv('bglabel.tsv',sep='\t')
labels

Unnamed: 0,x,y,label
0,11.754825,-3.390171,HUMAN NECESSITIES
1,-0.641913,9.915849,ELECTRIC & ELECTRONICS
2,-0.56291,-5.083076,TRASPORTATION
3,11.420568,6.115402,CHEMISTRY & METALLURGY
4,4.037599,2.122499,MATERIALS
5,-3.38259,3.352831,SYSTEM CONTROL


In [None]:
## load assocation of source fields and gbbs
srcpath = pd.read_parquet('data/srcpath33.parquet')
srcpath.head()

Unnamed: 0,src,path,ccmt_patent_id,rca,binrca,size,hhi,effhhi,rnk
0,s0,c0,3,1.33956,1,98,0.116618,8.575,54.5
1,s0,c1,5,0.514582,0,149,0.077969,12.825534,74.5
2,s0,c10,8,0.231331,0,164,0.036957,27.05835,95.0
3,s0,c12,18,1.415693,1,153,0.053697,18.622912,50.0
4,s0,c13,1,0.242206,0,103,0.121124,8.256031,89.0


In [6]:
srcpath = srcpath[(srcpath.binrca==1)].copy()
srcpath.path.nunique(),srcpath.src.nunique()

(82, 193)

In [7]:
srcpath['divsrc'] = srcpath.groupby('src')['path'].transform('nunique')
srcpath['divpath'] = srcpath.groupby('path')['src'].transform('nunique')

In [None]:
## load gbbs
pathcpc = pd.read_parquet('data/clusangle_outlier_hdbscan2.parquet',columns=['clus','cpc']).query('clus!="c-1"').rename(columns={'clus':'path'})
pathcpc['cpc'] = pathcpc['cpc'].str.replace(' ','')
pathcpc.head()

Unnamed: 0,path,cpc
0,c10,A01B1/00
8,c10,A01B39/18
9,c10,A01B61/00
11,c10,A01B63/1013
12,c10,A01B69/008


In [11]:
pathcpc.nunique()

path       82
cpc     30675
dtype: int64

In [None]:
## load source fields
srccpc = pd.read_parquet('data/clusangle_outlier_hdbscan2_src2.parquet',columns=['clus','cpc']).query('clus!="s-1"').rename(columns={'clus':'src'})
srccpc['cpc'] = srccpc['cpc'].str.replace(' ','')
srccpc.head()

Unnamed: 0,src,cpc
0,s112,A01B1/02
1,s112,A01B1/022
2,s112,A01B13/00
3,s112,A01B13/025
4,s112,A01B13/08


In [13]:
srccpc.nunique()

src      193
cpc    58841
dtype: int64

In [None]:
## assign colors to gbbs
unique_categories = np.sort(pathcpc.path.unique())
category_to_index = {cat: i for i, cat in enumerate(unique_categories)}
pathcpc['category_index'] = pathcpc['path'].map(category_to_index)

norm = plt.Normalize(vmin=0, vmax=len(unique_categories) - 1)
colormap = matplotlib.colormaps['gist_rainbow']

pathcpc['rgba'] = pathcpc.apply(lambda row: colormap(norm(row['category_index'])), axis=1)
pathcpc.head()

Unnamed: 0,path,cpc,category_index,rgba
0,c10,A01B1/00,2,"(1.0, 0.0, 0.034509803921568605, 1.0)"
8,c10,A01B39/18,2,"(1.0, 0.0, 0.034509803921568605, 1.0)"
9,c10,A01B61/00,2,"(1.0, 0.0, 0.034509803921568605, 1.0)"
11,c10,A01B63/1013,2,"(1.0, 0.0, 0.034509803921568605, 1.0)"
12,c10,A01B69/008,2,"(1.0, 0.0, 0.034509803921568605, 1.0)"


In [None]:
## assign colors to source fields
unique_categories = np.sort(srccpc.src.unique())
category_to_index = {cat: i for i, cat in enumerate(unique_categories)}
srccpc['category_index'] = srccpc['src'].map(category_to_index)

norm = plt.Normalize(vmin=0, vmax=len(unique_categories) - 1)
colormap = matplotlib.colormaps['gist_rainbow']

srccpc['rgba'] = srccpc.apply(lambda row: colormap(norm(row['category_index'])), axis=1)
srccpc.head()

Unnamed: 0,src,cpc,category_index,rgba
0,s112,A01B1/02,16,"(1.0, 0.2829888712241654, 0.0, 1.0)"
1,s112,A01B1/022,16,"(1.0, 0.2829888712241654, 0.0, 1.0)"
2,s112,A01B13/00,16,"(1.0, 0.2829888712241654, 0.0, 1.0)"
3,s112,A01B13/025,16,"(1.0, 0.2829888712241654, 0.0, 1.0)"
4,s112,A01B13/08,16,"(1.0, 0.2829888712241654, 0.0, 1.0)"


In [None]:
## load gbb names
pathname = pd.read_parquet("data/clusname3.parquet")
pathname['path'] = 'c'+pathname.clus.astype(str)
pathname.head()

Unnamed: 0,clus,name,desc,path
0,0,Capacitor Manufacturing Technologies,Processes and details in capacitor manufacture,c0
1,1,Electrochemical Energy Storage,"Advanced materials, processes, and constructio...",c1
2,2,Advanced Fuel Cells,"Technologies for efficient, controlled fuel ce...",c2
3,3,Advanced Engine Systems,Integrated components for modern internal comb...,c3
4,4,Rotary-Piston Machinery,Technologies related to rotary-piston engines ...,c4


In [None]:
## load source field names
srcname = pd.read_parquet("data/clusname5_src.parquet")
srcname['src'] = 's'+srcname.clus.astype(str)
srcname.head()

Unnamed: 0,clus,name,desc,src
0,0,Advanced Lubricants,Innovative compositions with diverse organic a...,s0
1,1,Advanced Dishwashing Technologies,Innovative features for commercial dishwashing...,s1
2,2,Advanced Semiconductor Bonding,Technologies for semiconductor interconnects a...,s2
3,3,Advanced Vessel Technologies,"Focus on insulated, non-pressurized vessels fo...",s3
4,4,Advanced Program-Control Systems,Technologies for optimizing and monitoring CNC...,s4


In [None]:
layout['green'] = layout.cpc4d.str[:3].isin(['Y02','Y04'])
layout['cpc8d'] = layout.cpc.str.split('/').str[0]
layout.tail()

In [None]:
greendf = layout[layout.green].copy()
greendf["title"] = greendf.cpc4d.map(
    {
        "Y02A": "Y02A_ADAPTATION",
        "Y02B": "Y02B_BUILDINGS",
        "Y02C": "Y02C_CCUS",
        "Y02D": "Y02D_ICT",
        "Y02E": "Y02E_ENERGY",
        "Y02P": "Y02P_PRODUCTION",
        "Y02T": "Y02T_TRANSPORTATION",
        "Y02W": "Y02W_WASTE",
        "Y04S": "Y04S_SMARTGRID",
    }
)

In [None]:
## plot Y02/04 targets
plt.figure(figsize=(10, 8))
plt.axis('off')
sns.scatterplot(data=layout[~layout.green], x="x", y="y", color='lightgrey',s=5)
for i in range(6):
    plt.annotate(labels['label'][i],(labels['x'][i]-3,labels['y'][i]-1),color='grey',fontsize=13)
ax = sns.scatterplot(data=greendf, x="x", y="y", hue="title",palette="gist_rainbow",s=10)
ax.legend(loc='upper center',ncol=5,prop={'size':6})
plt.savefig('Y02_in_techspace.pdf')

In [None]:
## plot gbbs
plt.figure(figsize=(10, 8))
plt.scatter(layout.x, layout.y, c="lightgrey",s=1)
gbbplotdf = pathcpc.merge(layout)
plt.scatter(gbbplotdf.x, gbbplotdf.y, c=gbbplotdf.rgba,s=1,edgecolor='white',linewidths=0.1)
for i in range(6):
    plt.annotate(labels['label'][i],(labels['x'][i]-3,labels['y'][i]-1),color='grey',fontsize=13)
plt.axis('off')
plt.savefig('gbb_in_techspace.pdf')

In [None]:
## plot zoomed gbb example
lbdf = layout.merge(pathcpc[pathcpc.path=='c2'])
lbdf =lbdf[lbdf.x.between(7.5,8.75)&lbdf.y.between(6.5,7)]
lbdf['cpc8d'] = lbdf.cpc.str.split('/').str[0]
lbdf

Unnamed: 0,cpc,cpc1d,cpc4d,title,x,y,path,category_index,rgba,cpc8d
3,H01M12/00,H,H01M,Hybrid cells; Manufacture thereof,8.184791,6.685297,c2,12,"(1.0, 0.6221515633280338, 0.0, 1.0)",H01M12
4,H01M16/00,H,H01M,Structural combinations of different types of ...,8.107008,6.733237,c2,12,"(1.0, 0.6221515633280338, 0.0, 1.0)",H01M16
5,H01M16/003,H,H01M,Structural combinations of different types of ...,8.462982,6.646824,c2,12,"(1.0, 0.6221515633280338, 0.0, 1.0)",H01M16
6,H01M16/006,H,H01M,Structural combinations of different types of ...,8.474522,6.662554,c2,12,"(1.0, 0.6221515633280338, 0.0, 1.0)",H01M16
7,H01M2004/8684,H,H01M,Electrodes-Inert electrodes with catalytic act...,8.406021,6.775294,c2,12,"(1.0, 0.6221515633280338, 0.0, 1.0)",H01M2004
...,...,...,...,...,...,...,...,...,...,...
321,H01M8/248,H,H01M,Fuel cells; Manufacture thereof-Grouping of fu...,8.443974,6.780737,c2,12,"(1.0, 0.6221515633280338, 0.0, 1.0)",H01M8
322,H01M8/2483,H,H01M,Fuel cells; Manufacture thereof-Grouping of fu...,8.438474,6.785793,c2,12,"(1.0, 0.6221515633280338, 0.0, 1.0)",H01M8
323,H01M8/2484,H,H01M,Fuel cells; Manufacture thereof-Grouping of fu...,8.438851,6.782925,c2,12,"(1.0, 0.6221515633280338, 0.0, 1.0)",H01M8
324,H01M8/2485,H,H01M,Fuel cells; Manufacture thereof-Grouping of fu...,8.431506,6.790516,c2,12,"(1.0, 0.6221515633280338, 0.0, 1.0)",H01M8


In [None]:
lbdf['title']=lbdf.title.str.replace('Fuel cells; Manufacture thereof-','').str.split('-').str[0]
plt.figure(figsize=(10, 8))
plt.scatter(lbdf.x, lbdf.y, c=lbdf.rgba,s=100,edgecolor='white',linewidths=0.1)
plt.axis('off')

In [None]:
## plot zoomed source field example
lbdf = layout.merge(srccpc[srccpc.src=='s192'])
lbdf =lbdf[(lbdf.y<-3)&(lbdf.x<2)]
lbdf['cpc8d'] = lbdf.cpc.str.split('/').str[0]
lbdf

Unnamed: 0,cpc,cpc1d,cpc4d,title,x,y,src,category_index,rgba,cpc8d
1,B60D1/565,B,B60D,Traction couplings; Hitches; Draw-gear; Towing...,-1.873910,-4.852061,s192,104,"(0.0, 1.0, 0.7590132827324482, 1.0)",B60D1
2,B60D1/66,B,B60D,Traction couplings; Hitches; Draw-gear; Towing...,-1.636673,-4.758918,s192,104,"(0.0, 1.0, 0.7590132827324482, 1.0)",B60D1
3,B60J5/02,B,B60J,Doors -arranged at the vehicle front,-1.420564,-5.103268,s192,104,"(0.0, 1.0, 0.7590132827324482, 1.0)",B60J5
4,B60J5/0444,B,B60J,Doors -arranged at the vehicle sides -Reinforc...,-1.691117,-4.900367,s192,104,"(0.0, 1.0, 0.7590132827324482, 1.0)",B60J5
5,B60J5/0487,B,B60J,Doors -arranged at the vehicle sides -Special ...,-1.473282,-5.027539,s192,104,"(0.0, 1.0, 0.7590132827324482, 1.0)",B60J5
...,...,...,...,...,...,...,...,...,...,...
169,B66F11/00,B,B66F,Lifting devices specially adapted for particul...,0.192947,-3.562601,s192,104,"(0.0, 1.0, 0.7590132827324482, 1.0)",B66F11
170,B66F19/00,B,B66F,"Hoisting, lifting, hauling or pushing, not oth...",0.237833,-3.414860,s192,104,"(0.0, 1.0, 0.7590132827324482, 1.0)",B66F19
171,B66F3/16,B,B66F,"Devices, e.g. jacks, adapted for uninterrupted...",0.181228,-3.605050,s192,104,"(0.0, 1.0, 0.7590132827324482, 1.0)",B66F3
172,B66F9/07545,B,B66F,Devices for lifting or lowering bulky or heavy...,-1.008837,-4.340151,s192,104,"(0.0, 1.0, 0.7590132827324482, 1.0)",B66F9


In [None]:
plt.figure(figsize=(10, 8))
plt.scatter(lbdf.x, lbdf.y, c=lbdf.rgba,s=100,edgecolor='white',linewidths=0.1)
plt.axis('off')

In [None]:
## plot zoomed target Y02T example
lbdf = layout[layout.cpc.str.startswith('Y02T')].query('x<-3 and y<-4')
lbdf['cpc8d'] = lbdf.cpc.str.split('/').str[0]
lbdf

Unnamed: 0,cpc,cpc1d,cpc4d,title,x,y,cpc8d
236815,Y02T10/52,Y,Y02T,,-3.973043,-4.987017,Y02T10
236816,Y02T10/54,Y,Y02T,,-4.032020,-5.140469,Y02T10
236817,Y02T10/56,Y,Y02T,,-3.848162,-5.062780,Y02T10
236819,Y02T10/62,Y,Y02T,Road transport of goods or passengers-Other ro...,-3.966332,-5.140134,Y02T10
236820,Y02T10/6204,Y,Y02T,,-3.760617,-5.083608,Y02T10
...,...,...,...,...,...,...,...
236999,Y02T90/167,Y,Y02T,Enabling technologies or technologies with a p...,-3.232666,-4.650054,Y02T90
237000,Y02T90/168,Y,Y02T,,-3.172472,-4.563382,Y02T90
237001,Y02T90/169,Y,Y02T,,-3.167439,-4.535995,Y02T90
237002,Y02T90/30,Y,Y02T,,-3.298459,-4.694339,Y02T90


In [None]:
plt.figure(figsize=(10, 8))
plt.scatter(lbdf.x, lbdf.y, c='#005eff',s=100,edgecolor='white',linewidths=0.1)
plt.axis('off')

In [None]:
## plot source fields
plt.figure(figsize=(10, 8))
plt.scatter(layout.x, layout.y, c="lightgrey",s=1)
srcplotdf = srccpc.merge(layout)
plt.scatter(srcplotdf.x, srcplotdf.y, c=srcplotdf.rgba,s=1,edgecolor='white',linewidths=0.1)
for i in range(6):
    plt.annotate(labels['label'][i],(labels['x'][i]-3,labels['y'][i]-1),color='grey',fontsize=13)
plt.axis('off')
plt.savefig('src_in_techspace.pdf')

In [None]:
## plot one general gbb associated source fields
target = "c2"
plt.figure(figsize=(10, 8))
plt.scatter(layout.x, layout.y, c="lightgrey",s=1)
plotdf = srcplotdf.merge(srcpath[srcpath.path==target])
plotdf['rgba'] = plotdf['rgba'].apply(lambda x: (x[0],x[1],x[2],x[3]*0.8))
plt.scatter(plotdf.x, plotdf.y, c=plotdf.rgba,s=5,marker='v',edgecolor='white',linewidths=0.1)
plt.scatter(gbbplotdf[gbbplotdf.path==target].x, gbbplotdf[gbbplotdf.path==target].y, c='green',s=25,edgecolor='white',linewidths=0.5)
plt.axis('off')
plt.savefig('general_gbb.pdf',bbox_inches='tight')

In [None]:
## plot one special gbb associated source fields
target = "c42"
plt.figure(figsize=(10, 8))
plt.scatter(layout.x, layout.y, c="lightgrey",s=1)
plotdf = srcplotdf.merge(srcpath[srcpath.path==target])
plotdf['rgba'] = plotdf['rgba'].apply(lambda x: (x[0],x[1],x[2],x[3]*0.8))
plt.scatter(plotdf.x, plotdf.y, c=plotdf.rgba,s=5,marker='v',edgecolor='white',linewidths=0.1)
plt.scatter(gbbplotdf[gbbplotdf.path==target].x, gbbplotdf[gbbplotdf.path==target].y, c='green',s=25,edgecolor='white',linewidths=0.5)
plt.axis('off')
plt.savefig('special_gbb.pdf',bbox_inches='tight')

In [None]:
## plot one special source field associated gbbs
source = "s174"
plt.figure(figsize=(10, 8))
plt.scatter(layout.x, layout.y, c="lightgrey",s=1)
plotdf = gbbplotdf.merge(srcpath[srcpath.src==source])
plotdf['rgba'] = plotdf['rgba'].apply(lambda x: (x[0],x[1],x[2],x[3]*0.8))
plt.scatter(plotdf.x, plotdf.y, c=plotdf.rgba,s=5,marker='v',edgecolor='white',linewidths=0.1)
plt.scatter(srcplotdf[srcplotdf.src==source].x, srcplotdf[srcplotdf.src==source].y, c='brown',s=25,edgecolor='white',linewidths=0.5)
plt.axis('off')
plt.savefig('special_src.pdf',bbox_inches='tight')

In [None]:
## plot one general source field associated gbbs
source = "s114"
plt.figure(figsize=(10, 8))
plt.scatter(layout.x, layout.y, c="lightgrey",s=1)
plotdf = gbbplotdf.merge(srcpath[srcpath.src==source])
plotdf['rgba'] = plotdf['rgba'].apply(lambda x: (x[0],x[1],x[2],x[3]*0.8))
plt.scatter(plotdf.x, plotdf.y, c=plotdf.rgba,s=5,marker='v',edgecolor='white',linewidths=0.1)
plt.scatter(srcplotdf[srcplotdf.src==source].x, srcplotdf[srcplotdf.src==source].y, c='brown',s=25,edgecolor='white',linewidths=0.5)
plt.axis('off')
plt.savefig('general_src.pdf',bbox_inches='tight')

## visualize auto firms from gbb presence

In [37]:
firmpatcpc = pd.read_parquet('firmreg5_srcpath2/firm_pat_cpc.parquet')
firmpatcpc.head()

Unnamed: 0,person_id,docdb_family_id,period,cpc,cpc4d
0,26821,9910158,1995,H01L 41/0993,H01L
1,26821,9910158,1995,H01L 41/338,H01L
2,26821,9911708,1995,H04R 1/26,H04R
3,26821,9911708,1995,H04R 1/403,H04R
4,26821,9911708,1995,H04R 3/12,H04R


In [38]:
firmname = pd.read_parquet('firmreg5_srcpath2/firmidname.parquet',columns=['psn_id','psn_name']).rename(columns={'psn_id':'person_id'}).drop_duplicates()
firmname.head()

Unnamed: 0,person_id,psn_name
0,24480361,PRINCETON GAMMA TECH INSTRUMENTS
1,24494825,PROCESS QUERY SYSTEMS
2,24500379,PRODUCTIVE RESEARCH
3,24522295,PRONERVE
4,24531532,PROTECTWISE


In [None]:
## Use 3 4-digit cpc to filter auto firms
auto = firmpatcpc[(firmpatcpc.cpc4d.str[:3] == "B60")
                | (firmpatcpc.cpc4d.str[:3] == "F02")
                | (firmpatcpc.cpc4d.str[:4] == "B62D")].groupby('person_id')['docdb_family_id'].nunique().reset_index()
auto.head()

Unnamed: 0,person_id,docdb_family_id
0,35524,12
1,42082,1
2,42782,1
3,43118,1
4,43121,125


In [40]:
auto = auto[auto.docdb_family_id>=80].copy()
auto.shape

(1422, 2)

In [41]:
autocpc = firmpatcpc.merge(auto[['person_id']])
autocpc.head()

Unnamed: 0,person_id,docdb_family_id,period,cpc,cpc4d
0,12911408,23506018,1995,H04L 12/42,H04L
1,12911408,23506018,1995,H04L 12/433,H04L
2,12911408,24024106,1995,H04L 12/42,H04L
3,12911408,24024106,1995,H04L 12/433,H04L
4,12911408,24024106,1995,H04L 12/437,H04L


In [None]:
## presence of GBB
autoclus = autocpc.merge(pathcpc)
autoclus.head()

Unnamed: 0,person_id,docdb_family_id,period,cpc,cpc4d,path,category_index,rgba
0,12911408,24381943,1995,H01L2924/0002,H01L,c65,62,"(0.0, 0.028772378516623953, 1.0, 1.0)"
1,12911408,39916466,2005,H01L2924/0002,H01L,c65,62,"(0.0, 0.028772378516623953, 1.0, 1.0)"
2,12911408,46926123,2005,H01L2924/0002,H01L,c65,62,"(0.0, 0.028772378516623953, 1.0, 1.0)"
3,12911408,54328299,2005,H01L2924/0002,H01L,c65,62,"(0.0, 0.028772378516623953, 1.0, 1.0)"
4,12911408,37462556,2005,H01L2924/0002,H01L,c65,62,"(0.0, 0.028772378516623953, 1.0, 1.0)"


In [None]:
## calc specialization with RCA
autoclusrca = autoclus.groupby(['person_id','path'])['docdb_family_id'].nunique().reset_index()
autoclusrca['rca'] = (
    autoclusrca["docdb_family_id"].sum()
    * autoclusrca.docdb_family_id
    / autoclusrca.groupby(["person_id"])["docdb_family_id"].transform(sum)
    / autoclusrca.groupby(["path"])["docdb_family_id"].transform(sum)
)
autoclusrca['binrca'] = np.where(autoclusrca.rca>=1,1,0)
autoclusrca['rca2'] = autoclusrca.rca/(autoclusrca.rca+1)
autoclusrca.head()

Unnamed: 0,person_id,path,docdb_family_id,rca,binrca,rca2
0,43121,c1,11,0.233555,0,0.189335
1,43121,c10,6,0.021434,0,0.020984
2,43121,c11,19,1.281377,1,0.561668
3,43121,c12,175,3.411672,1,0.773329
4,43121,c15,5,0.44078,0,0.305931


In [44]:
firm_cpc = pd.read_parquet('firmreg5_srcpath2/firm_cpc_period.parquet')
firm_cpc.head()

Unnamed: 0,person_id,period,cpc,docdb_family_id
0,26821,1995,A63J5/04,1
1,26821,1995,B06B1/0603,3
2,26821,1995,B41J2/1607,3
3,26821,1995,B41J2/1623,3
4,26821,1995,B41J2/1632,2


In [45]:
firm_cpc = firm_cpc.groupby(['person_id','cpc'])['docdb_family_id'].sum().reset_index()
firm_cpc.head()

Unnamed: 0,person_id,cpc,docdb_family_id
0,26821,A63J5/04,1
1,26821,B06B1/0603,4
2,26821,B41J2/1607,3
3,26821,B41J2/1623,3
4,26821,B41J2/1632,2


In [None]:
#tesla
fid=[30355257,30355178]
plt.figure(figsize=(10, 8))
plt.scatter(layout.x, layout.y, c="lightgrey",s=1)
plotdf = (gbbplotdf.merge(firm_cpc[firm_cpc.person_id.isin(fid)])
          .merge(autoclusrca[autoclusrca.person_id.isin(fid)].groupby('path')['rca2'].max().reset_index())
         )
plotdf['rgba'] = plotdf.apply(lambda x: (x.rgba[0],x.rgba[1],x.rgba[2],x.rca2),axis=1)
plt.scatter(plotdf.x, plotdf.y, c=plotdf.rgba,s=16,marker='v',edgecolor='white',linewidths=0.1)
plt.axis('off')
plt.savefig('TESLA.png',bbox_inches='tight')

In [57]:
autoclusrca[autoclusrca.person_id.isin(fid)].merge(pathname[['path','name']]).sort_values('rca',ascending=False).head()

Unnamed: 0,person_id,path,docdb_family_id,rca,binrca,rca2,name
15,30355178,c48,3,48.257607,1,0.979699,Advanced Solar Heat Collection
7,30355178,c13,1,15.503814,1,0.939408,Modern Dynamo-Electric Machines
9,30355257,c14,2,11.713428,1,0.921343,Electric Motor Control Technologies
29,30355178,c74,5,9.97072,1,0.908848,Electrical Coupling Devices
1,30355257,c1,29,9.608888,1,0.905739,Electrochemical Energy Storage


In [None]:
#bmw
fid=[2779817]
plt.figure(figsize=(10, 8))
plt.scatter(layout.x, layout.y, c="lightgrey",s=1)
plotdf = (gbbplotdf.merge(firm_cpc[firm_cpc.person_id.isin(fid)])
          .merge(autoclusrca[autoclusrca.person_id.isin(fid)].groupby('path')['rca2'].max().reset_index())
         )
plotdf['rgba'] = plotdf.apply(lambda x: (x.rgba[0],x.rgba[1],x.rgba[2],x.rca2),axis=1)
plt.scatter(plotdf.x, plotdf.y, c=plotdf.rgba,s=16,marker='v',edgecolor='white',linewidths=0.1)
plt.axis('off')
plt.savefig('BMW.png',bbox_inches='tight')

In [65]:
autoclusrca[autoclusrca.person_id.isin(fid)].merge(pathname[['path','name']]).sort_values('rca',ascending=False).head()

Unnamed: 0,person_id,path,docdb_family_id,rca,binrca,rca2,name
21,2779817,c35,323,7.314003,1,0.879721,Gas Storage and Transfer
40,2779817,c62,152,2.920258,1,0.744915,Advanced Welding Technologies
1,2779817,c10,2517,2.435435,1,0.708916,Automotive Mechanisms Cluster
0,2779817,c1,370,2.127863,1,0.680293,Electrochemical Energy Storage
16,2779817,c3,1654,1.996291,1,0.666254,Advanced Engine Systems


In [None]:
#byd
fid=[3620198]
plt.figure(figsize=(10, 8))
plt.scatter(layout.x, layout.y, c="lightgrey",s=1)
plotdf = (gbbplotdf.merge(firm_cpc[firm_cpc.person_id.isin(fid)])
          .merge(autoclusrca[autoclusrca.person_id.isin(fid)].groupby('path')['rca2'].max().reset_index())
         )
plotdf['rgba'] = plotdf.apply(lambda x: (x.rgba[0],x.rgba[1],x.rgba[2],x.rca2),axis=1)
plt.scatter(plotdf.x, plotdf.y, c=plotdf.rgba,s=16,marker='v',edgecolor='white',linewidths=0.1)
plt.axis('off')
plt.savefig('BYD.png',bbox_inches='tight')

In [72]:
autoclusrca[autoclusrca.person_id.isin(fid)].merge(pathname[['path','name']]).sort_values('rca',ascending=False).head()

Unnamed: 0,person_id,path,docdb_family_id,rca,binrca,rca2,name
4,3620198,c13,10,13.035407,1,0.928752,Modern Dynamo-Electric Machines
21,3620198,c49,2,11.403637,1,0.919378,Advanced Semiconductor Imaging Technologies
20,3620198,c48,6,8.114875,1,0.890289,Advanced Solar Heat Collection
0,3620198,c1,225,7.268459,1,0.879058,Electrochemical Energy Storage
7,3620198,c19,22,3.031849,1,0.751975,Advanced Ceramic Technologies


## visualize countries from gbb presence

In [None]:
cntrycpc = pd.read_parquet('../patstat/cntry_inv_patfam_cpcall.parquet',columns=['docdb_family_id','ctry_code','cpc4d','wt']).rename(columns={'cpc4d':'cpc'})
cntrycpc['cpc'] = cntrycpc['cpc'].str.replace(' ','')
cntrycpc.head()

Unnamed: 0,docdb_family_id,ctry_code,cpc,wt
0,209182,CN,A61M39/02,1.0
1,209182,CN,A61M39/26,1.0
2,569328,SE,A61M1/32,1.0
3,569328,SE,A61M1/325,1.0
4,569328,SE,Y10S261/28,1.0


In [None]:
cntrycpcagg = cntrycpc.groupby(['ctry_code','cpc'])['docdb_family_id'].nunique().reset_index()

In [77]:
worldpath = cntrycpc.merge(pathcpc).groupby(['path'])['docdb_family_id'].nunique().reset_index()
worldpath.sort_values('docdb_family_id',ascending=False).head(10)

Unnamed: 0,path,docdb_family_id
76,c78,715266
2,c10,333080
73,c75,264786
75,c77,231004
23,c3,216822
62,c65,196143
16,c23,171618
81,c9,162797
61,c64,155681
18,c25,150138


In [None]:
cntryagg= cntrycpc.groupby(['ctry_code'])['docdb_family_id'].nunique().reset_index()
cntryagg.head(10)

In [None]:
cntrypath = cntrycpc.merge(pathcpc).groupby(['path','ctry_code'])['docdb_family_id'].nunique().reset_index()
cntrypath.sort_values('docdb_family_id',ascending=False).head(10)

In [80]:
cntrypath = cntrypath.merge(worldpath.rename(columns={'docdb_family_id':'pathcnt'})).merge(cntryagg.rename(columns={'docdb_family_id':'cntrycnt'}))
cntrypath['rca'] = cntrypath.docdb_family_id*8820575/cntrypath.pathcnt/cntrypath.cntrycnt
cntrypath['rca2'] = cntrypath['rca']/(cntrypath['rca']+1)
cntrypath.sort_values('rca')

Unnamed: 0,path,ctry_code,docdb_family_id,pathcnt,cntrycnt,rca,rca2
1482,c64,HU,1,155681,7489,0.007565,0.007509
2357,c49,NZ,1,49391,5989,0.029819,0.028956
308,c65,BR,9,196143,12093,0.033468,0.032384
1616,c42,IL,1,6704,39284,0.033492,0.032407
1447,c32,HU,2,65907,7489,0.035741,0.034508
...,...,...,...,...,...,...,...
2940,c71,TR,318,46191,4723,12.857268,0.927836
2015,c35,LU,46,9716,3029,13.786942,0.932373
820,c20,DK,1289,27238,22583,18.483873,0.948676
2009,c28,LU,242,29203,3029,24.131567,0.960209


In [81]:
cntrypath.query('ctry_code=="CN"').sort_values('rca',ascending=False).head().merge(pathname)

Unnamed: 0,path,ctry_code,docdb_family_id,pathcnt,cntrycnt,rca,rca2,clus,name,desc
0,c18,CN,4406,30483,307394,4.147518,0.805732,18,Advanced Concrete Materials,Innovative compositions and functionalities fo...
1,c78,CN,50830,715266,307394,2.039173,0.670963,78,Advanced Network Management and Communications,"Emphasis on network management, signal process..."
2,c48,CN,1049,17343,307394,1.735613,0.634451,48,Advanced Solar Heat Collection,Technologies enhancing solar heat collector ef...
3,c79,CN,5906,98107,307394,1.727408,0.633352,79,Computer Security Systems,"Technologies for data protection, authenticati..."
4,c17,CN,2502,44175,307394,1.62522,0.61908,17,Advanced Lighting Technologies,"Innovative lighting solutions with LEDs, cooli..."


In [82]:
cntrypath.query('ctry_code=="US"').sort_values('rca',ascending=False).head().merge(pathname)

Unnamed: 0,path,ctry_code,docdb_family_id,pathcnt,cntrycnt,rca,rca2,clus,name,desc
0,c44,US,8858,14079,2689005,2.063807,0.673609,44,Advanced Hydrocarbon Processing,Technologies for refining and processing hydro...
1,c45,US,6948,11917,2689005,1.912486,0.656651,45,Hydrocarbon Catalysis,Catalytic transformations and purification of ...
2,c25,US,82204,150138,2689005,1.796005,0.642347,25,Advanced Medical Devices,"Innovative tools for diagnostics, treatment, a..."
3,c42,US,3655,6704,2689005,1.788375,0.641368,42,Advanced Molecular Sieves,Innovative catalysts with crystalline aluminos...
4,c72,US,58593,110066,2689005,1.746215,0.635863,72,Biotech Processes and Compounds,"Technologies involving enzymes, antibodies, an..."


In [117]:
cntrypath.query('ctry_code=="DE"').sort_values('rca',ascending=False).head().merge(pathname)

Unnamed: 0,path,ctry_code,docdb_family_id,pathcnt,cntrycnt,rca,rca2,clus,name,desc
0,c43,DE,1067,4299,949872,2.304776,0.697408,43,Advanced Gasification,"Processes, feed details, and apparatus for car..."
1,c10,DE,82291,333080,949872,2.294223,0.696438,10,Automotive Mechanisms Cluster,"Focus on vehicle transmissions, bearings, and ..."
2,c3,DE,52333,216822,949872,2.241321,0.691484,3,Advanced Engine Systems,Integrated components for modern internal comb...
3,c36,DE,4385,19481,949872,2.090211,0.676398,36,Fluid Control Systems,Technologies for controlling fluid flow and pr...
4,c60,DE,2122,10199,949872,1.932056,0.658942,60,Advanced Steam Systems,"Technologies focused on steam generation, cont..."


In [None]:
#CN
fid='CN'
plt.figure(figsize=(10, 8))
plt.scatter(layout.x, layout.y, c="lightgrey",s=1)
plotdf = (gbbplotdf.merge(cntrycpcagg[cntrycpcagg.ctry_code==fid])
          .merge(cntrypath[cntrypath.ctry_code==fid][['ctry_code','path','rca2']])
         )
plotdf['rgba'] = plotdf.apply(lambda x: (x.rgba[0],x.rgba[1],x.rgba[2],x.rca2**3),axis=1)
plt.scatter(plotdf.x, plotdf.y, c=plotdf.rgba,s=16,marker='v',edgecolor='white',linewidths=0.1)
plt.axis('off')
plt.savefig('CN.png',bbox_inches='tight')

In [None]:
#US
fid='US'
plt.figure(figsize=(10, 8))
plt.scatter(layout.x, layout.y, c="lightgrey",s=1)
plotdf = (gbbplotdf.merge(cntrycpcagg[cntrycpcagg.ctry_code==fid])
          .merge(cntrypath[cntrypath.ctry_code==fid][['ctry_code','path','rca2']])
         )
plotdf['rgba'] = plotdf.apply(lambda x: (x.rgba[0],x.rgba[1],x.rgba[2],x.rca2**4),axis=1)
plt.scatter(plotdf.x, plotdf.y, c=plotdf.rgba,s=16,marker='v',edgecolor='white',linewidths=0.1)
plt.axis('off')
plt.savefig('US.png',bbox_inches='tight')

In [None]:
#DE
fid='DE'
plt.figure(figsize=(10, 8))
plt.scatter(layout.x, layout.y, c="lightgrey",s=1)
plotdf = (gbbplotdf.merge(cntrycpcagg[cntrycpcagg.ctry_code==fid])
          .merge(cntrypath[cntrypath.ctry_code==fid][['ctry_code','path','rca2']])
         )
plotdf['rgba'] = plotdf.apply(lambda x: (x.rgba[0],x.rgba[1],x.rgba[2],x.rca2**3),axis=1)
plt.scatter(plotdf.x, plotdf.y, c=plotdf.rgba,s=16,marker='v',edgecolor='white',linewidths=0.1)
plt.axis('off')
plt.savefig('US.png',bbox_inches='tight')