# Patent Inventors Ego Analysis

## Import libraries

In [None]:
# import pandas for initial processing
import pandas as pd
import matplotlib.pyplot as plt

%matplotlib inline

In [None]:
# import NetworkX and nxviz for graphs
import networkx as nx
from nxviz import MatrixPlot, ArcPlot, CircosPlot

## Load/examine the data

In [None]:
# Load the raw CSV into a DataFrame.
# low_memory=False makes pandas parse the file in one pass rather than in
# chunks, which avoids mixed-type inference within a column.
data = pd.read_csv('co-co-inventing.csv', low_memory=False)
# Preview the first 20 rows
data.head(n=20)

In [None]:
# View a default data summary of all fields in data.
# include='all' summarises every column regardless of dtype, not only the
# numeric columns that describe() covers by default.
data.describe(include='all')

### Inventors

In [None]:
# Top 20 inventor_id values ranked by how many rows reference them.
# value_counts() returns the counts sorted in descending order, so the
# first 20 entries are the most frequently referenced inventors.
inventor_patents_count = data['inventor_id'].value_counts().head(20)
inventor_patents_count

In [None]:
# Horizontal bar chart of the top 20 inventors.
# barh draws the first row at the bottom, so an ascending sort places the
# largest count at the top of the chart (most to least, top to bottom).
inventor_patents_count.sort_values().plot.barh()
# To save as a PNG
# plt.savefig("inventor_barh.png", dpi=300)

### Patents

In [None]:
# List the top 20 patents by number of mentions.
# value_counts() sorts in descending order; head(20) then takes the first 20
# entries strictly by position. A bare `[:20]` slice on a Series can be
# interpreted as a label slice for some index dtypes (e.g. a float index),
# so head() is the unambiguous way to say "first 20 rows".
patent_counts = data['patent_id'].value_counts().head(20)
patent_counts

In [None]:
# Horizontal bar chart of the top 20 patents.
# barh draws the first row at the bottom, so an ascending sort places the
# largest count at the top of the chart (most to least, top to bottom).
patent_counts.sort_values().plot.barh()
# To save as a PNG
# plt.savefig("patents_barh.png", dpi=300)

### Firms

In [None]:
# Number of distinct Firm_id values.
# dropna=False counts a missing Firm_id as one distinct value, matching
# what len(unique()) would report.
data['Firm_id'].nunique(dropna=False)

In [None]:
# List the top 20 firms by number of mentions (either by patent/inventor/both).
# value_counts() sorts in descending order; head(20) then takes the first 20
# entries strictly by position. A bare `[:20]` slice on a Series can be
# interpreted as a label slice for some index dtypes (e.g. a float index),
# so head() is the unambiguous way to say "first 20 rows".
firm_counts = data['Firm_id'].value_counts().head(20)
firm_counts

In [None]:
# Horizontal bar chart of the top 20 firms.
# barh draws the first row at the bottom, so an ascending sort places the
# largest count at the top of the chart (most to least, top to bottom).
firm_counts.sort_values().plot.barh()
# To save as a PNG
# plt.savefig("firms_barh.png", dpi=300)

### Create Inventor's Patent Graph

In [None]:
# Keep only the rows whose inventor_id is '3930451-3' (the ego of the graph
# built further down), then preview the first 10 of them.
data_3930451_3 = data[data['inventor_id'].eq('3930451-3')]
data_3930451_3.head(n=10)

In [None]:
# Number of rows recorded for the selected inventor
# (presumably one row per patent -- verify against the CSV).
data_3930451_3.shape[0]

In [None]:
# Per-firm tally for the selected inventor: for every (Firm_id, inventor_id)
# pair, count() reports the number of non-null entries in each remaining
# column (i.e. rows, not de-duplicated patents).
inventor_firm_groups = (
    data_3930451_3
    .groupby(by=['Firm_id', 'inventor_id'])
    .count()
)
# To view the patent numbers
# inventor_firm_groups = data_3930451_3.groupby(['Firm_id', 'inventor_id', 'patent_id']).count()
inventor_firm_groups

In [None]:
# Build the inventor's ego graph from the filtered rows:
#   - the ego is the inventor (source column 'inventor_id')
#   - the alters are the patents (target column 'patent_id')
#   - every edge carries the row's Firm_id as an edge attribute
#
# Newer NetworkX releases (2.1 onwards, per the release notes) expose this
# constructor as from_pandas_edgelist; the older from_pandas_dataframe name
# no longer exists there. Prefer the new name and fall back to the old one
# so the notebook runs on either.
try:
    _edgelist_to_graph = nx.from_pandas_edgelist
except AttributeError:
    _edgelist_to_graph = nx.from_pandas_dataframe

graph_3930451_3 = _edgelist_to_graph(
    data_3930451_3,
    'inventor_id',
    'patent_id',
    edge_attr=['Firm_id'],
)

In [None]:
# Draw the inventor/patent graph with node labels.
# pos=None lets NetworkX compute a spring layout for the node positions.
nx.draw(graph_3930451_3, pos=None, with_labels=True)
# To save as a PNG
# plt.savefig("graph_3930451_3_patents.png", dpi=300)

### Create Inventor-to-Inventor Patent Graph

In [None]:
# For each patent_id, collect the inventor_id values of its rows into a list.
# The result is a Series indexed by patent_id.
data_patent_inventors = (
    data
    .groupby(by='patent_id')['inventor_id']
    .apply(list)
)

In [None]:
# Turn the per-patent inventor lists into a DataFrame and copy the index
# (the patent_id) into a regular column named 'patent_id1'.
dpi_df = data_patent_inventors.to_frame().assign(
    patent_id1=lambda frame: frame.index
)
dpi_df.head(n=5)

### Create Inventor-to-Inventor Firm Graph

In [None]:
# For each Firm_id, collect the inventor_id values of its rows into a list.
# The result is a Series indexed by Firm_id.
data_firm_inventors = (
    data
    .groupby(by='Firm_id')['inventor_id']
    .apply(list)
)
data_firm_inventors.head(n=5)

In [None]:
# Turn the per-firm inventor lists into a DataFrame and copy the index
# (the Firm_id) into a regular column named 'firm_id'.
dfi_df = data_firm_inventors.to_frame().assign(
    firm_id=lambda frame: frame.index
)
dfi_df.head(n=5)