# Visualizing My LinkedIn Network

## Introduction 

### Initial Imports

In [1]:
# Primary 
from pathlib import Path
import pandas as pd
import plotly.express as px

In [2]:
# Visualization settings
VIZ_WIDTH, VIZ_HEIGHT = 1000, 1000

# Importing in the Data
path = Path('Connections.csv')
connections_df = pd.read_csv(path, skiprows=2)

### Redacting Dataset

In [3]:
# Redacting dataset by limiting viewable columns
redacted_connections_df = connections_df[['Company', 'Position', 'Connected On', 'Email Address']]
redacted_connections_df

Unnamed: 0,Company,Position,Connected On,Email Address
0,Heartland Community Network,Senior Consultant,07 Feb 2026,
1,Coding Temple,Data Analyst,07 Feb 2026,
2,Stealth AI Startup,Solutions Engineer,07 Feb 2026,
3,Rebecca Everlene Trust Company,Data Analyst,07 Feb 2026,
4,Northeastern University MGEN,Graduate Teaching Assistant,07 Feb 2026,
...,...,...,...,...
4705,Pomona College,Admissions Intern,15 Oct 2014,
4706,Bari Katz Consulting,"Social Justice Educator, Curriculum Desiger, F...",14 Oct 2014,
4707,US Environmental Protection Agency (EPA),Brownfields Project Manager,14 Oct 2014,
4708,Pomona College,John Sutton Miner Professor of History in Clas...,14 Oct 2014,


### Data Cleaning

In [4]:
# Removing rows with missing data (keeping rows with at least 3 non-null values)
redacted_connections_df = redacted_connections_df.dropna(axis=0, thresh=3)
redacted_connections_df

Unnamed: 0,Company,Position,Connected On,Email Address
0,Heartland Community Network,Senior Consultant,07 Feb 2026,
1,Coding Temple,Data Analyst,07 Feb 2026,
2,Stealth AI Startup,Solutions Engineer,07 Feb 2026,
3,Rebecca Everlene Trust Company,Data Analyst,07 Feb 2026,
4,Northeastern University MGEN,Graduate Teaching Assistant,07 Feb 2026,
...,...,...,...,...
4704,Dickinson College,Vice President and Chief Diversity Officer,15 Oct 2014,
4705,Pomona College,Admissions Intern,15 Oct 2014,
4706,Bari Katz Consulting,"Social Justice Educator, Curriculum Desiger, F...",14 Oct 2014,
4707,US Environmental Protection Agency (EPA),Brownfields Project Manager,14 Oct 2014,


## Visualizations

In [8]:
# What companies do most of my colleagues work at?
px.treemap(redacted_connections_df, path=['Company', 'Position'], width=VIZ_WIDTH, height=VIZ_HEIGHT).show()

In [8]:
# What types of roles do most of my colleagues have?
px.treemap(redacted_connections_df, path=['Position', 'Company'], width=VIZ_WIDTH, height=VIZ_HEIGHT).show()

## Network Analysis

In [5]:
import networkx as nx
from pyvis.network import Network

### Company Network

In [7]:
# Build network graph: Companies connected by shared positions
G = nx.Graph()

# Add edges between companies that share position types
for position in redacted_connections_df['Position'].dropna().unique():
    companies = redacted_connections_df[redacted_connections_df['Position'] == position]['Company'].dropna().unique()
    for i, company1 in enumerate(companies):
        for company2 in companies[i+1:]:
            if G.has_edge(company1, company2):
                G[company1][company2]['weight'] += 1
            else:
                G.add_edge(company1, company2, weight=1)

# Create interactive visualization
net = Network(height='750px', width='100%', directed=False, notebook=True)
net.from_nx(G)
net.show_buttons(filter_=['physics'])
net.show('network_graph.html')

print(f"‚úì Network created with {G.number_of_nodes()} companies and {G.number_of_edges()} connections")
print("‚úì Saved to 'network_graph.html' for GitHub Pages hosting")

network_graph.html
‚úì Network created with 1399 companies and 25104 connections
‚úì Saved to 'network_graph.html' for GitHub Pages hosting
