# Visualizing My LinkedIn Network

## Introduction 

### Initial Imports

In [44]:
# Primary 
from pathlib import Path
import pandas as pd
import plotly.express as px

In [45]:
# Visualization settings
VIZ_WIDTH, VIZ_HEIGHT = 1000, 1000

# Importing in the Data
path = Path('Connections.csv')
connections_df = pd.read_csv(path, skiprows=2)
pd.set_option('display.max_rows', None)


### Redacting Dataset

In [46]:
# Redacting dataset by limiting viewable columns
redacted_connections_df = connections_df[['First Name', 'Last Name', 'Company', 'Position', 'Connected On']]
redacted_connections_df.head(50)

Unnamed: 0,First Name,Last Name,Company,Position,Connected On
0,Neha,Chede,Heartland Community Network,Senior Consultant,07 Feb 2026
1,Suzanne,Leonard,Coding Temple,Data Analyst,07 Feb 2026
2,Gauri,Nerkar,Stealth AI Startup,Solutions Engineer,07 Feb 2026
3,Tanay,Parikh,Rebecca Everlene Trust Company,Data Analyst,07 Feb 2026
4,Muskan,Raisinghani,Northeastern University MGEN,Graduate Teaching Assistant,07 Feb 2026
5,Pranoy Conrad,Luis,Raymour & Flanigan Furniture and Mattresses,Data Analyst,07 Feb 2026
6,Lynnette,Bennett,Synergy Direct Response,Chief Operating Officer,06 Feb 2026
7,Ahsan,Sultan,Penn State Nittany AI Alliance,Automation & Solutions Engineer Intern,06 Feb 2026
8,Victor,Defontnouvelle,DoorDash,Software Engineer,06 Feb 2026
9,Shreya,Pavadashettar Jayanna,Clinical ink,Data Analyst,06 Feb 2026


### Data Cleaning

In [47]:
# Removing rows with missing data (keeping rows with at least 3 non-null values)
redacted_connections_df = redacted_connections_df.dropna(axis=0, thresh=3)
redacted_connections_df.head(50)

Unnamed: 0,First Name,Last Name,Company,Position,Connected On
0,Neha,Chede,Heartland Community Network,Senior Consultant,07 Feb 2026
1,Suzanne,Leonard,Coding Temple,Data Analyst,07 Feb 2026
2,Gauri,Nerkar,Stealth AI Startup,Solutions Engineer,07 Feb 2026
3,Tanay,Parikh,Rebecca Everlene Trust Company,Data Analyst,07 Feb 2026
4,Muskan,Raisinghani,Northeastern University MGEN,Graduate Teaching Assistant,07 Feb 2026
5,Pranoy Conrad,Luis,Raymour & Flanigan Furniture and Mattresses,Data Analyst,07 Feb 2026
6,Lynnette,Bennett,Synergy Direct Response,Chief Operating Officer,06 Feb 2026
7,Ahsan,Sultan,Penn State Nittany AI Alliance,Automation & Solutions Engineer Intern,06 Feb 2026
8,Victor,Defontnouvelle,DoorDash,Software Engineer,06 Feb 2026
9,Shreya,Pavadashettar Jayanna,Clinical ink,Data Analyst,06 Feb 2026


## Visualizations

In [None]:
# What companies do most of my colleagues work at?
px.treemap(redacted_connections_df.dropna(subset=['Company', 'Position']), path=['Company', 'Position'], width=VIZ_WIDTH, height=VIZ_HEIGHT).show()

In [None]:
# Where are my colleagues mostly located?
## Table with the top 30 companies my colleagues work at, and the number of connections I have at each company.
company_counts = redacted_connections_df['Company'].value_counts().head(30)
company_counts_df = company_counts.reset_index()
company_counts_df.columns = ['Company', 'Number of Connections']
company_counts_df

Unnamed: 0,Company,Number of Connections
0,Goldman Sachs,63
1,Netflix,48
2,Amazon,41
3,Slalom,28
4,Freelance,24
5,Pomona College,24
6,Microsoft,20
7,Google,18
8,Self-employed,18
9,Deloitte,16


In [None]:
# What types of roles do most of my colleagues have?
px.treemap(redacted_connections_df.dropna(subset=['Position', 'Company']), path=['Position', 'Company'], width=VIZ_WIDTH, height=VIZ_HEIGHT).show()

In [None]:
# What job position do my colleagues mostly hold?
## Table with the top 30 positions my colleagues have and the number of connections I have at each position.
position_counts = redacted_connections_df['Position'].value_counts().head(30)
position_counts_df = position_counts.reset_index()
position_counts_df.columns = ['Position', 'Number of Connections']
position_counts_df

Unnamed: 0,Position,Number of Connections
0,Data Analyst,169
1,Data Engineer,69
2,Software Engineer,65
3,Data Scientist,63
4,Founder,57
5,Senior Data Analyst,47
6,Business Analyst,46
7,Vice President,38
8,Associate,32
9,Business Intelligence Analyst,27


In [None]:
# Group my positions into decision makers (ie. VPs and up) and non-decision makers (ie. individual contributors and managers)
decision_makers = ['VP', 'SVP', 'EVP', 'Chief', 'C-level', 'President', 'Director']
redacted_connections_df['Decision Maker'] = redacted_connections_df['Position'].apply(
    lambda x: 1 if isinstance(x, str) and any(dm in x for dm in decision_makers) else 0
)
# Improve value_counts labels for clarity
decision_maker_counts = redacted_connections_df['Decision Maker'].value_counts().rename({1: 'Decision Maker', 0: 'Non-Decision Maker'})
decision_maker_counts



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Non-Decision Maker    4294
Decision Maker         416
Name: Decision Maker, dtype: int64

In [49]:
# Visualize Decision Maker distribution as a pie chart
import plotly.graph_objects as go
fig = go.Figure(data=[
    go.Pie(labels=decision_maker_counts.index, values=decision_maker_counts.values, hole=0.3)
])
fig.update_layout(title_text="Decision Maker vs Non-Decision Maker Distribution")
fig.show()

## Business Development Analytics

### Decision-Makers in Target Roles

In [50]:
# Define decision-maker levels and target functional areas
decision_levels = ['VP', 'SVP', 'EVP', 'Chief', 'C-level', 'President', 'Director']
target_roles = ['Data Analytics', 'Engineering', 'Data Science', 'Machine Learning']

# Case-insensitive filtering
decision_makers = connections_df[
    (connections_df['Position'].fillna('').str.contains('|'.join(decision_levels), case=False)) &
    (connections_df['Position'].fillna('').str.contains('|'.join(target_roles), case=False))
].copy()

# Select relevant columns and rename for clarity
decision_makers = decision_makers[['First Name', 'Last Name', 'Position', 'Company', 'URL']].copy()
decision_makers.columns = ['First Name', 'Last Name', 'Position', 'Company', 'LinkedIn URL']

# Sort by company, then by position
decision_makers = decision_makers.sort_values(['Company', 'Position']).reset_index(drop=True)

print(f"Found {len(decision_makers)} decision-makers matching your criteria\n")
print("=" * 120)

# Display grouped by company with details
for company in decision_makers['Company'].unique():
    company_people = decision_makers[decision_makers['Company'] == company]
    print(f"\nüìç {company} ({len(company_people)} contact{'s' if len(company_people) > 1 else ''})")
    print("-" * 120)
    for idx, row in company_people.iterrows():
        print(f"  ‚Ä¢ {row['First Name']} {row['Last Name']}")
        print(f"    Position: {row['Position']}")
        print(f"    LinkedIn: {row['LinkedIn URL']}\n")

Found 8 decision-makers matching your criteria


üìç America Votes (1 contact)
------------------------------------------------------------------------------------------------------------------------
  ‚Ä¢ Sarah Normoyle
    Position: National Director of Engineering and Product
    LinkedIn: https://www.linkedin.com/in/sarahnormoyle


üìç CCS Fundraising (1 contact)
------------------------------------------------------------------------------------------------------------------------
  ‚Ä¢ John Sammis
    Position: Executive Vice President, Data Analytics
    LinkedIn: https://www.linkedin.com/in/john-sammis-56a7443


üìç City National Bank (1 contact)
------------------------------------------------------------------------------------------------------------------------
  ‚Ä¢ Xing  Fu
    Position: Assistant Vice President, CRA Data Analytics Analyst
    LinkedIn: https://www.linkedin.com/in/xfu07


üìç Nike (1 contact)
-----------------------------------------------------------

In [51]:
# Display decision-makers dataframe
print(f"\nüìä Decision-Makers Summary:")
print(f"   ‚Ä¢ Total: {len(decision_makers)}")
print(f"   ‚Ä¢ Companies represented: {decision_makers['Company'].nunique()}")
print("\n")
decision_makers


üìä Decision-Makers Summary:
   ‚Ä¢ Total: 8
   ‚Ä¢ Companies represented: 8




Unnamed: 0,First Name,Last Name,Position,Company,LinkedIn URL
0,Sarah,Normoyle,National Director of Engineering and Product,America Votes,https://www.linkedin.com/in/sarahnormoyle
1,John,Sammis,"Executive Vice President, Data Analytics",CCS Fundraising,https://www.linkedin.com/in/john-sammis-56a7443
2,Xing,Fu,"Assistant Vice President, CRA Data Analytics A...",City National Bank,https://www.linkedin.com/in/xfu07
3,Sai Sharanya,Nalla,Principal Machine Learning Director | Enterpri...,Nike,https://www.linkedin.com/in/sharanyanalla
4,Thomas,"Mueller, MSIS","Director, BI Data Engineering",R1 RCM,https://www.linkedin.com/in/tmspecial
5,Elaine,Hardwick,Senior Director of Software Engineering - Plat...,Rapid7,https://www.linkedin.com/in/elainehardwick
6,Rachel,Whaley,"Director, Data Science & Operations",RedCircle,https://www.linkedin.com/in/rachelrwhaley
7,Steve,Re,"VP, Software Engineering",Versant Media,https://www.linkedin.com/in/stevere


### Broader Prospect Analysis (Tiered)

In [52]:
# Broader criteria for prospects
vp_plus = ['VP', 'SVP', 'EVP', 'Chief', 'C-level', 'President']
director_level = ['Director', 'VP', 'SVP', 'EVP', 'Chief', 'C-level', 'President']
senior_level = ['Senior', 'Manager', 'Director', 'VP', 'SVP', 'EVP', 'Chief', 'C-level', 'President']
target_keywords = ['Data', 'Engineering', 'Analytics', 'Science', 'Machine Learning', 'AI', 'ML']

# Create prospect tiers
prospects_tier1 = connections_df[
    (connections_df['Position'].fillna('').str.contains('|'.join(vp_plus), case=False)) &
    (connections_df['Position'].fillna('').str.contains('|'.join(target_keywords), case=False))
].copy()

prospects_tier2 = connections_df[
    (connections_df['Position'].fillna('').str.contains('|'.join(senior_level), case=False)) &
    (connections_df['Position'].fillna('').str.contains('|'.join(target_keywords), case=False))
].copy()

# Remove tier1 from tier2 to avoid duplication
prospects_tier2 = prospects_tier2[~prospects_tier2.index.isin(prospects_tier1.index)]

# All in target roles (regardless of seniority)
all_target_role = connections_df[
    connections_df['Position'].fillna('').str.contains('|'.join(target_keywords), case=False)
].copy()

# Clean up columns
for df in [prospects_tier1, prospects_tier2, all_target_role]:
    if len(df) > 0:
        df.drop_duplicates(subset=['First Name', 'Last Name', 'Company'], inplace=True)

print(f"\nüéØ BROADER PROSPECT BREAKDOWN:\n")
print(f"Tier 1 (VP+ in Data/Engineering): {len(prospects_tier1)} prospects")
print(f"Tier 2 (Senior/Manager in Data/Engineering): {len(prospects_tier2)} prospects")
print(f"All in Target Roles (any level): {len(all_target_role)} prospects")
print(f"\nCombined unique prospects: {len(set(list(prospects_tier1.index) + list(prospects_tier2.index) + list(all_target_role.index)))}")
print(f"\n" + "=" * 120)


üéØ BROADER PROSPECT BREAKDOWN:

Tier 1 (VP+ in Data/Engineering): 22 prospects
Tier 2 (Senior/Manager in Data/Engineering): 301 prospects
All in Target Roles (any level): 1254 prospects

Combined unique prospects: 1254



In [53]:
# Add tier information and combine all prospects
prospects_tier1['Tier'] = 'Tier 1: VP+ in Data/Engineering'
prospects_tier2['Tier'] = 'Tier 2: Senior/Manager in Data/Engineering'
all_target_role['Tier'] = 'Tier 3: All Data/Engineering roles'

# Combine and deduplicate
all_prospects = pd.concat([prospects_tier1, prospects_tier2, all_target_role])
all_prospects = all_prospects.drop_duplicates(subset=['First Name', 'Last Name', 'Company'])

# Select and clean columns
all_prospects = all_prospects[['First Name', 'Last Name', 'Position', 'Company', 'URL', 'Tier']].copy()
all_prospects.columns = ['First Name', 'Last Name', 'Position', 'Company', 'LinkedIn URL', 'Tier']

# Sort by tier, then company
tier_order = {'Tier 1: VP+ in Data/Engineering': 0, 'Tier 2: Senior/Manager in Data/Engineering': 1, 'Tier 3: All Data/Engineering roles': 2}
all_prospects['tier_order'] = all_prospects['Tier'].map(tier_order)
all_prospects = all_prospects.sort_values(['tier_order', 'Company', 'Position']).drop('tier_order', axis=1).reset_index(drop=True)

# Display tiered prospects dataframe
print(f"\nüìä All Prospects Summary:")
print(f"   ‚Ä¢ Total: {len(all_prospects)}")
print(f"   ‚Ä¢ Companies represented: {all_prospects['Company'].nunique()}")
print(f"   ‚Ä¢ Tier 1 (VP+): {len(all_prospects[all_prospects['Tier'] == 'Tier 1: VP+ in Data/Engineering'])}")
print(f"   ‚Ä¢ Tier 2 (Senior/Manager): {len(all_prospects[all_prospects['Tier'] == 'Tier 2: Senior/Manager in Data/Engineering'])}")
print(f"   ‚Ä¢ Tier 3 (All roles): {len(all_prospects[all_prospects['Tier'] == 'Tier 3: All Data/Engineering roles'])}")
print("\n")
all_prospects #TODO: Update to view full dataframe instead of summary stats


üìä All Prospects Summary:
   ‚Ä¢ Total: 1254
   ‚Ä¢ Companies represented: 1044
   ‚Ä¢ Tier 1 (VP+): 22
   ‚Ä¢ Tier 2 (Senior/Manager): 301
   ‚Ä¢ Tier 3 (All roles): 931




Unnamed: 0,First Name,Last Name,Position,Company,LinkedIn URL,Tier
0,Doug,Sack,"Vice President - Research, Data & Analytics",Annapurna,https://www.linkedin.com/in/dougsack,Tier 1: VP+ in Data/Engineering
1,Brad,Coverdale,"Founder, Chief Data Strategist",BC Data Insights LLC,https://www.linkedin.com/in/bradcoverdale90day...,Tier 1: VP+ in Data/Engineering
2,Harry L.,Reavis Jr,Vice President Global Data Center Facilities M...,Brown Brothers Harriman,https://www.linkedin.com/in/harry-l-r-b7538bb,Tier 1: VP+ in Data/Engineering
3,John,Sammis,"Executive Vice President, Data Analytics",CCS Fundraising,https://www.linkedin.com/in/john-sammis-56a7443,Tier 1: VP+ in Data/Engineering
4,Ray,Morris,Chief Data Officer,COUNTRY Financial¬Æ,https://www.linkedin.com/in/raycmorris,Tier 1: VP+ in Data/Engineering
5,Chih-Yi(Jerry),Chen,AVP - Counterparty Credit Risk Analytics,Citi,https://www.linkedin.com/in/-jerry-chen,Tier 1: VP+ in Data/Engineering
6,Lauren,Cathey,"Wholesale Lending Data Business Analyst, VP",Citi,https://www.linkedin.com/in/lauren-cathey-9030...,Tier 1: VP+ in Data/Engineering
7,Xing,Fu,"Assistant Vice President, CRA Data Analytics A...",City National Bank,https://www.linkedin.com/in/xfu07,Tier 1: VP+ in Data/Engineering
8,Matthias,"S. ,PMP",Chief Data Scientist,Ethegra Technologies LLC,https://www.linkedin.com/in/matthew-s-data-spe...,Tier 1: VP+ in Data/Engineering
9,Michaela Ren√©e,Stalder,"AVP, Capital Raising",GID,https://www.linkedin.com/in/michaela-stalder,Tier 1: VP+ in Data/Engineering
