In [1]:
from pathlib import Path

import networkx as nx
import pandas as pd

# Folder that holds the exported contract CSVs. Edit this single line to run
# the notebook on another machine instead of touching every read_csv call.
DATA_DIR = Path('C:\\Users\\Lenovo\\OneDrive\\文档\\CCL 2023')

# The three contract exports to combine (going by the file names: HRSA, CFPB
# and IRS contracts).
CONTRACT_FILES = [
    'Business with HRSA Contracts 2019-2023.csv',
    'Business with CFPB Contracts.csv',
    'Business with IRS Contracts.csv',
]

# Read in the contract data from the CSV files
df1, df2, df3 = (pd.read_csv(DATA_DIR / file_name) for file_name in CONTRACT_FILES)

# Stack the three exports into one table. ignore_index=True gives every
# contract row a unique label; without it each file restarts at 0 and the
# combined frame carries duplicate index labels.
df = pd.concat([df1, df2, df3], axis=0, ignore_index=True)


# Create a new dataframe to store the company and agency relationships.
# 'Total Value' is declared up front because the aggregation step writes to it.
relationship_data = pd.DataFrame(columns=['Company', 'Agency', 'Contracts', 'Total Value'])

In [2]:
# Summarise the contracts per (company, contracting office) pair:
#   Contracts   - number of contract rows the pair shares
#   Total Value - sum of 'Action Obligation ($)' over those rows
#
# A single groupby replaces the earlier row-by-row loop, which relied on
# DataFrame.append (removed in pandas 2.0) and re-scanned the whole result
# table once per contract.
#
# sort=False keeps the pairs in order of first appearance, as the loop did.
# Two deliberate differences from the loop:
#   * rows whose company or office name is missing are dropped (groupby skips
#     NaN keys by default) instead of each becoming its own relationship row;
#   * missing obligation amounts are skipped by the sum rather than turning
#     the pair's total into NaN.
relationship_data = (
    df.groupby(['Legal Business Name', 'Contracting Office Name'], sort=False)
    .agg(**{
        # 'size' counts rows, so a contract with no amount still counts as one.
        'Contracts': ('Action Obligation ($)', 'size'),
        'Total Value': ('Action Obligation ($)', 'sum'),
    })
    .reset_index()
    .rename(columns={
        'Legal Business Name': 'Company',
        'Contracting Office Name': 'Agency',
    })
)
    
# Build an undirected graph linking companies to contracting offices.
G = nx.Graph()

# Register every distinct company first, then every distinct office. Each node
# is tagged with its kind ('node_type') and carries its own label as 'name'.
G.add_nodes_from(
    (company_name, {'node_type': 'company', 'name': company_name})
    for company_name in relationship_data['Company'].unique()
)
G.add_nodes_from(
    (agency_name, {'node_type': 'agency', 'name': agency_name})
    for agency_name in relationship_data['Agency'].unique()
)

# One edge per relationship row; the edge's 'weight' attribute is the number of
# contracts recorded for that company/office pair.
G.add_weighted_edges_from(
    zip(
        relationship_data['Company'],
        relationship_data['Agency'],
        relationship_data['Contracts'],
    )
)

In [3]:
# Identify the offices that have a relationship with a given company.

node_of_interest = 'NORTHROP GRUMMAN SPACE AND MISSION SYSTEMS CORPORATION'

# The adjacency view of a node iterates over the nodes it shares an edge with.
neighbors = list(G.adj[node_of_interest])
print(f"The neighbors of {node_of_interest} are: {neighbors}")

The neighbors of NORTHROP GRUMMAN SPACE AND MISSION SYSTEMS CORPORATION are: ['BUREAU OF HEALTH PROFESSSIONS', 'OFFICE OF THE ADMINISTRATOR']


In [4]:
# Report how strongly one company is connected to each office it works with.
# The score is the edge weight (contract count) stored on the graph.
# NOTE(review): 113 is hard-coded; it matches the largest weight visible in the
# recorded output, so it is presumably the graph-wide maximum - confirm.

company = "JEFF MURRAY'S PROGRAMMING SHOP"
neighbors = G.neighbors(company)
print(f"For {company}")
for neighbor in neighbors:
    edge_weight = G.edges[company, neighbor]['weight']
    print(
        f"Neighbor: {neighbor}",
        f"Connection Score:{edge_weight} out of 113",
        '----',
        sep='\n',
    )

For JEFF MURRAY'S PROGRAMMING SHOP
Neighbor: OFFICE OF THE ADMINISTRATOR
Connection Score:28 out of 113
----
Neighbor: HRSA HEADQUARTERS
Connection Score:13 out of 113
----


In [5]:

# Report how strongly one contracting office is connected to each company it
# works with. The score is the edge weight (contract count) stored on the graph.
# Note that `company` holds an office name here; the variable name is kept so
# this cell mirrors the company-centred cell before it.
# NOTE(review): 113 is hard-coded; it matches the largest weight visible in the
# recorded output, so it is presumably the graph-wide maximum - confirm.
company='HRSA HEADQUARTERS'
neighbors = G.neighbors(company)
# Fixed: the header lacked a space after "For" and printed "ForHRSA HEADQUARTERS".
print(f"For {company}")
for neighbor in neighbors:
    print(f"Neighbor: {neighbor}")
    edge_weight = G[company][neighbor]['weight']
    print(f"Connection Score:{edge_weight} out of 113")
    print('----')

ForHRSA HEADQUARTERS
Neighbor: SAPIENT GOVERNMENT SERVICES, INC.
Connection Score:113 out of 113
----
Neighbor: ARCH SYSTEMS LLC
Connection Score:8 out of 113
----
Neighbor: BOOZ ALLEN HAMILTON INC.
Connection Score:7 out of 113
----
Neighbor: QUETEL CORPORATION
Connection Score:1 out of 113
----
Neighbor: SOLE SOLUTIONS INC.
Connection Score:8 out of 113
----
Neighbor: ACTIVEPDF INC
Connection Score:1 out of 113
----
Neighbor: KRM ASSOCIATES, INC
Connection Score:2 out of 113
----
Neighbor: SUPRIYA AGGARWAL
Connection Score:10 out of 113
----
Neighbor: JEFF MURRAY'S PROGRAMMING SHOP
Connection Score:13 out of 113
----
Neighbor: IMMERSIVE CONCEPTS, LLC
Connection Score:4 out of 113
----
Neighbor: SCIENTIFIC SYSTEMS AND SOFTWARE INTERNATIONAL CORPORATION
Connection Score:4 out of 113
----
Neighbor: AQUILENT, INC
Connection Score:6 out of 113
----
Neighbor: SHABAS SOLUTIONS, LLC
Connection Score:6 out of 113
----
Neighbor: SPROUT SOCIAL, INC.
Connection Score:3 out of 113
----
Neighbor: 

In [6]:
# Print every edge in the graph together with its weight (the contract count),
# one "source -> target: weight" line per edge.
for edge in G.edges.data('weight'):
    source, target, weight = edge
    print(f"{source} -> {target}: {weight}")

NORTHROP GRUMMAN SPACE AND MISSION SYSTEMS CORPORATION -> BUREAU OF HEALTH PROFESSSIONS: 5
NORTHROP GRUMMAN SPACE AND MISSION SYSTEMS CORPORATION -> OFFICE OF THE ADMINISTRATOR: 8
PRIMESCAPE SOLITIONS INC -> OFFICE OF THE ADMINISTRATOR: 45
SAPIENT GOVERNMENT SERVICES, INC. -> HRSA HEADQUARTERS: 113
SAPIENT GOVERNMENT SERVICES, INC. -> OFFICE OF THE ADMINISTRATOR: 6
ARCH SYSTEMS LLC -> HRSA HEADQUARTERS: 8
BOOZ ALLEN HAMILTON INC. -> HRSA HEADQUARTERS: 7
LUX CONSULTING GROUP -> OFFICE OF THE ADMINISTRATOR: 5
LUX CONSULTING GROUP -> BUREAU OF HEALTH PROFESSSIONS: 1
LUX CONSULTING GROUP, INC. -> OFFICE OF THE ADMINISTRATOR: 10
MILLENNIUM SERVICES 2000+ INCORPORATED -> OFFICE OF THE ADMINISTRATOR: 5
MINDFINDERS INCORPORATED -> OFFICE OF THE ADMINISTRATOR: 17
ACCESS SYSTEMS INCORPORATED -> OFFICE OF THE ADMINISTRATOR: 5
ACCESS SYSTEMS INCORPORATED -> BUREAU OF HEALTH PROFESSSIONS: 1
NORTHROP GRUMMAN INFORMATION T -> OFFICE OF THE ADMINISTRATOR: 4
NORTHROP GRUMMAN INFORMATION TECHNOLOGY GLOB