## Get connections of Judicial Watch

In [20]:
import requests
from bs4 import BeautifulSoup
import time

# Scrape ProPublica's full-text search for filings that mention the query and
# collect one record (company name, relationship, year) per search hit.

# Base URL for ProPublica's nonprofit full-text search results
base_url = "https://projects.propublica.org/nonprofits/full_text_search"

# Number of result pages to scrape (increase if needed)
num_pages = 14

# Search query; formatted like an EIN — presumably Judicial Watch's, per the
# section title (TODO confirm)
search_query = "52-1885088"

# Seconds to wait for the server before giving up on a single request;
# without a timeout a stalled connection would hang the cell indefinitely
request_timeout = 30

# Seconds to pause between consecutive page requests
page_delay = 10

# One dict per search hit, in the order the hits were scraped
company_info_list = []

for page_num in range(1, num_pages + 1):
    # Let requests assemble and URL-encode the query string
    response = requests.get(
        base_url,
        params={"page": page_num, "q": search_query},
        timeout=request_timeout,
    )
    response.raise_for_status()  # Abort on any HTTP error status

    soup = BeautifulSoup(response.text, "html.parser")

    # Each search hit is rendered as a <div> carrying exactly this class string
    company_elements = soup.find_all("div", class_="form-name text-link result-item__hed margin-right")

    for elem in company_elements:
        link = elem.find("a")
        if link is None:
            # A hit without a link has no text to parse; skip it rather than crash
            continue
        raw_text = link.get_text(strip=True)

        # The link text is split on an em dash: name first, form details second
        parts = raw_text.split('—')
        company_name = parts[0].strip()
        form_info = parts[1].strip() if len(parts) > 1 else ""

        # The schedule named in the form details decides the relationship label
        if "Schedule I" in form_info:
            relationship = "Grant"
        elif "Schedule A" in form_info:
            relationship = "Supported Company"
        else:
            relationship = "Unknown"

        # The year is taken from the text after the last "(" when it is all digits
        year_part = form_info.split("(")[-1].strip(")")
        year = int(year_part) if year_part.isdigit() else "Unknown"

        company_info = {
            "Company Name": company_name,
            "Relationship": relationship,
            "Year": year,
        }

        print(company_info)
        company_info_list.append(company_info)

    # Pause between pages; there is nothing to wait for after the last one
    if page_num < num_pages:
        time.sleep(page_delay)

# Output all collected company information
# for info in company_info_list:
#     print(f"Company Name: {info['Company Name']}")
#     print(f"Relationship: {info['Relationship']}")
#     print(f"Year: {info['Year']}")
#     print("----")

{'Company Name': 'Citizens For Home Rule Inc', 'Relationship': 'Grant', 'Year': 2023}
{'Company Name': 'Phoenix Law Enforcement Association', 'Relationship': 'Grant', 'Year': 2017}
{'Company Name': 'The Halaj Foundation', 'Relationship': 'Supported Company', 'Year': 2013}
{'Company Name': 'Leadership Institute', 'Relationship': 'Grant', 'Year': 2019}
{'Company Name': 'Center Of The American Experiment', 'Relationship': 'Grant', 'Year': 2022}
{'Company Name': 'Center Of The American Experiment', 'Relationship': 'Grant', 'Year': 2021}
{'Company Name': 'National Center For Housing Management', 'Relationship': 'Grant', 'Year': 2018}
{'Company Name': 'Center Of The American Experiment', 'Relationship': 'Grant', 'Year': 2020}
{'Company Name': 'Community Foundation Donor Directed Fund', 'Relationship': 'Grant', 'Year': 2018}
{'Company Name': 'Kauai Energy Institute', 'Relationship': 'Grant', 'Year': 2020}
{'Company Name': 'Capital Community Foundation Inc', 'Relationship': 'Grant', 'Year': 20

In [28]:
# Reduce the scraped records to one entry per company name: the first record
# seen for a name wins, and records labelled 'Unknown' are discarded.
filtered_data = []
unique_companies = set()

for record in company_info_list:
    name, kind = record['Company Name'], record['Relationship']

    # Skip unclassified records and names that were already kept
    if kind.lower() == 'unknown' or name in unique_companies:
        continue

    unique_companies.add(name)
    filtered_data.append({'Company Name': name, 'Relationship': kind})

filtered_data

[{'Company Name': 'Citizens For Home Rule Inc', 'Relationship': 'Grant'},
 {'Company Name': 'Phoenix Law Enforcement Association',
  'Relationship': 'Grant'},
 {'Company Name': 'The Halaj Foundation', 'Relationship': 'Supported'},
 {'Company Name': 'Leadership Institute', 'Relationship': 'Grant'},
 {'Company Name': 'Center Of The American Experiment',
  'Relationship': 'Grant'},
 {'Company Name': 'National Center For Housing Management',
  'Relationship': 'Grant'},
 {'Company Name': 'Community Foundation Donor Directed Fund',
  'Relationship': 'Grant'},
 {'Company Name': 'Kauai Energy Institute', 'Relationship': 'Grant'},
 {'Company Name': 'Capital Community Foundation Inc', 'Relationship': 'Grant'},
 {'Company Name': 'Ramtell Inc', 'Relationship': 'Supported'},
 {'Company Name': 'The Helen Diller Family Foundation',
  'Relationship': 'Grant'},
 {'Company Name': 'The Jewish Federation Of Sarasota Manatee Inc',
  'Relationship': 'Grant'},
 {'Company Name': 'The American Foundation For C

In [38]:
# Keep just the names of the de-duplicated records, in their existing order
company_names = [record['Company Name'] for record in filtered_data]
company_names

['Citizens For Home Rule Inc',
 'Phoenix Law Enforcement Association',
 'The Halaj Foundation',
 'Leadership Institute',
 'Center Of The American Experiment',
 'National Center For Housing Management',
 'Community Foundation Donor Directed Fund',
 'Kauai Energy Institute',
 'Capital Community Foundation Inc',
 'Ramtell Inc',
 'The Helen Diller Family Foundation',
 'The Jewish Federation Of Sarasota Manatee Inc',
 'The American Foundation For Charitable Support Inc',
 'Community Foundation Of North Florida Inc',
 'The Greater Pinebelt Community Foundation',
 'Community Foundation For Nantucket',
 'Family Office Foundation Incorporated',
 'Wiseheart Foundation',
 'Greene County Community Foundation',
 'Cornell University Foundation',
 'Oregon Jewish Community Foundation',
 'Bradley Impact Fund Inc',
 'The Community Foundation Of The Texas Hill Country Inc',
 'Richland County Foundation Of Mansfield',
 'East Texas Communties Foundation Inc',
 'Southwest Florida Community Foundation Inc',


## Get the EIN of each company

In [39]:
# Look up an EIN for every company name via the ProPublica Nonprofit Explorer
# search API.

# Base endpoint for the ProPublica Nonprofit Explorer API.
# Note: this rebinds the module-level `base_url` that the scraping cells also set.
base_url = "https://projects.propublica.org/nonprofits/api/v2/search.json"

# Maps each company name to its EIN, or to one of two sentinel strings
# ("EIN not found" / "Error in API request") when the lookup did not succeed
company_eins = {}

for name in company_names:
    try:
        # `params` URL-encodes the name, so characters such as "&" or "#" in a
        # company name cannot corrupt the query string; the timeout keeps one
        # stalled request from hanging the whole loop
        response = requests.get(base_url, params={"q": name}, timeout=30)
    except requests.RequestException:
        company_eins[name] = "Error in API request"
        continue

    if response.status_code != 200:
        company_eins[name] = "Error in API request"
        continue

    try:
        data = response.json()
    except ValueError:
        # A 200 response whose body is not valid JSON is treated as an API error
        company_eins[name] = "Error in API request"
        continue

    organizations = data.get('organizations') if isinstance(data, dict) else None
    if organizations:
        # NOTE(review): the first search hit is assumed to be the intended
        # organization; a name search can rank a different entity first.
        company_eins[name] = organizations[0]['ein']
    else:
        company_eins[name] = "EIN not found"

print(company_eins)

{'Citizens For Home Rule Inc': 621113012, 'Phoenix Law Enforcement Association': 510189787, 'The Halaj Foundation': 954700852, 'Leadership Institute': 510235174, 'Center Of The American Experiment': 363611426, 'National Center For Housing Management': 520955650, 'Community Foundation Donor Directed Fund': 830313321, 'Kauai Energy Institute': 274346053, 'Capital Community Foundation Inc': 561942969, 'Ramtell Inc': 351925455, 'The Helen Diller Family Foundation': 311655715, 'The Jewish Federation Of Sarasota Manatee Inc': 591227747, 'The American Foundation For Charitable Support Inc': 860857725, 'Community Foundation Of North Florida Inc': 593473384, 'The Greater Pinebelt Community Foundation': 721390352, 'Community Foundation For Nantucket': 134316755, 'Family Office Foundation Incorporated': 472788229, 'Wiseheart Foundation': 455085493, 'Greene County Community Foundation': 311751001, 'Cornell University Foundation': 222848738, 'Oregon Jewish Community Foundation': 931019725, 'Bradley

## Get level 2 connections (the connections of each first-level company)

In [59]:
import requests
from bs4 import BeautifulSoup
import time
import random

# Base URL for ProPublica's nonprofit search results.
# The EIN-lookup cell rebinds `base_url` to the JSON API endpoint, so it is set
# back to the full-text search page here before get_company_connections reads it.
base_url = "https://projects.propublica.org/nonprofits/full_text_search"

# Pause between requests (in seconds) to avoid rate limiting.
# get_company_connections sleeps this long, plus a random 0-1 s, before every
# page request. NOTE(review): the reason for 30.01 rather than 30 is not
# recorded here.
scrape_delay = 30.01

# Maximum number of pages to attempt scraping; passed to
# get_company_connections for every company by the driver loop below
max_pages = 15


def _classify_relationship(form_info):
    """Return the relationship label implied by a search hit's form details."""
    if "Schedule I" in form_info:
        return "Grant"
    if "Schedule A" in form_info:
        return "Supported Company"
    return "Unknown"


def _extract_year(form_info):
    """Return the trailing parenthesised year as an int, or "Unknown" if it is not all digits."""
    year_part = form_info.split("(")[-1].strip(")")
    return int(year_part) if year_part.isdigit() else "Unknown"


def get_company_connections(search_query, max_pages=15, request_timeout=30):
    """
    Scrapes the ProPublica Nonprofit Explorer full-text search for hits matching a query.
    Keeps the first hit per company name and drops hits whose relationship is 'Unknown'.

    Reads the module-level `base_url` and `scrape_delay` at call time, sleeps
    `scrape_delay` plus a random 0-1 s before every page request, and prints
    each connection as it is recorded.

    Paging stops at the first 404, the first other non-OK status, the first
    failed request, or the first page without result elements; whatever was
    collected up to that point is returned.

    Args:
    - search_query (int | str): The EIN or company name to search for.
    - max_pages (int): Maximum number of pages to scrape for connections.
    - request_timeout (float): Seconds to wait for each HTTP response.

    Returns:
    - List[dict]: One dict per unique company with the keys 'Company Name',
      'Relationship' and 'Year' ('Year' is an int, or "Unknown" when it could
      not be parsed).
    """
    company_info_list = []
    unique_companies = set()

    for page_num in range(1, max_pages + 1):

        # Pause before every request (including the first) to stay under the rate limit
        time.sleep(scrape_delay + random.uniform(0, 1))

        try:
            # `params` builds and URL-encodes the query string
            response = requests.get(
                base_url,
                params={"page": page_num, "q": search_query},
                timeout=request_timeout,
            )
        except requests.RequestException as exc:
            # Treat a network failure like an HTTP error: report it and keep
            # what was collected instead of aborting the caller's whole crawl
            print(f"Error fetching page {page_num} for {search_query}: {exc}")
            break

        if response.status_code == 404:
            break  # Exit the loop if the page does not exist
        if not response.ok:
            # Log the error and stop paging for this query
            print(f"Error fetching page {page_num} for {search_query}: {response.status_code}")
            break

        # Parse the HTML content
        soup = BeautifulSoup(response.text, "html.parser")

        # Each search hit is rendered as a <div> carrying exactly this class string
        company_elements = soup.find_all("div", class_="form-name text-link result-item__hed margin-right")

        # If no elements are found, assume we've reached the end
        if not company_elements:
            break

        for elem in company_elements:
            link = elem.find("a")
            if link is None:
                # A hit without a link has no text to parse
                continue
            raw_text = link.get_text(strip=True)

            # The link text is split on an em dash: name first, form details second
            parts = raw_text.split('—')
            company_name = parts[0].strip()
            form_info = parts[1].strip() if len(parts) > 1 else ""

            relationship = _classify_relationship(form_info)

            # Skip unclassified hits and companies that were already recorded
            if relationship == "Unknown" or company_name in unique_companies:
                continue

            unique_companies.add(company_name)
            year = _extract_year(form_info)

            company_info_list.append({
                'Company Name': company_name,
                'Relationship': relationship,
                'Year': year,
            })

            print(company_name, relationship, year)

    return company_info_list


# Fetch the connections of every first-level company, keyed by company name.
# `company_names` is deliberately not rebound to the EIN list here: the
# EIN-lookup cell iterates over it as names, so overwriting it would break a
# re-run of that cell.

# Sentinel strings the EIN-lookup cell stores when it could not resolve a company
failed_lookups = {"EIN not found", "Error in API request"}

# Dictionary to store all company connections
all_connections = {}

# Get connections for each company that has a usable EIN
for company_name, ein in company_eins.items():
    if ein in failed_lookups:
        # Searching for the sentinel text would return unrelated filings, so
        # record an empty result for this company instead
        print(f"Skipping {company_name}: {ein}")
        all_connections[company_name] = []
        continue
    all_connections[company_name] = get_company_connections(ein, max_pages=max_pages)

# Display all connections for each company
for company_name, connections in all_connections.items():
    print(f"Connections for {company_name}:")
    for connection in connections:
        print(f"  - {connection['Company Name']} (Relationship: {connection['Relationship']}, Year: {connection['Year']})")
    print("\n")

The William A And Genevieve H Strong Foundation Tr Supported Company 2022
Leadership Exchange Academy Inc Grant 2016
American Future Fund Grant 2015
Kenosha Community Foundation Grant 2021
Donors Capital Fund Inc Supported Company 2020
Ramtell Inc Supported Company 2015
Charles Koch Institute Grant 2016
The American Foundation For Charitable Support Inc Supported Company 2016
Horizon Community Funds Of Northern Kentucky Inc Grant 2019
Hutchinson Community Foundation Grant Unknown
Bradley Impact Fund Inc Grant 2015
Community Foundation For Mississippi Grant Unknown
Community Foundation Of Abilene Grant 2016
Community Foundation Of Southwest Louisiana Grant 2022
Community Foundation Of The Eastern Shore Inc Grant 2021
Cornell University Foundation Grant 2022
University Impact Grant 2022
Richland County Foundation Of Mansfield Grant 2021
Southwest Florida Community Foundation Inc Grant 2021
Community Foundation Of Herkimer And Oneida Counties Inc Grant 2021
Rancho Santa Fe Foundation Gran

In [60]:
import csv

# Flatten the per-company connection lists into rows and save them as a CSV file
csv_file_path = "company_connections_3.csv"

with open(csv_file_path, mode="w", newline="", encoding="utf-8") as csv_file:
    csv_writer = csv.writer(csv_file)

    # Header first, then one row per (company, connection) pair
    csv_writer.writerow(["Company Name", "Connection Name", "Relationship", "Year"])
    csv_writer.writerows(
        [source_name, link["Company Name"], link["Relationship"], link["Year"]]
        for source_name, links in all_connections.items()
        for link in links
    )

print(f"Filtered data saved to CSV file: {csv_file_path}")

Filtered data saved to CSV file: company_connections_3.csv
