In [23]:
import os
import pandas as pd

# Locations of the input workbooks and of the formatted copies written by this notebook.
# NOTE: `directory` is an absolute path on one user's machine; edit it before running elsewhere.
directory = (
    '/Users/Anubha/Library/CloudStorage/OneDrive-DrexelUniversity/'
    'Anubha & Dr. T - Shared/Urban Regulatory projects/Urban Final'
)
# Folder that is scanned for the .xlsx workbooks to process.
folder_path = directory + "/Output/Matching Urban to Drexel"
# Folder that receives the processed workbooks (same file names as the inputs).
output_path = directory + "/Output/Matching Urban to Drexel Formatted"

def _build_state_ids(states):
    """Return per-row ``id-<xx>-<NNN>`` labels for a column of state names.

    ``xx`` is the first two characters of the state name, lower-cased, and
    ``NNN`` is a zero-padded counter that starts at 000 and increases by one for
    every further row carrying the same (full) state name, in row order.

    Rows whose state is missing are omitted from the result, so they end up as
    NaN once the result is aligned back onto the frame by index.
    """
    present = states.dropna()
    # cumcount() numbers the rows inside each group 0, 1, 2, ... in their
    # original order, matching a manual per-state counter that starts at 0.
    running = present.groupby(present, sort=False).cumcount()
    labels = [
        f"id-{str(state)[:2].lower()}-{count:03d}"
        for state, count in zip(present, running)
    ]
    return pd.Series(labels, index=present.index, dtype=object)


def process_file(file_path):
    """Add ID and abbreviation columns to one workbook and save a formatted copy.

    The workbook at ``file_path`` is read with openpyxl and three columns are
    added:

    * ``ID-State`` -- ``id-<first two letters of STATE, lower-cased>-<NNN>``
      where ``NNN`` counts the rows of each distinct ``STATE`` value from 000.
      Rows with a missing ``STATE`` get no ``ID-State`` value.
    * ``ID`` -- the text ``id-`` followed by the value of ``Unnamed: 0``.
    * ``Regulatory Type Abbr`` -- two-letter code looked up from
      ``Regulatory Type``; values without an entry in the mapping become NaN.

    The frame is then written, without its index, to a file of the same name
    inside the module-level ``output_path`` directory (created if missing).

    Args:
        file_path: Path of the ``.xlsx`` workbook to process.

    Returns:
        None.

    Raises:
        KeyError: If ``STATE``, ``Unnamed: 0`` or ``Regulatory Type`` is not a
            column of the workbook.
    """
    data = pd.read_excel(file_path, engine='openpyxl')

    # Fail early, and with the file name, when the sheet lacks an expected column.
    required_columns = ('STATE', 'Unnamed: 0', 'Regulatory Type')
    missing = [column for column in required_columns if column not in data.columns]
    if missing:
        raise KeyError(
            f"{os.path.basename(file_path)} is missing required column(s): {missing}"
        )

    # Add the 'ID-State' column (assignment aligns on the row index)
    data['ID-State'] = _build_state_ids(data['STATE'])

    # Add the 'ID' column
    data['ID'] = 'id-' + data['Unnamed: 0'].astype(str)

    # Dictionary mapping Regulatory Type to Abbreviation
    regulatory_type_abbr = {
        'Administrative Dissolutions': 'AD',
        'Does the state require annual financial reporting by commercial fundraisers?': 'AF',
        'Audit Threshold': 'AT',
        'Bifurcated': 'BF',
        'Does the state require bonding of professional fundraisers?': 'BO',
        'Certificate of Amendments': 'CA',
        'Does the state oversee commercial-coventuring (e.g. by requiring that the co-venture be registered or by requiring that the charitable organization files the co-venture contract)?': 'CC',
        'Does the state require registration by commercial fundraisers?': 'CF',
        'Charitable Trusts': 'CT',
        'Dissolution': 'DS',
        'Educational Institutions': 'EI',
        'Does the state require registration by fundraising counsel?': 'FC',
        'Foundations': 'FD',
        'Does the state require a copy of any contract between a charitable organization and a commercial fundraiser or fundraising counsel be filed with the regulator?': 'FF',
        'Fraternal/ Membership': 'FM',
        'Foundations that don\'t solicit contributions': 'FS',
        'Governmental Organizations': 'GO',
        'Hospitals': 'HO',
        'Has Statute': 'HS',
        'Judicial Dissolutions': 'JD',
        'Mergers': 'MG',
        'Requires Notice or Oversight': 'NO',
        'Non-soliciting': 'NS',
        'Does the state require the fundraisers to provide notice to the regulator before any solicitation campaign (in addition to annual registration and/or filing the contract)?': 'NT',
        'Other': 'OT',
        'Political Orgs': 'PO',
        'Parent-Teacher Organizations': 'PT',
        'Requires Audit': 'RA',
        'Reports to congress': 'RC',
        'Does the state require annual financial reporting by charitable organizations in addition to filing a copy of the 990 with the regulator (if filing 990 is required)?': 'RG',
        'Requires Filing': 'RF',
        'Registration Law': 'RL',
        'Removal of Board Members': 'RM',
        'Registration Office': 'RE',
        'Religious Organizations': 'RO',
        'Sale of Assets': 'SA',
        'Does the state require specified disclosures to donors?': 'SD',
        'Small organizations': 'SO',
        'Voluntary Dissolutions': 'VD',
        'Veterans organizations': 'VO'
    }

    # Add the 'Regulatory Type Abbr' column
    data['Regulatory Type Abbr'] = data['Regulatory Type'].map(regulatory_type_abbr)

    # Make sure the destination folder exists so the first run does not fail.
    os.makedirs(output_path, exist_ok=True)
    output_file_path = os.path.join(output_path, os.path.basename(file_path))
    data.to_excel(output_file_path, index=False, engine='openpyxl')
    
# Process every workbook in the input folder.
# - Names are sorted because os.listdir() returns entries in arbitrary order.
# - Excel creates a "~$<name>.xlsx" owner/lock file next to a workbook while it
#   is open; it ends in .xlsx but is not a readable workbook, so skip it.
for filename in sorted(os.listdir(folder_path)):
    if not filename.endswith('.xlsx') or filename.startswith('~$'):
        continue
    file_path = os.path.join(folder_path, filename)
    process_file(file_path)

print("Processing complete!")


Processing complete!


In [22]:
# Print the first five rows of `data` as plain text.
# NOTE(review): in the cells shown here `data` is only assigned inside
# process_file(), so this cell presumably relies on a notebook-level `data`
# defined elsewhere -- confirm it survives Restart Kernel -> Run All.
print(data.head(5))

   Unnamed: 0  Unnamed: 0.1    STATE      Regulatory Type  \
0        2571             0  Wyoming                STATE   
1        2572             1  Wyoming           Bifurcated   
2        2573             2  Wyoming  Registration Office   
3        2574             3  Wyoming       Sale of Assets   
4        2575             4  Wyoming       Sale of Assets   

     Regulatory Type Full Regulatory Body         Regulation Indicator  \
0                   STATE             NaN  Wyoming                       
1              Bifurcated             NaN                           No   
2     Registration Office             NaN                          NaN   
3     Sale of Assets - AG              AG                          Yes   
4  Sale of Assets - Other           Other                          Yes   

                                               Notes  \
0                        Wyoming                       
1           Charitable Orgs Not Required to Register   
2                   