In [127]:
import pandas as pd
import numpy as np

## Charity Regulator Data

In [128]:
ann_reports = pd.read_csv("../Charity_Data/annual-reports-15012021.csv")
pub_reg = pd.read_csv("../Charity_Data/public-register-15012021.csv").drop(['Unnamed: 11'], axis=1)

In [139]:
def merge_regulator_data(annual_reps, pub_registry):
    all_regulator = annual_reps.merge(pub_registry, on='Registered Charity Number', how='left').drop(['Registered Charity Name_y'], axis=1).rename(columns={"Registered Charity Name_x": "Registered Charity Name"})
    print("MERGING APPEARS TO HAVE GONE SMOOTHLY.")

def include_missing_purpose(data):
    data.loc[(data['Report Activity'].str.contains("Religious activities") | data['Beneficiaries'].str.contains("Religious|Religion|Priests")) & data['Charitable Purpose'].isnull(), "Charitable Purpose"] = "Advancement of religion"
    
    data.loc[data['Report Activity'].str.contains("Promotion of community") & data['Charitable Purpose'].isnull(), "Charitable Purpose"] = "Advancement of community development, including rural or urban regeneration"
    
    data.loc[data['Report Activity'].str.contains("Promotion of health") & data['Charitable Purpose'].isnull(), "Charitable Purpose"] = "Promotion of health, including the prevention or relief of sickness, disease or human suffering"
    
    data.loc[data['Report Activity'].str.contains("Advancement of Arts|Cultural promotion") & data['Charitable Purpose'].isnull(), "Charitable Purpose"] = "Advancement of the arts, culture, heritage or sciences"
    
    data.loc[data['Report Activity'].str.contains("Animal welfare") & data['Charitable Purpose'].isnull(), "Charitable Purpose"] = "Prevention or relief of suffering of animals"
    
    data.loc[(data['Report Activity'].str.contains("Welfare/benevolent|Provision of accommodation/housing|Welfare of those in need|Disability support")) & data['Charitable Purpose'].isnull(), "Charitable Purpose"] = "Advancement of community welfare including the relief of those in need by reason of youth, age, ill-health, or disability"
    
    data.loc[(data['Report Activity'].str.contains("Education|Research/evaluation|Playgroup/afterschool") | data['Beneficiaries'].str.contains("University|School|College")) & data['Charitable Purpose'].isnull(), "Charitable Purpose"] = "Advancement of education"

    data.loc[(data['Report Activity'].str.contains("Relief of poverty|Overseas aid/famine relief")) & data['Charitable Purpose'].isnull(), "Charitable Purpose"] = "Relief of poverty or economic hardship"

    if(len(data[data['Charitable Purpose'].isna()]) > 0):
        print("INCLUDE MISSING PURPOSE FAILED")
        print("These are the rows without a recoverable match")
        print(data[data['Charitable Purpose'].isna()])
        print(" ")
    else:
        print("INCLUDE MISSING PURPOSE SUCCESS")
        print(" ")

def condense_purpose(data):
    
    data['Purpose'] = "Undefined"
    col = data['Charitable Purpose'].str.split(':').str[0]

    edu_vals = ('Advancement of education', 'aa')
    rel_vals = ('Promotion of religious or racial harmony and harmonious community relations', 'Advancement of religion')
    com_vals = ('Advancement of community development', 'Other purpose that is of benefit to the community')
    arts_vals = ('Advancement of the arts, culture, heritage or sciences', 'aa')
    ani_env_vals = ('Prevention or relief of suffering of animals', 'aa')
    health_vals = ('Promotion of health, including the prevention or relief of sickness, disease or human suffering', 'aa')
    
    data.loc[(col.isin(edu_vals), 'Purpose')] = "Education"
    data.loc[(col.isin(rel_vals), 'Purpose')] = "Religion"
    data.loc[(col.isin(com_vals), 'Purpose')] = "Community"
    data.loc[(col.isin(arts_vals), 'Purpose')] = "Arts / Culture / Science"
    data.loc[(col.isin(ani_env_vals), 'Purpose')] = "Animals / Environment"
    data.loc[(col.isin(health_vals), 'Purpose')] = "Health"

    num_nas = len(data[data['Purpose']=="Undefined"])
    if(num_nas > 0):
        print("CONDENSE PURPOSE FAILED")
        print("There are "+str(num_nas)+" rows without a condensable purpose, here are the long-form purposes:")
        print(data[data['Purpose']=="Undefined"]['Charitable Purpose'])
        print(" ")
    else:
        print("CONDENSE PURPOSE SUCCESS")
        print(" ")

In [140]:
merge_regulator_data(ann_reports, pub_reg)
include_missing_purpose(all_regulator)
condense_purpose(all_regulator)

#all_regulator.to_csv('../Charity_Data/combined_regulator_data.csv', index=False)

MERGING APPEARS TO HAVE GONE SMOOTHLY.
INCLUDE MISSING PURPOSE SUCCESS
 
CONDENSE PURPOSE FAILED
There are 24978 rows without a condensable purpose, here are the long-form purposes:
38       Relief of poverty or economic hardship; Advanc...
39       Relief of poverty or economic hardship; Advanc...
40       Relief of poverty or economic hardship; Advanc...
41       Relief of poverty or economic hardship; Advanc...
42       Relief of poverty or economic hardship; Advanc...
                               ...                        
36327    Other purpose that is of benefit to the commun...
36328    Other purpose that is of benefit to the commun...
36329    Other purpose that is of benefit to the commun...
36330    Other purpose that is of benefit to the commun...
36331    Other purpose that is of benefit to the commun...
Name: Charitable Purpose, Length: 24978, dtype: object
 
