In [1]:
import numpy as np
import pandas as pd
import math

In [2]:
pd.options.display.max_columns = 50

In [3]:
# Methods
def unpack_column(dataFrame,col_name):
    newDf = dataFrame.from_records(dataFrame[col_name].dropna().tolist())
    return newDf

def unpack_and_destroy(dataFrame,column):
    temp = unpack_column(dataFrame,column)
    for col in temp.columns:
        dataFrame[col] = temp[col]
    #dataFrame = dataFrame.drop(columns=[column])
    return dataFrame

def sort_db_value(db,value,asc=False):
    return db.sort_values(by = value, ascending = asc)

def freq_array(freq_list):
    freq = []
    for i in freq_list:
        freq.append(i)
    return freq

def set_labels(freq, ax,horz_shift=.2,vert_shift=1000,w="bold",fsize=10,c="b"):
    for i, v in enumerate(freq):
        ax.text(i-horz_shift,v+vert_shift, str(round(v,2)), weight = w,fontsize=fsize,color = c)
        
def wrap_axis_labels(labels_list,wrap_length=13):
    labels_list = [ '\n'.join(wrap(l, wrap_length)) for l in labels_list]
    return labels_list

In [4]:
def zip_five(df):
    '''
    Updates df with a "ZIP_FIVE" column (5 digit zip code)
    
    Parameters
    ----------
    df : Pandas DataFrame with a "ZIP" column
    
    Returns
    -------
    Pandas DataFrame with a ZIP_FIVE column (5 digit zip code)
    '''
    df['ZIP_FIVE'] = df['ZIP'].apply(lambda x: x[:5])
    return df

In [5]:
def find_same_zip(df, zip_code):
    '''
    Looks through Database and returns Entries with same zip code
    
    Parameters
    ----------
    df : Pandas DataFrame of Charities
    zip_code : Zip Code to Match on
    
    Returns
    -------
    Pandas DataFrame with Entries matching provided zip code
    '''
    zip_code = str(zip_code)
    
    temp_df = df[all_regions_df['ZIP_FIVE'] == zip_code]
    temp_df_two = df[all_regions_df['ZIP'] == zip_code]
    temp_df = pd.concat((temp_df,temp_df_two))
    
    return temp_df

In [6]:
def create_NTEE_dict(file):
    '''
    Create NTEE Dictionary from csv file
    
    Parameters
    ----------
    file : CSV file where first column is NTEE code
        and second column is NTEE code description
    
    Returns
    -------
    dictionary mapping NTEE code to NTEE code description
    '''
    NTEE_dict = dict()
    ntee_df = pd.read_csv(file,header=None)
    ntee_df = ntee_df.loc[:,:1]
    
    for index, row in ntee_df.iterrows():
        if type(row.loc[0]) != float:
            NTEE_dict[row.loc[0]] = row.loc[1].rstrip()
    
    return NTEE_dict

In [7]:
def create_major_categories_list(ntee_dict):
    '''
    Creates a list containing the major categories of NTEE codes
    
    Parameters
    ----------
    ntee_dict : Dictionary of NTEE codes and categories
    
    Returns
    -------
    list of major categories of NTEE codes
    '''
    major_letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'

    major_categories = []

    for let in major_letters:
        major_categories.append(ntee_dict[let])
    
    return major_categories

In [8]:
def create_NTEE_Categories(df,ntee_dict):
    '''
    Create NTEE_Major_Category and NTEE_Minor_Category and column for a Pandas DataFrame
    
    Parameters
    ----------
    df : Pandas Dataframe containing "NTEE_CD" Column
    ntee_dict : Dictionary containing NTEE Codes as Keys and
        NTEE Categories as Values
    
    Returns
    -------
    Pandas Dataframe with "NTEE_Major_Category" and "NTEE_Minor_Category" column
    '''
    major_letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    
    df['NTEE_Major_Category'] = df['NTEE_CD'].apply(lambda x: ntee_dict[x[0]] if \
                                                    (type(x) == str and x[0] in major_letters) else x)
    
    df['NTEE_Minor_Category'] = df['NTEE_CD'].apply(lambda x: ntee_dict[x[:3]] if \
                                                    (type(x) == str and x[:3] in ntee_dict.keys()) else x)
    
    return df

In [9]:
def find_major_category(df,category:str):
    '''
    Filter a Pandas DataFrame by an NTEE Major Category
    
    Parameters
    ----------
    df : Pandas DataFrame with 'NTEE_Major_Category' column
    category : NTEE Category to Filter
    
    Return
    ------
    Filtered Pandas DataFrame
    '''
    temp_df = df[df['NTEE_Major_Category']==category]
    
    return temp_df

In [10]:
def search_by_zip_and_major_category(df, zipcode, category):
    '''
    
    '''
    temp_df = find_same_zip(df,zipcode)
    temp_df = find_major_category(temp_df,category)
    
    return temp_df

In [11]:
def find_minor_category(df,category:str):
    '''
    Filter a Pandas DataFrame by an NTEE Major Category
    
    Parameters
    ----------
    df : Pandas DataFrame with 'NTEE_Major_Category' column
    category : NTEE Category to Filter
    
    Return
    ------
    Filtered Pandas DataFrame
    '''
    temp_df = df[df['NTEE_Minor_Category']==category]
    
    return temp_df

In [12]:
def search_by_zip_and_minor_category(df, zipcode, category):
    '''
    
    '''
    temp_df = find_same_zip(df,zipcode)
    temp_df = find_minor_category(temp_df,category)
    
    return temp_df

In [15]:
region1_df = pd.read_csv('../data/eo1.csv')
region2_df = pd.read_csv('../data/eo2.csv')
region3_df = pd.read_csv('../data/eo3.csv')
region4_df = pd.read_csv('../data/eo3.csv')
all_regions_df = pd.concat([region1_df,region2_df,region3_df,region4_df])

In [16]:
all_regions_df = all_regions_df[all_regions_df["DEDUCTIBILITY"] == 1]

In [None]:
# all_regions_df = all_regions_df.drop(\
#     columns=['FILING_REQ_CD',"PF_FILING_REQ_CD","STATUS","NTEE_CD"])

In [17]:
all_regions_df = zip_five(all_regions_df)

In [18]:
ntee_dict = create_NTEE_dict('../data/NTEE_file - Sheet1.csv')

In [19]:
ntee_list = create_major_categories_list(ntee_dict)

In [20]:
all_regions_df = create_NTEE_Categories(all_regions_df,ntee_dict)

In [42]:
all_regions_df = all_regions_df.reset_index(drop=True)

In [43]:
ntee_exists_list = list(all_regions_df['NTEE_CD'].dropna().index)

In [47]:
all_regions_df_NTEE = all_regions_df.loc[ntee_exists_list]

In [51]:
all_regions_df_NTEE

Unnamed: 0,EIN,NAME,ICO,STREET,CITY,STATE,ZIP,GROUP,SUBSECTION,AFFILIATION,CLASSIFICATION,RULING,DEDUCTIBILITY,FOUNDATION,ACTIVITY,ORGANIZATION,STATUS,TAX_PERIOD,ASSET_CD,INCOME_CD,FILING_REQ_CD,PF_FILING_REQ_CD,ACCT_PD,ASSET_AMT,INCOME_AMT,REVENUE_AMT,NTEE_CD,SORT_NAME,ZIP_FIVE,NTEE_Major_Category,NTEE_Minor_Category
2,587764,IGLESIA BETHESDA INC,,157 ANDOVER ST,LOWELL,MA,01852-2348,0,3,3,7000,200401,1,10,0,1,1,,0,0,6,0,12,,,,X21,,01852,"Religion-Related, Spiritual Development",Protestant
3,635913,MINISTERIO APOSTOLICO JESUCRISTO ES EL SENOR INC,,454 ESSEX ST,LAWRENCE,MA,01840-1242,0,3,3,7000,200401,1,10,0,1,1,,0,0,6,0,12,,,,X21,,01840,"Religion-Related, Spiritual Development",Protestant
4,765634,MERCY CHAPEL INTERNATIONAL,,75 MORTON VILLAGE DR APT 408,MATTAPAN,MA,02126-2433,0,3,3,7000,200404,1,10,0,1,1,,0,0,6,0,12,,,,X20,,02126,"Religion-Related, Spiritual Development",Christian
5,841363,AGAPE HOUSE OF PRAYER,,39 GOODALE RD STE 2,MATTAPAN,MA,02126-1527,0,3,3,1700,200412,1,10,0,1,1,,0,0,6,0,12,,,,X20,,02126,"Religion-Related, Spiritual Development",Christian
6,852649,BETHANY PRESBYTERIAN CHURCH,,32 HARVARD ST,BROOKLINE,MA,02445-7994,0,3,3,7000,200504,1,10,0,1,1,,0,0,6,0,12,,,,X20,,02445,"Religion-Related, Spiritual Development",Christian
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2110311,996086871,WATERHOUSE CHARITABLE TR,% ALAN TSURUDA,770 KAPIOLANI BLVD STE 506,HONOLULU,HI,96813-5258,0,3,3,1000,200011,1,17,0,2,1,201806.0,8,6,1,0,6,35704565.0,2474123.0,1295808.0,T99,WONG EDWIN S N ET AL TTEE,96813,"Philanthropy, Voluntarism and Grantmaking Foun...","Philanthropy, Voluntarism, and Grantmaking Fou..."
2110312,996087839,KEIKI KOHOLA PROJECT,% RACHEL CARTWRIGHT,1330 SABAL LAKES RD,DELRAY BEACH,FL,33445-1290,0,3,3,1000,200107,1,4,0,1,1,201712.0,2,2,0,3,12,10109.0,22000.0,,U50,CARTWRIGHT RACHAEL,33445,"Science and Technology Research Institutes, Se...","Biological, Life Science Research"
2110313,996088748,UNOYO KOJIMA TRUST,% PACIFIC CENTURY TRUST,PO BOX 3170,HONOLULU,HI,96802-3170,0,3,3,1000,200109,1,4,0,1,1,201803.0,4,3,0,1,3,377564.0,64829.0,,T22,PACIFIC CENTURY TR TTEE,96802,"Philanthropy, Voluntarism and Grantmaking Foun...",Private Independent Foundations
2110314,996089401,TOYO SAKUMOTO CHARITABLE TR,% PACIFIC CENTURY TRUST,PO BOX 3170,HONOLULU,HI,96802-3170,0,3,3,1000,200110,1,4,0,5,1,201903.0,5,3,0,1,3,660843.0,84812.0,,T90,,96802,"Philanthropy, Voluntarism and Grantmaking Foun...",Named Trusts/Foundations N.E.C.


In [50]:
manhasset_df = find_same_zip(all_regions_df_NTEE,11050)

  app.launch_new_instance()


In [None]:
find_major_category(manhasset_df,'Arts, Culture and Humanities')

In [None]:
search_by_zip_and_major_category(all_regions_df,zipcode=11030,category='Education').head(5)

In [None]:
search_by_zip_and_major_category(all_regions_df,zipcode=97133,category='Education')

In [None]:
ntee_list

In [None]:
search_by_zip_and_major_category(all_regions_df,10017,'Youth Development')

## Rate By:
1. Narrow Down Categories
2. Charity Navigator Rating
3. Asset/Income
4. Region/State from ZipCode

In [None]:
from uszipcode import SearchEngine

In [None]:
search = SearchEngine(simple_zipcode=True)

In [None]:
search.by_zipcode(10017)

In [None]:
ntee_list

In [None]:
all_regions_df.groupby('NTEE_Major_Category').size().sort_values(ascending=False)

In [None]:
search_by_zip_and_major_category(all_regions_df,11030,'Animal-Related')

In [None]:
x = all_regions_df.loc[1]

In [None]:
x

In [None]:
x['city'] = x['ZIP_FIVE'].apply