## Donor Classification and Categorization Demo

In [None]:
#import all necessary tools for the demo
import ipywidgets as widgets
import pandas as pd
import numpy as np
import tensorflow.keras as keras

In [None]:
# Normalisation statistics applied to the numerical features.
mean = 6906.2298
std = 18287.9453


def preprocess(x):
    """Standardise ``x`` by subtracting ``mean`` and dividing by ``std``.

    ``x`` may be a scalar or anything supporting element-wise arithmetic
    (for example a NumPy array); the result has the same shape as ``x``.
    """
    centered = x - mean
    return centered / std

In [None]:
# Define the input widgets used for alumni data entry.
# Each widget is later placed in its own accordion section, so most of them
# carry no description label of their own.

# Alumni sex: a two-way choice between 'M' and 'F'.
sex = widgets.RadioButtons(
    options=['M', 'F'],
    disabled=False
)

# Alumni age in years (free integer entry, defaults to 22).
age = widgets.IntText(
    value = 22,
    description='Age:',
    disabled=False
)

# Alumni primary graduation year. If the alumnus has multiple degrees from the
# university, they should enter the graduation year of their first degree.
# Entry is bounded to the range 1920-2099.
grad_year = widgets.BoundedIntText(
   # description="Primary Graduation Year:",  (label intentionally disabled)
    value=2022,
    min=1920,
    max = 2099
)

# Alumni major, chosen from a fixed list of labels (defaults to 'Other').
# The selected label is later converted to an integer code through major_dict,
# so every label listed here must also be a key of that dictionary.
major = widgets.Dropdown(
    options = ['Agriculture', 'Psychology', 'Animal Science', 'Biology', 'Aerospace Administration', 'Information Systems', 'Social Work', 'Sociology', 'Health, Physical Ed & Rec', 'Interdisciplinary Studies', 'Accounting', 'Physical Education', 'Geosciences', 'Mass Communication', 'Political Science', 'Marketing', 'Finance', 'Recording Industry', 'Criminal Justice Admin', 'Aerospace', 'History', 'English', 'Nursing', 'Professional Studies', 'Foreign Languages*', 'Integrated Studies', 'Communication', 'Health and Human Performance', 'Mathematics', 'Social', 'Home Economics', 'Elementary Education', 'Business Administration', 'Industrial Studies', 'Health', 'Chemistry', 'Business Education', 'Economics', 'Art', 'Administration & Supervision', 'General Business', 'P E Elementary Schools', 'Curriculum Specialist', 'Reading', 'Speech & Theatre', 'Urban Planning', 'Computer Science', 'Recording Industry Mgmt', 'Office Management', 'Social Science', 'Food Science and Technology', 'Early Childhood', 'Early Childhood Education', 'Industrial Management', 'Interior Design', 'Music', 'Speech', 'Industrial Technology', 'Speech and Hearing Therapy', 'Special Education', 'Management', 'Medical Technology', 'Professional', 'Recreation', 'Stenographic Business', 'Health Education', 'Administrative Business', 'Curriculum and Instruction', 'Gerontology*', 'Human Sciences', 'Guidance and Counseling', 'University Studies', 'Textiles Merchandising Design', 'Plant & Soil Science', 'Family & Consumer Studies', 'Agribusiness', 'Liberal Studies', 'International Relations', 'Entrepreneurship', 'Engineering Technology', 'Curriculum & Inst/Elem Edu*', 'Industrial & Org Psychology', 'Biochemistry', 'Concrete Industry Management', 'Organizational Communication', 'Construction Management', 'Science', 'Exercise Science', 'Speech/Language Path & Audio', 'Curriculum & Inst/Ed Leadrshp', 'Nutrition & Food Science', 'Physics', 'Global Studies & Cultural Geog', 'Leisure Sprt & Tourism Studies', 
'Mechatronics Engineering', 'Animation', 'Public History', 'Global Studies & Human Geog', 'Religious Studies', 'Tourism & Hospitality Mgmt', 'Journalism', 'Theatre', 'Media & Entertainment', 'Art Education', 'International Affairs', 'Video & Film Production', 'Professional Science', 'Applied Social Research', 'Mathematics & Science Educ', 'Information Technology', 'Plant Biology', 'Vocational Technical Education', 'Engr Tech & Industrial Studies', 'Foreign Languages', 'Industrial Arts', 'Geography', 'Industrial Education', 'Wellness and Fitness', 'Recreation & Leisure Services', 'Anthropology', 'Aviation Administration', 'U.S. Culture and Education', 'Environmental Sustain Tech', 'Africana Studies', 'Liberal Arts', 'Environmental Science', 'Professional Counseling', 'Commerce', 'Philosophy', 'Aerospace Education', 'Systems Analysis', 'Environmental Science Tech', 'Pre-Law', 'Vocational Home Economics', 'Earth Science*', 'Accounting & Info Systems', 'Foods and Nutrition', 'Law Enforcement', 'Fashion Merchandising', 'Basic & Applied', 'Social Studies', 'Athletic Training', 'Construction Mgmt Technology', 'Recording Arts & Technologies', 'Art History', 'Global Studies', 'Literacy', 'Community & Public Health', 'Dance', 'Literacy Studies', 'Assessment, Lrn & Sch Imprvmnt', 'Forensic Science', 'Audio Production', 'Library Science*', 'Home Economics Education', 'Education*', 'Instrumental Music', 'Public Administration', 'Actuarial Science', 'Music Education', 'Industrial Relations', 'Marketing Education', 'Human Performance', 'Adv Stud in Teaching Learning', 'French', 'Management/Marketing', 'School Music', 'Pre Professional', 'Media and Communication', 'Fermentation Science', 'Leisure and Sport Management', 'Data Science', 'Exercise Science & Healt Promo', 'Interactive Media', 'Risk Management & Insurance', 'Horse Science', 'Health & Physical Education', 'Dyslexic Studies', 'Natural Science', 'Music Arts', 'Spanish', 'School Counseling', 'Academic Focus', 'Fashion', 
'German', 'Physical Science', 'College & University Teaching', 'Public Health', 'Aeronautical Science', 'Middle Level Education', 'Business Innovation & Entr', 'Dietetics', 'Curriculum & Inst/Psychology', 'Industrial Arts - General', 'Family Nurse Practitioner', 'Other'],
    value='Other',
    description='Major:',
    disabled=False,
)

# Alumni zip code, entered as an integer bounded to 601-99999 (so leading
# zeros are not represented). The value is later used as the join key against
# the survey data.
zip_code = widgets.BoundedIntText(
   # No description label: the accordion section title identifies this field.
    value=37130,
    min=601,
    max=99999
)

# Whether the alumnus' address is active or inactive, encoded as 0/1
# (defaults to 1). NOTE(review): presumably 1 means "active" -- confirm
# against the training data.
address_active = widgets.RadioButtons(
    options=[0, 1],
    value = 1,
    disabled=False
)

# Alumni activity count (free integer entry).
activity_count = widgets.IntText(
    description='Count:',
    disabled=False
)

# Accordion sections as (title, widget) pairs. The order matters: later cells
# read the widget values positionally to build the alumni record.
_accordion_sections = [
    ('Sex', sex),
    ('Age', age),
    ('Graduation Year', grad_year),
    ('Zip Code', zip_code),
    ('Address Active', address_active),
    ('Activity Count', activity_count),
    ('Major', major),
]

alumni_data = widgets.Accordion(
    children=[section_widget for _, section_widget in _accordion_sections]
)
for section_index, (section_title, _) in enumerate(_accordion_sections):
    alumni_data.set_title(section_index, section_title)

## Alumni Data Entry
#### Please enter all alumni data appropriately into the widgets below. 

In [None]:
# Render the accordion so the alumni values can be entered interactively.
display(alumni_data)

In [None]:
# Preview the value currently held by each widget, in accordion order.
[child.value for child in alumni_data.children]

In [None]:
# Collect the entered values into a single-row table. The column names below
# are matched to the widgets by position, so they must stay in the same order
# as the accordion's children.
data = [[child.value for child in alumni_data.children]]

alumni = pd.DataFrame(
    data,
    columns=['Sex', 'Age', 'Graduation_Year', 'Zip', 'Address_Active',
             'Activity_Count', 'Major'],
)

## Survey Data Download
#### Download the compiled survey data drawn from the U.S. Census and the American Community Survey

In [None]:
# Fetch the compiled survey data and save it as survey_data.csv in the
# working directory (shell escape; requires wget and network access).
!wget -O survey_data.csv https://csci7850-f22-semesterproject.nyc3.digitaloceanspaces.com/survey_data.csv

In [None]:
# Load the downloaded survey data. It is later joined to the alumni record on
# a 'Zip' column, so the file is expected to contain one.
survey = pd.read_csv("survey_data.csv")

In [None]:
# Replace missing survey values with the mean of their column.
# The result is assigned back to the column rather than calling
# fillna(..., inplace=True) on a column selection: that chained in-place form
# is deprecated and does not update `survey` when pandas Copy-on-Write is on.
with_NA = survey.columns[survey.isna().any()]
for column_name in with_NA:
    column_mean = survey[column_name].mean()
    survey[column_name] = survey[column_name].fillna(column_mean)

## Merge Alumni and Survey Data
#### Join the alumni record to the survey data on zip code, then clean the combined data to prepare it for the model

In [None]:
# Attach the survey features for the alumnus' zip code to the alumni record.
all_data = pd.merge(alumni, survey, on='Zip', how='inner')

# An inner join silently produces an empty table when the zip code is not in
# the survey data; fail here with a clear message instead of an obscure error
# in a later cell.
if all_data.empty:
    raise ValueError(
        f"Zip code {alumni['Zip'].tolist()} was not found in the survey data; "
        "please enter a zip code covered by survey_data.csv."
    )

# The zip code is only a join key, not a model feature.
all_data = all_data.drop(columns="Zip")

In [None]:
# Display the merged alumni + survey record for inspection.
all_data

In [None]:
# One-hot encode sex into two indicator columns appended at the end of the
# table (Sex_Female first, then Sex_Male). As before, any value other than
# 'M' is treated as female. Computed per row, so it does not depend on the
# table having exactly one row labelled 0.
is_male = all_data["Sex"] == "M"
all_data["Sex_Female"] = (~is_male).astype(int)
all_data["Sex_Male"] = is_male.astype(int)
all_data = all_data.drop(columns="Sex")

In [None]:
# Integer code for every major label offered in the 'Major' dropdown.
# The keys must match the dropdown labels exactly.
major_dict = {
    'Agriculture': 0, 'Psychology': 1, 'Animal Science': 2, 'Biology': 3, 'Aerospace Administration': 4,
    'Information Systems': 5, 'Social Work': 6, 'Sociology': 7, 'Health, Physical Ed & Rec': 8,
    'Interdisciplinary Studies': 9, 'Accounting': 10, 'Physical Education': 11, 'Geosciences': 12,
    'Mass Communication': 13, 'Political Science': 14, 'Marketing': 15, 'Finance': 16,
    'Recording Industry': 17, 'Criminal Justice Admin': 18, 'Aerospace': 19, 'History': 20,
    'English': 21, 'Nursing': 22, 'Professional Studies': 23, 'Foreign Languages*': 24,
    'Integrated Studies': 25, 'Communication': 26, 'Health and Human Performance': 27,
    'Mathematics': 28, 'Social': 29, 'Home Economics': 30, 'Elementary Education': 31,
    'Business Administration': 32, 'Industrial Studies': 33, 'Health': 34, 'Chemistry': 35,
    'Business Education': 36, 'Economics': 37, 'Art': 38, 'Administration & Supervision': 39,
    'General Business': 40, 'P E Elementary Schools': 41, 'Curriculum Specialist': 42, 'Reading': 43,
    'Speech & Theatre': 44, 'Urban Planning': 45, 'Computer Science': 46, 'Recording Industry Mgmt': 47,
    'Office Management': 48, 'Social Science': 49, 'Food Science and Technology': 50,
    'Early Childhood': 51, 'Early Childhood Education': 52, 'Industrial Management': 53,
    'Interior Design': 54, 'Music': 55, 'Speech': 56, 'Industrial Technology': 57,
    'Speech and Hearing Therapy': 58, 'Special Education': 59, 'Management': 60,
    'Medical Technology': 61, 'Professional': 62, 'Recreation': 63, 'Stenographic Business': 64,
    'Health Education': 65, 'Administrative Business': 66, 'Curriculum and Instruction': 67,
    'Gerontology*': 68, 'Human Sciences': 69, 'Guidance and Counseling': 70, 'University Studies': 71,
    'Textiles Merchandising Design': 72, 'Plant & Soil Science': 73, 'Family & Consumer Studies': 74,
    'Agribusiness': 75, 'Liberal Studies': 76, 'International Relations': 77, 'Entrepreneurship': 78,
    'Engineering Technology': 79, 'Curriculum & Inst/Elem Edu*': 80, 'Industrial & Org Psychology': 81,
    'Biochemistry': 82, 'Concrete Industry Management': 83, 'Organizational Communication': 84,
    'Construction Management': 85, 'Science': 86, 'Exercise Science': 87,
    'Speech/Language Path & Audio': 88, 'Curriculum & Inst/Ed Leadrshp': 89,
    'Nutrition & Food Science': 90, 'Physics': 91, 'Global Studies & Cultural Geog': 92,
    'Leisure Sprt & Tourism Studies': 93, 'Mechatronics Engineering': 94, 'Animation': 95,
    'Public History': 96, 'Global Studies & Human Geog': 97, 'Religious Studies': 98,
    'Tourism & Hospitality Mgmt': 99, 'Journalism': 100, 'Theatre': 101, 'Media & Entertainment': 102,
    'Art Education': 103, 'International Affairs': 104, 'Video & Film Production': 105,
    'Professional Science': 106, 'Applied Social Research': 107, 'Mathematics & Science Educ': 108,
    'Information Technology': 109, 'Plant Biology': 110, 'Vocational Technical Education': 111,
    'Engr Tech & Industrial Studies': 112, 'Foreign Languages': 113, 'Industrial Arts': 114,
    'Geography': 115, 'Industrial Education': 116, 'Wellness and Fitness': 117,
    'Recreation & Leisure Services': 118, 'Anthropology': 119, 'Aviation Administration': 120,
    'U.S. Culture and Education': 121, 'Environmental Sustain Tech': 122, 'Africana Studies': 123,
    'Liberal Arts': 124, 'Environmental Science': 125, 'Professional Counseling': 126, 'Commerce': 127,
    'Philosophy': 128, 'Aerospace Education': 129, 'Systems Analysis': 130,
    'Environmental Science Tech': 131, 'Pre-Law': 132, 'Vocational Home Economics': 133,
    'Earth Science*': 134, 'Accounting & Info Systems': 135, 'Foods and Nutrition': 136,
    'Law Enforcement': 137, 'Fashion Merchandising': 138, 'Basic & Applied': 139,
    'Social Studies': 140, 'Athletic Training': 141, 'Construction Mgmt Technology': 142,
    'Recording Arts & Technologies': 143, 'Art History': 144, 'Global Studies': 145, 'Literacy': 146,
    'Community & Public Health': 147, 'Dance': 148, 'Literacy Studies': 149,
    'Assessment, Lrn & Sch Imprvmnt': 150, 'Forensic Science': 151, 'Audio Production': 152,
    'Library Science*': 153, 'Home Economics Education': 154, 'Education*': 155,
    'Instrumental Music': 156, 'Public Administration': 157, 'Actuarial Science': 158,
    'Music Education': 159, 'Industrial Relations': 160, 'Marketing Education': 161,
    'Human Performance': 162, 'Adv Stud in Teaching Learning': 163, 'French': 164,
    'Management/Marketing': 165, 'School Music': 166, 'Pre Professional': 167,
    'Media and Communication': 168, 'Fermentation Science': 169, 'Leisure and Sport Management': 170,
    'Data Science': 171, 'Exercise Science & Healt Promo': 172, 'Interactive Media': 173,
    'Risk Management & Insurance': 174, 'Horse Science': 175, 'Health & Physical Education': 176,
    'Dyslexic Studies': 177, 'Natural Science': 178, 'Music Arts': 179, 'Spanish': 180,
    'School Counseling': 181, 'Academic Focus': 182, 'Fashion': 183, 'German': 184,
    'Physical Science': 185, 'College & University Teaching': 186, 'Public Health': 187,
    'Aeronautical Science': 188, 'Middle Level Education': 189, 'Business Innovation & Entr': 190,
    'Dietetics': 191, 'Curriculum & Inst/Psychology': 192, 'Industrial Arts - General': 193,
    'Family Nurse Practitioner': 194, 'Other': 195,
}

# Encode the major as its integer code and move the column to the end of the
# table, because the next step takes the last column as the categorical input.
# pop() removes the column and the assignment re-appends it last. map() is
# used instead of replace() so the result is a numeric column directly; a
# label missing from major_dict would become NaN rather than stay a string.
all_data["Major"] = all_data.pop("Major").map(major_dict)

In [None]:
# Separate numerical and categorical data, then preprocess the numerical part.
# Every column except the last is treated as numerical and standardised; the
# last column (the encoded major) is kept as a single-column 2-D slice.
X = all_data.to_numpy()
x1, x2 = X[:, :-1], X[:, -1:]
x1 = preprocess(x1)

## Download Donor Categorization Model
#### This model classifies alumni donors into 5 categories, given the data above. 

In [None]:
# Fetch the pretrained donor categorization model and save it as
# donor_categorization.h5 in the working directory (shell escape; requires
# wget and network access).
!wget -O donor_categorization.h5 https://csci7850-f22-semesterproject.nyc3.digitaloceanspaces.com/donor_categorization.h5

In [None]:
# Load the pretrained model from the downloaded HDF5 file.
model = keras.models.load_model("donor_categorization.h5")

# Optional: uncomment the line below to plot the model architecture.
#keras.utils.plot_model(model,show_shapes=True,expand_nested=True)

## Donor Category Final Prediction
#### The model predicts which donor category the alumnus falls into.

In [None]:
# Run the model on the two inputs (standardised numerical features and the
# encoded major) and keep the output for the first row.
prediction = model.predict([x1,x2])[0]
# The predicted category is the index of the largest output value.
donor_category = np.argmax(prediction)
# Show the raw model output for inspection.
prediction

In [None]:
# Human-readable description of each donor class, keyed by class index.
_category_messages = {
    0: "Donor is in class 0. They are expected to give a gift <= $10.",
    1: "Donor is in class 1. They are expected to give a gift > $10 and < $21",
    2: "Donor is in class 2. They are expected to give a gift >= $21 and < $50",
    3: "Donor is in class 3. They are expected to give a gift >= $50 and < $100",
    4: "Donor is in class 4. They are expected to give a gift >= $100",
}

# Print the description for the predicted class; nothing is printed for an
# index outside 0-4.
_category_message = _category_messages.get(donor_category)
if _category_message is not None:
    print(_category_message)