## Donor Classification Demo

In [None]:
#import all necessary tools for the demo
import ipywidgets as widgets
import pandas as pd
import numpy as np
import tensorflow.keras as keras

In [None]:
# Standardization constants used by preprocess() for the numerical data.
mean = 7011.6283
std = 17785.2253


def preprocess(x):
    """Standardize *x* with the module-level ``mean`` and ``std``.

    Works on anything that supports subtraction and division by a float
    (a scalar or a NumPy array); returns ``(x - mean) / std``.
    """
    centered = x - mean
    return centered / std

In [None]:
# Define all widgets for alumni data entry

# Sex of the alumnus, chosen with radio buttons.
sex = widgets.RadioButtons(options=['M', 'F'], disabled=False)

# Age of the alumnus; the field starts at 22.
age = widgets.IntText(description='Age:', value=22, disabled=False)

# Primary graduation year, bounded to 1920-2099 and starting at 2022.
# (If the alumnus has multiple degrees from the university, the graduation
# year of the first degree is entered.)
grad_year = widgets.BoundedIntText(value=2022, min=1920, max=2099)

# Major of the alumnus, chosen from a dropdown that defaults to 'Other'.
# The option strings must match the keys of major_dict exactly, because the
# selected value is later looked up there. 'Computational Science' used to be
# split across two source lines, which left the string literal unterminated.
major = widgets.Dropdown(
    options=[
        'Curriculum & Inst/Ed Leadrshp', 'Psychology', 'Mathematics', 'Criminal Justice Admin', 'Marketing',
        'Industrial Technology', 'Speech & Theatre', 'Foreign Languages*', 'Organizational Communication', 'Computer Science',
        'Mass Communication', 'Administration & Supervision', 'University Studies', 'Early Childhood Education', 'Geosciences',
        'Political Science', 'Liberal Studies', 'Information Systems', 'Business Administration', 'Nursing',
        'Interdisciplinary Studies', 'Textiles Merchandising Design', 'English', 'Special Education', 'Adv Stud in Teaching Learning',
        'Accounting', 'Agribusiness', 'Concrete Industry Management', 'Management', 'Entrepreneurship',
        'Physical Education', 'Finance', 'Recording Industry', 'Engineering Technology', 'Recreation & Leisure Services',
        'Social Work', 'Business Education', 'Plant & Soil Science', 'Music', 'Philosophy',
        'Art Education', 'Health Education', 'Biology', 'Family & Consumer Studies', 'Aerospace',
        'Engr Tech & Industrial Studies', 'Art', 'Interior Design', 'Reading', 'Science',
        'History', 'Professional Studies', 'Construction Mgmt Technology', 'Animal Science', 'Accounting & Info Systems',
        'Industrial Management', 'Environmental Science Tech', 'International Relations', 'Human Sciences', 'Dyslexic Studies',
        'Chemistry', 'Sociology', 'Office Management', 'Health, Physical Ed & Rec', 'Construction Management',
        'Curriculum & Inst/Elem Edu*', 'Anthropology', 'Global Studies', 'Recording Arts & Technologies', 'Economics',
        'Physics', 'Exercise Science', 'Professional Science', 'Nutrition & Food Science', 'Art History',
        'Professional Counseling', 'Biochemistry', 'Gerontology*', 'Industrial & Org Psychology', 'Literacy',
        'Health and Human Performance', 'Theatre', 'Leisure Sprt & Tourism Studies', 'Human Performance', 'Integrated Studies',
        'Leisure and Sport Management', 'Speech/Language Path & Audio', 'Literacy Studies', 'Environmental Sustain Tech', 'Global Studies & Cultural Geog',
        'Forensic Science', 'Computational Science', 'Assessment, Lrn & Sch Imprvmnt', 'Community & Public Health', 'Mechatronics Engineering',
        'Library Science*', 'International Affairs', 'Journalism', 'Media & Entertainment', 'Communication',
        'Dance', 'Global Studies & Human Geog', 'Animation', 'Foreign Languages', 'Environmental Science',
        'Commerce', 'Fermentation Science', 'Religious Studies', 'Elementary Education', 'Video & Film Production',
        'Audio Production', 'Horse Science', 'Aeronautical Science', 'Interactive Media', 'Data Science',
        'Tourism & Hospitality Mgmt', 'Athletic Training', 'Actuarial Science', 'Liberal Arts', 'Supply Chain Management',
        'Health Care Management', 'Exercise Science & Healt Promo', 'Family Nurse Practitioner', 'U.S. Culture and Education', 'College & University Teaching',
        'Mathematics & Science Educ', 'Molecular Biosciences', 'Media and Communication', 'Risk Management & Insurance', 'Curriculum & Inst/Psychology',
        'Public History', 'Public Health', 'Aerospace Education', 'Aviation Administration', 'Africana Studies',
        'Road Construction Technology', 'Business Innovation & Entr', 'School Counseling', 'Applied Social Research', 'Middle Level Education',
        'Academic Focus', "Women's and Gender Studies", 'Museum Management', 'Assurance', 'Archival Management',
        'Psychiatric Mental Health NP', 'Aviation Business', 'Leisure, Sport, & Tourism Mgmt', 'Other',
    ],
    value='Other',
    description='Major:',
    disabled=False,
)

# Zip code of the alumnus, bounded to 601-99999 and starting at 37130.
zip_code = widgets.BoundedIntText(value=37130, min=601, max=99999)

# Whether the alumnus' address is active or inactive (radio buttons 0 / 1,
# starting at 1). NOTE(review): presumably 1 means "active" -- confirm
# against the training data.
address_active = widgets.RadioButtons(options=[0, 1], value=1, disabled=False)

# Activity count of the alumnus.
activity_count = widgets.IntText(description='Count:', disabled=False)

# Accordion that groups every input widget under its own titled section.
# The two lists are parallel: section_titles[k] labels section_widgets[k].
section_widgets = [sex, age, grad_year, zip_code, address_active, activity_count, major]
section_titles = [
    'Sex',
    'Age',
    'Graduation Year',
    'Zip Code',
    'Address Active',
    'Activity Count',
    'Major',
]

alumni_data = widgets.Accordion(children=section_widgets)
for position, section_title in enumerate(section_titles):
    alumni_data.set_title(position, section_title)

## Alumni Data Entry
#### Please enter all alumni data appropriately into the widgets below. 

In [None]:
# Show the accordion of input widgets in the notebook output.
display(alumni_data)

In [None]:
# Verify all information: list the current value of every input widget,
# in the order the widgets appear in the accordion.
[child.value for child in alumni_data.children]

In [None]:
# Collect the current widget values as one row (the outer list makes it a
# single-row table), in the order the widgets appear in the accordion.
data = [[child.value for child in alumni_data.children]]

# The column names are matched to the values by position, so they must stay
# in the same order as the accordion's children.
alumni = pd.DataFrame(
    data,
    columns=['Sex', 'Age', 'Graduation_Year', 'Zip', 'Address_Active',
             'Activity_Count', 'Major'],
)

## Survey Data Download
#### Download the compiled survey data from the U.S. Census and the American Community Survey.

In [None]:
# Shell command run from the notebook: fetch the survey CSV and save it as
# survey_data.csv (wget's -O option names the output file).
!wget -O survey_data.csv https://csci7850-f22-semesterproject.nyc3.digitaloceanspaces.com/survey_data.csv

In [None]:
# Read the downloaded survey CSV into a DataFrame.
survey_path = "survey_data.csv"
survey = pd.read_csv(survey_path)

In [None]:
# Fill NA values with the mean of their own column.
# Only columns that actually contain an NA are visited.
with_NA = survey.columns[survey.isna().any()]
for column_name in with_NA:
    # Assign the filled column back to the frame. Calling
    # survey[column_name].fillna(..., inplace=True) acts on an intermediate
    # object, which pandas 2.x warns about and which leaves `survey`
    # unchanged once copy-on-write is enabled.
    survey[column_name] = survey[column_name].fillna(survey[column_name].mean())

## Merge Alumni and Survey Data
#### Join the alumni and survey data on the alumnus's zip code, then perform the data-cleaning steps that prepare the data for the model.

In [None]:
# Attach the survey columns to the alumni record by matching on zip code.
all_data = pd.merge(alumni, survey, on='Zip', how='inner')

# An inner join returns no rows when the entered zip code is absent from the
# survey data. Stop here with a clear message instead of letting a later
# cell fail on the empty frame.
if all_data.empty:
    raise ValueError(
        f"No survey data found for zip code(s) {alumni['Zip'].tolist()}; "
        "please enter a zip code that is present in survey_data.csv."
    )

# Zip was only needed as the join key.
all_data = all_data.drop("Zip", axis=1)

In [None]:
# Encode male/female as two indicator columns, appended in the order
# Sex_Female, Sex_Male: 'M' -> (0, 1), anything else -> (1, 0).
# The comparison is evaluated for every row, so this does not depend on a
# row labelled 0 existing or on the frame having exactly one row.
is_male = all_data["Sex"] == "M"
all_data["Sex_Female"] = (~is_male).astype(int)
all_data["Sex_Male"] = is_male.astype(int)

# The original text column is no longer needed.
all_data = all_data.drop("Sex", axis=1)

In [None]:
# Encode major: integer code for every major offered by the dropdown.
# The keys must match the dropdown option strings exactly. The key
# 'U.S. Culture and Education' used to be split across two source lines,
# which left the string literal unterminated.
major_dict = {
    'Curriculum & Inst/Ed Leadrshp': 0, 'Psychology': 1, 'Mathematics': 2, 'Criminal Justice Admin': 3, 'Marketing': 4,
    'Industrial Technology': 5, 'Speech & Theatre': 6, 'Foreign Languages*': 7, 'Organizational Communication': 8, 'Computer Science': 9,
    'Mass Communication': 10, 'Administration & Supervision': 11, 'University Studies': 12, 'Early Childhood Education': 13, 'Geosciences': 14,
    'Political Science': 15, 'Liberal Studies': 16, 'Information Systems': 17, 'Business Administration': 18, 'Nursing': 19,
    'Interdisciplinary Studies': 20, 'Textiles Merchandising Design': 21, 'English': 22, 'Special Education': 23, 'Adv Stud in Teaching Learning': 24,
    'Accounting': 25, 'Agribusiness': 26, 'Concrete Industry Management': 27, 'Management': 28, 'Entrepreneurship': 29,
    'Physical Education': 30, 'Finance': 31, 'Recording Industry': 32, 'Engineering Technology': 33, 'Recreation & Leisure Services': 34,
    'Social Work': 35, 'Business Education': 36, 'Plant & Soil Science': 37, 'Music': 38, 'Philosophy': 39,
    'Art Education': 40, 'Health Education': 41, 'Biology': 42, 'Family & Consumer Studies': 43, 'Aerospace': 44,
    'Engr Tech & Industrial Studies': 45, 'Art': 46, 'Interior Design': 47, 'Reading': 48, 'Science': 49,
    'History': 50, 'Professional Studies': 51, 'Construction Mgmt Technology': 52, 'Animal Science': 53, 'Accounting & Info Systems': 54,
    'Industrial Management': 55, 'Environmental Science Tech': 56, 'International Relations': 57, 'Human Sciences': 58, 'Dyslexic Studies': 59,
    'Chemistry': 60, 'Sociology': 61, 'Office Management': 62, 'Health, Physical Ed & Rec': 63, 'Construction Management': 64,
    'Curriculum & Inst/Elem Edu*': 65, 'Anthropology': 66, 'Global Studies': 67, 'Recording Arts & Technologies': 68, 'Economics': 69,
    'Physics': 70, 'Exercise Science': 71, 'Professional Science': 72, 'Nutrition & Food Science': 73, 'Art History': 74,
    'Professional Counseling': 75, 'Biochemistry': 76, 'Gerontology*': 77, 'Industrial & Org Psychology': 78, 'Literacy': 79,
    'Health and Human Performance': 80, 'Theatre': 81, 'Leisure Sprt & Tourism Studies': 82, 'Human Performance': 83, 'Integrated Studies': 84,
    'Leisure and Sport Management': 85, 'Speech/Language Path & Audio': 86, 'Literacy Studies': 87, 'Environmental Sustain Tech': 88, 'Global Studies & Cultural Geog': 89,
    'Forensic Science': 90, 'Computational Science': 91, 'Assessment, Lrn & Sch Imprvmnt': 92, 'Community & Public Health': 93, 'Mechatronics Engineering': 94,
    'Library Science*': 95, 'International Affairs': 96, 'Journalism': 97, 'Media & Entertainment': 98, 'Communication': 99,
    'Dance': 100, 'Global Studies & Human Geog': 101, 'Animation': 102, 'Foreign Languages': 103, 'Environmental Science': 104,
    'Commerce': 105, 'Fermentation Science': 106, 'Religious Studies': 107, 'Elementary Education': 108, 'Video & Film Production': 109,
    'Audio Production': 110, 'Horse Science': 111, 'Aeronautical Science': 112, 'Interactive Media': 113, 'Data Science': 114,
    'Tourism & Hospitality Mgmt': 115, 'Athletic Training': 116, 'Actuarial Science': 117, 'Liberal Arts': 118, 'Supply Chain Management': 119,
    'Health Care Management': 120, 'Exercise Science & Healt Promo': 121, 'Family Nurse Practitioner': 122, 'U.S. Culture and Education': 123, 'College & University Teaching': 124,
    'Mathematics & Science Educ': 125, 'Molecular Biosciences': 126, 'Media and Communication': 127, 'Risk Management & Insurance': 128, 'Curriculum & Inst/Psychology': 129,
    'Public History': 130, 'Public Health': 131, 'Aerospace Education': 132, 'Aviation Administration': 133, 'Africana Studies': 134,
    'Road Construction Technology': 135, 'Business Innovation & Entr': 136, 'School Counseling': 137, 'Applied Social Research': 138, 'Middle Level Education': 139,
    'Academic Focus': 140, "Women's and Gender Studies": 141, 'Museum Management': 142, 'Assurance': 143, 'Archival Management': 144,
    'Psychiatric Mental Health NP': 145, 'Aviation Business': 146, 'Leisure, Sport, & Tourism Mgmt': 147, 'Other': 148,
}
# TODO: split data for each model (open item carried over from the original notes).

# Look every major up in major_dict. map() yields a numeric column directly,
# whereas replace() on a string column depends on dtype downcasting that
# pandas 2.2 deprecates. A major missing from the dictionary becomes NaN
# rather than remaining a string.
all_data["Major"] = all_data["Major"].map(major_dict)

# Move the Major column to the end: pop() removes it and the assignment
# re-adds it as the last column, where it is later split off from the
# numerical columns.
all_data["Major"] = all_data.pop("Major")

In [None]:
# Separate numerical and categorical data -- preprocess numerical data.
X = all_data.values

# Every column except the last one is treated as numerical and standardized.
numerical_part = X[:, :-1]
x1 = preprocess(numerical_part)

# The last column (the encoded major) is kept unscaled; the -1: slice keeps
# it two-dimensional with a single column.
x2 = X[:, -1:]

## Download Donor/Non-Donor Classification Model
#### This model classifies Alumni as donors or non-donors, given the data above. 

In [None]:
# Get the pretrained model: shell command run from the notebook that saves
# the download as donor_classification.h5 (wget's -O option names the output file).
!wget -O donor_classification.h5 https://csci7850-f22-semesterproject.nyc3.digitaloceanspaces.com/donor_classification.h5

In [None]:
# Load the pretrained classifier from the downloaded .h5 file.
model_path = "donor_classification.h5"
model = keras.models.load_model(model_path)

# Optional: uncomment the next line to draw the model architecture.
#keras.utils.plot_model(model,show_shapes=True,expand_nested=True)

## Donor Classification Final Prediction
#### If the model predicts a value below 0.5, the alumnus is predicted to be a non-donor; otherwise, the alumnus is predicted to be a donor.

In [None]:
prediction = model.predict([x1,x2])[0][0]
if(prediction < 0.5):
    print("Alumni is a non-donor.")
else:
    print("Alumni is a donor.")