# Imports

In [1]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# From our Data Directory
from pathlib import Path
data_dir = Path('data')
outputs_dir = Path('outputs')

# Uploading and Cleaning Data

In [2]:
student_data = pd.read_csv(data_dir/'student_data.csv', sep = ';' )

In [3]:
print(student_data.columns)

Index(['Marital status', 'Application mode', 'Application order', 'Course',
       'Daytime/evening attendance\t', 'Previous qualification',
       'Previous qualification (grade)', 'Nacionality',
       'Mother's qualification', 'Father's qualification',
       'Mother's occupation', 'Father's occupation', 'Admission grade',
       'Displaced', 'Educational special needs', 'Debtor',
       'Tuition fees up to date', 'Gender', 'Scholarship holder',
       'Age at enrollment', 'International',
       'Curricular units 1st sem (credited)',
       'Curricular units 1st sem (enrolled)',
       'Curricular units 1st sem (evaluations)',
       'Curricular units 1st sem (approved)',
       'Curricular units 1st sem (grade)',
       'Curricular units 1st sem (without evaluations)',
       'Curricular units 2nd sem (credited)',
       'Curricular units 2nd sem (enrolled)',
       'Curricular units 2nd sem (evaluations)',
       'Curricular units 2nd sem (approved)',
       'Curricular units 2nd

#### Clean Student Data

In [4]:
# We are going to do an EDA on age at enrollment, debtor (whether the student is going into debt) and admission grade. 
# We will then examine their relationship to our target variable (whether the student dropped out or graduated).

clean_sd = student_data[['Previous qualification (grade)','Debtor', 'Age at enrollment', 
                         'Admission grade', 'Target', 'Tuition fees up to date', "Mother's qualification", "Father's qualification"]]

# Removing the 794 students that are marked as enrolled
clean_sd = clean_sd[clean_sd["Target"] != "Enrolled"]

## Exploring 'Mother's occupation' and 'Father's qualification' for categorization

In [5]:
mothers_quals = sorted(clean_sd["Mother's qualification"].unique())
fathers_quals = sorted(clean_sd["Father's qualification"].unique())

# No 13, 20, 25, 31, 33
print(mothers_quals)

print(fathers_quals)

len(fathers_quals)
# shared_values = set(mothers_quals) & set(fathers_quals)
# shared_values

[1, 2, 3, 4, 5, 6, 9, 10, 11, 12, 14, 18, 19, 22, 26, 27, 29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44]
[1, 2, 3, 4, 5, 6, 9, 10, 11, 12, 13, 14, 18, 19, 20, 22, 25, 26, 27, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44]


34

In [6]:
allowed_values = {1, 2, 3, 4, 5, 18, 22, 35, 39, 41, 42}

clean_sd_trial = clean_sd[clean_sd["Father's qualification"].isin(allowed_values) & clean_sd["Mother's qualification"].isin(allowed_values)]

In [7]:
clean_sd_trial

Unnamed: 0,Previous qualification (grade),Debtor,Age at enrollment,Admission grade,Target,Tuition fees up to date,Mother's qualification,Father's qualification
1,160.0,0,19,142.5,Graduate,0,1,3
8,137.0,0,21,129.3,Graduate,1,1,1
13,110.0,0,21,111.8,Graduate,1,1,1
20,122.0,0,21,120.3,Graduate,1,1,1
31,125.0,0,20,130.0,Graduate,1,42,3
...,...,...,...,...,...,...,...,...
4403,137.0,0,19,124.8,Graduate,1,1,1
4404,133.1,1,28,120.0,Dropout,0,2,1
4417,132.0,0,20,133.8,Graduate,1,1,1
4419,125.0,0,19,122.2,Graduate,1,1,1


In [8]:
# 1 - Secondary Education - 12th Year of Schooling or Eq. 
# 2 - Higher Education - Bachelor's Degree 
# 3 - Higher Education - Degree 
# 4 - Higher Education - Master's 
# 5 - Higher Education - Doctorate 
# 6 - Frequency of Higher Education 
# 9 - 12th Year of Schooling - Not Completed 
# 10 - 11th Year of Schooling - Not Completed 
# 11 - 7th Year (Old) 
# 12 - Other - 11th Year of Schooling 
# 14 - 10th Year of Schooling 
# 18 - General commerce course 
# 19 - Basic Education 3rd Cycle (9th/10th/11th Year) or Equiv. 
# 22 - Technical-professional course 
# 26 - 7th year of schooling 
# 27 - 2nd cycle of the general high school course 
# 29 - 9th Year of Schooling - Not Completed 
# 30 - 8th year of schooling 
# 34 - Unknown 
# 35 - Can't read or write 
# 36 - Can read without having a 4th year of schooling 
# 37 - Basic education 1st cycle (4th/5th year) or equiv. 
# 38 - Basic Education 2nd Cycle (6th/7th/8th Year) or Equiv. 
# 39 - Technological specialization course 
# 40 - Higher education - degree (1st cycle) 
# 41 - Specialized higher studies course 
# 42 - Professional higher technical course 
# 43 - Higher Education - Master (2nd cycle) 
# 44 - Higher Education - Doctorate (3rd cycle)

# -------

# Unknown - 0
# Illiterate - 1
# Some Level of Elementary School Education -  2
# Elemenatary School (Completed) - 3
# Middle School Education (Completed) - 4
# High School (Completed) - 5
# Process of Getting Bachelors - 6
# Completed Bachelors - 7
# Process of Getting Masters - 8
# Completed Masters - 9
# Process of Getting Doctorate - 10
# Completed Doctorate - 11

Categorizing the 34 previous categories into 12 categories to better understand the data.

In [9]:
# Complete data including new codes
data = {
    "Code": [1, 2, 3, 4, 5, 6, 9, 10, 11, 12, 14, 18, 19, 22, 26, 27, 29, 30, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 34,
             13, 20, 25, 31, 33],
    "Original Description": [
        "Secondary Education - 12th Year of Schooling or Eq.",
        "Higher Education - Bachelor's Degree",
        "Higher Education - Degree",
        "Higher Education - Master's",
        "Higher Education - Doctorate",
        "Frequency of Higher Education",
        "12th Year of Schooling - Not Completed",
        "11th Year of Schooling - Not Completed",
        "7th Year (Old)",
        "Other - 11th Year of Schooling",
        "10th Year of Schooling",
        "General commerce course",
        "Basic Education 3rd Cycle (9th/10th/11th Year) or Equiv.",
        "Technical-professional course",
        "7th year of schooling",
        "2nd cycle of the general high school course",
        "9th Year of Schooling - Not Completed",
        "8th year of schooling",
        "Can't read or write",
        "Can read without having a 4th year of schooling",
        "Basic education 1st cycle (4th/5th year) or equiv.",
        "Basic Education 2nd Cycle (6th/7th/8th Year) or Equiv.",
        "Technological specialization course",
        "Higher education - degree (1st cycle)",
        "Specialized higher studies course",
        "Professional higher technical course",
        "Higher Education - Master (2nd cycle)",
        "Higher Education - Doctorate (3rd cycle)",
        "Unknown",
        "2nd year complementary high school course",
        "Complementary High School Course",
        "Complementary High School Course – not concluded",
        "General Course of Administration and Commerce",
        "Supplementary Accounting and Administration"
    ],
    "Category": [
        "High School (Completed)",
        "Completed Bachelors",
        "Completed Bachelors",
        "Completed Masters",
        "Completed Doctorate",
        "Process of Getting Bachelors",
        "Middle School Education (Completed)",
        "Middle School Education (Completed)",
        "Elementary School (Completed)",
        "Middle School Education (Completed)",
        "Middle School Education (Completed)",
        "Professional Course",
        "Middle School Education (Completed)",
        "Professional Course",
        "Elementary School (Completed)",
        "High School (Completed)",
        "Middle School Education (Completed)",
        "Middle School Education (Completed)",
        "Illiterate",
        "Some Level of Elementary School Education",
        "Elementary School (Completed)",
        "Some Level of Elementary School Education",
        "Professional Course",
        "Process of Getting Bachelors",
        "Process of Getting Masters",
        "Professional Course",
        "Completed Masters",
        "Completed Doctorate",
        "Unknown",
        "Process of Getting Bachelors",
        "Process of Getting Bachelors",
        "Process of Getting Bachelors",
        "High School (Completed)",
        "Professional Course"
    ]
}

# Mapping categories to numeric codes
category_to_code = {
    "Unknown": 0,
    "Illiterate": 1,
    "Some Level of Elementary School Education": 2,
    "Elementary School (Completed)": 3,
    "Middle School Education (Completed)": 4,
    "High School (Completed)": 5,
    "Process of Getting Bachelors": 6,
    "Completed Bachelors": 7,
    "Process of Getting Masters": 8,
    "Completed Masters": 9,
    "Process of Getting Doctorate": 10,
    "Completed Doctorate": 11,
    "Professional Course": 2  # same as Some Level of Elementary or adjust if desired
}

# Create DataFrame
education_df = pd.DataFrame(data)

# Add numeric category codes
education_df["Category_Code"] = education_df["Category"].map(category_to_code)

# Display
education_df

Unnamed: 0,Code,Original Description,Category,Category_Code
0,1,Secondary Education - 12th Year of Schooling o...,High School (Completed),5
1,2,Higher Education - Bachelor's Degree,Completed Bachelors,7
2,3,Higher Education - Degree,Completed Bachelors,7
3,4,Higher Education - Master's,Completed Masters,9
4,5,Higher Education - Doctorate,Completed Doctorate,11
5,6,Frequency of Higher Education,Process of Getting Bachelors,6
6,9,12th Year of Schooling - Not Completed,Middle School Education (Completed),4
7,10,11th Year of Schooling - Not Completed,Middle School Education (Completed),4
8,11,7th Year (Old),Elementary School (Completed),3
9,12,Other - 11th Year of Schooling,Middle School Education (Completed),4


Now mapping the codes and new categories to our old clean_sd dataframe.

In [10]:
# Create mapping dictionaries
code_to_category = dict(zip(education_df["Code"], education_df["Category"]))
code_to_code = dict(zip(education_df["Code"], education_df["Category_Code"]))

# Map Mother's qualification
clean_sd["Mother_edu_category"] = clean_sd["Mother's qualification"].map(code_to_category)
clean_sd["Mother_edu_code"] = clean_sd["Mother's qualification"].map(code_to_code).fillna(0).astype(int)

# Map Father's qualification
clean_sd["Father_edu_category"] = clean_sd["Father's qualification"].map(code_to_category)
clean_sd["Father_edu_code"] = clean_sd["Father's qualification"].map(code_to_code).fillna(0).astype(int)

# New DF
clean_sd

Unnamed: 0,Previous qualification (grade),Debtor,Age at enrollment,Admission grade,Target,Tuition fees up to date,Mother's qualification,Father's qualification,Mother_edu_category,Mother_edu_code,Father_edu_category,Father_edu_code
0,122.0,0,20,127.3,Dropout,1,19,12,Middle School Education (Completed),4,Middle School Education (Completed),4
1,160.0,0,19,142.5,Graduate,0,1,3,High School (Completed),5,Completed Bachelors,7
2,122.0,0,19,124.8,Dropout,0,37,37,Elementary School (Completed),3,Elementary School (Completed),3
3,122.0,0,20,119.6,Graduate,1,38,37,Some Level of Elementary School Education,2,Elementary School (Completed),3
4,100.0,0,45,141.5,Graduate,1,37,38,Elementary School (Completed),3,Some Level of Elementary School Education,2
...,...,...,...,...,...,...,...,...,...,...,...,...
4419,125.0,0,19,122.2,Graduate,1,1,1,High School (Completed),5,High School (Completed),5
4420,120.0,1,18,119.0,Dropout,0,1,1,High School (Completed),5,High School (Completed),5
4421,154.0,0,30,149.5,Dropout,1,37,37,Elementary School (Completed),3,Elementary School (Completed),3
4422,180.0,0,20,153.8,Graduate,1,37,37,Elementary School (Completed),3,Elementary School (Completed),3


### Exploring Data

In [11]:
# 139 parents have an unknown level of education
# Students "enrolled" ~ 800

# Dataframe without the 'unknown' category
clean_sd_education = clean_sd[
    (clean_sd["Mother's qualification"] != 34) &
    (clean_sd["Father's qualification"] != 34)
]

clean_sd_education

Unnamed: 0,Previous qualification (grade),Debtor,Age at enrollment,Admission grade,Target,Tuition fees up to date,Mother's qualification,Father's qualification,Mother_edu_category,Mother_edu_code,Father_edu_category,Father_edu_code
0,122.0,0,20,127.3,Dropout,1,19,12,Middle School Education (Completed),4,Middle School Education (Completed),4
1,160.0,0,19,142.5,Graduate,0,1,3,High School (Completed),5,Completed Bachelors,7
2,122.0,0,19,124.8,Dropout,0,37,37,Elementary School (Completed),3,Elementary School (Completed),3
3,122.0,0,20,119.6,Graduate,1,38,37,Some Level of Elementary School Education,2,Elementary School (Completed),3
4,100.0,0,45,141.5,Graduate,1,37,38,Elementary School (Completed),3,Some Level of Elementary School Education,2
...,...,...,...,...,...,...,...,...,...,...,...,...
4419,125.0,0,19,122.2,Graduate,1,1,1,High School (Completed),5,High School (Completed),5
4420,120.0,1,18,119.0,Dropout,0,1,1,High School (Completed),5,High School (Completed),5
4421,154.0,0,30,149.5,Dropout,1,37,37,Elementary School (Completed),3,Elementary School (Completed),3
4422,180.0,0,20,153.8,Graduate,1,37,37,Elementary School (Completed),3,Elementary School (Completed),3


Previous qualification (grade) & Admission grade is from a scale of 0 - 200



Debtor = 1 means that yes they going into debt



Tuition fees up to date = 1 means that yes they are up to date

In [73]:
num_mom_cat = clean_sd['Mother_edu_category'].value_counts()
num_dad_cat = clean_sd['Father_edu_category'].value_counts()

In [16]:
num_mom_cat

Mother_edu_category
High School (Completed)                      866
Elementary School (Completed)                844
Middle School Education (Completed)          835
Some Level of Elementary School Education    474
Completed Bachelors                          397
Unknown                                      127
Completed Masters                             37
Completed Doctorate                           17
Professional Course                           12
Process of Getting Bachelors                  12
Process of Getting Masters                     6
Illiterate                                     3
Name: count, dtype: int64

In [17]:
num_dad_cat

Father_edu_category
Elementary School (Completed)                1022
Middle School Education (Completed)           836
High School (Completed)                       734
Some Level of Elementary School Education     583
Completed Bachelors                           263
Unknown                                       109
Completed Masters                              34
Professional Course                            20
Completed Doctorate                            16
Process of Getting Bachelors                   10
Illiterate                                      2
Process of Getting Masters                      1
Name: count, dtype: int64

In [57]:
# Dataframe with just the 'Elementary School (Some Level)' category
clean_sd_elemen_sl = clean_sd[
    (clean_sd["Mother_edu_code"] == 2) &
    (clean_sd["Father_edu_code"] == 2)
]

# Dataframe with just the 'Elementary School (Completed)' category
clean_sd_elementary = clean_sd[
    (clean_sd["Mother_edu_code"] == 3) &
    (clean_sd["Father_edu_code"] == 3)
]

# Dataframe with just the 'Middle School (Completed)' category
clean_sd_middle = clean_sd[
    (clean_sd["Mother_edu_code"] == 4) &
    (clean_sd["Father_edu_code"] == 4)
]

# Dataframe with just the 'High School (Completed)' category
clean_sd_high = clean_sd[
    (clean_sd["Mother_edu_code"] == 5) &
    (clean_sd["Father_edu_code"] == 5)
]

# Dataframe with just the 'Bachelors (Completed)' category
clean_sd_bachelor = clean_sd[
    (clean_sd["Mother_edu_code"] == 7) &
    (clean_sd["Father_edu_code"] == 7)
]

In [58]:
elem_sl_count = clean_sd_elemen_sl['Target'].value_counts()
elem_count = clean_sd_elementary['Target'].value_counts()
midd_count = clean_sd_middle['Target'].value_counts()
high_count = clean_sd_high['Target'].value_counts()
bach_count = clean_sd_bachelor['Target'].value_counts()

In [59]:
print("Elementary School Some Level Count ")
print(elem_sl_count)
print()
print("Elementary School Count ")
print(elem_count)
print()
print("Middle School Count ")
print(midd_count)
print()
print("High School Count ")
print(high_count)
print()
print("Bachelors Count ")
print(bach_count)

Elementary School Some Level Count 
Target
Graduate    130
Dropout      49
Name: count, dtype: int64

Elementary School Count 
Target
Graduate    308
Dropout     286
Name: count, dtype: int64

Middle School Count 
Target
Graduate    209
Dropout     118
Name: count, dtype: int64

High School Count 
Target
Graduate    212
Dropout     132
Name: count, dtype: int64

Bachelors Count 
Target
Graduate    76
Dropout     57
Name: count, dtype: int64


In [60]:
elem_sl_grad_percent = ((elem_sl_count.iloc[0]) / (elem_sl_count.iloc[0] + elem_sl_count.iloc[1])) * 100
elem_sl_drop_percent = ((elem_sl_count.iloc[1]) / (elem_sl_count.iloc[0] + elem_sl_count.iloc[1])) * 100

elem_grad_percent = ((elem_count.iloc[0]) / (elem_count.iloc[0] + elem_count.iloc[1])) * 100
elem_drop_percent = ((elem_count.iloc[1]) / (elem_count.iloc[0] + elem_count.iloc[1])) * 100

midd_grad_percent = ((midd_count.iloc[0]) / (midd_count.iloc[0] + midd_count.iloc[1])) * 100
midd_drop_percent = ((midd_count.iloc[1]) / (midd_count.iloc[0] + midd_count.iloc[1])) * 100

high_grad_percent = ((high_count.iloc[0]) / (high_count.iloc[0] + high_count.iloc[1])) * 100
high_drop_percent = ((high_count.iloc[1]) / (high_count.iloc[0] + high_count.iloc[1])) * 100

bach_grad_percent = ((bach_count.iloc[0]) / (bach_count.iloc[0] + bach_count.iloc[1])) * 100
bach_drop_percent = ((bach_count.iloc[1]) / (bach_count.iloc[0] + bach_count.iloc[1])) * 100

In [61]:
print(f"Students whose parents completed Some Level of Elementary School and Graduated: {elem_sl_grad_percent}%")
print(f"Students whose parents completed Elementary School and Dropped Out: {elem_sl_drop_percent}%")
print()
print(f"Students whose parents completed Elementary School and Graduated: {elem_grad_percent}%")
print(f"Students whose parents completed Elementary School and Dropped Out: {elem_drop_percent}%")
print()
print(f"Students whose parents completed Middle School and Graduated: {midd_grad_percent}%")
print(f"Students whose parents completed Middle School and Dropped Out: {midd_drop_percent}%")
print()
print(f"Students whose parents completed High School and Graduated: {high_grad_percent}%")
print(f"Students whose parents completed High School and Dropped Out: {high_drop_percent}%")
print()
print(f"Students whose parents completed their Bachelors and Graduated: {bach_grad_percent}%")
print(f"Students whose parents completed their Bachelors and Dropped Out: {bach_drop_percent}%")

Students whose parents completed Some Level of Elementary School and Graduated: 72.62569832402235%
Students whose parents completed Elementary School and Dropped Out: 27.37430167597765%

Students whose parents completed Elementary School and Graduated: 51.85185185185185%
Students whose parents completed Elementary School and Dropped Out: 48.148148148148145%

Students whose parents completed Middle School and Graduated: 63.91437308868502%
Students whose parents completed Middle School and Dropped Out: 36.08562691131498%

Students whose parents completed High School and Graduated: 61.627906976744185%
Students whose parents completed High School and Dropped Out: 38.372093023255815%

Students whose parents completed their Bachelors and Graduated: 57.14285714285714%
Students whose parents completed their Bachelors and Dropped Out: 42.857142857142854%


In [62]:
# Dataframe with just the 'Elementary School (Completed)' category for Moms
clean_sd_elemen_sl_m = clean_sd[(clean_sd["Mother_edu_code"] == 2) ]

# Dataframe with just the 'Elementary School (Completed)' category for Moms
clean_sd_elementary_m = clean_sd[(clean_sd["Mother_edu_code"] == 3) ]

# Dataframe with just the 'Middle School (Completed)' category for Moms
clean_sd_middle_m = clean_sd[(clean_sd["Mother_edu_code"] == 4)]

# Dataframe with just the 'High School (Completed)' category for Moms
clean_sd_high_m = clean_sd[(clean_sd["Mother_edu_code"] == 5) ]

# Dataframe with just the 'Bachelors (Completed)' category for Moms
clean_sd_bachelor_m = clean_sd[(clean_sd["Mother_edu_code"] == 7)]

In [63]:
elem_sl_m_count = clean_sd_elemen_sl_m['Target'].value_counts()
elem_m_count = clean_sd_elementary_m['Target'].value_counts()
midd_m_count = clean_sd_middle_m['Target'].value_counts()
high_m_count = clean_sd_high_m['Target'].value_counts()
bach_m_count = clean_sd_bachelor_m['Target'].value_counts()

In [64]:
print("Some Elementary School Count (Mother)")
print(elem_sl_m_count)
print()
print("Elementary School Count (Mother)")
print(elem_m_count)
print()
print("Middle School Count (Mother)")
print(midd_m_count)
print()
print("High School Count (Mother)")
print(high_m_count)
print()
print("Bachelors Count (Mother)")
print(bach_m_count)

Some Elementary School Count (Mother)
Target
Graduate    337
Dropout     149
Name: count, dtype: int64

Elementary School Count (Mother)
Target
Graduate    459
Dropout     385
Name: count, dtype: int64

Middle School Count (Mother)
Target
Graduate    532
Dropout     303
Name: count, dtype: int64

High School Count (Mother)
Target
Graduate    565
Dropout     301
Name: count, dtype: int64

Bachelors Count (Mother)
Target
Graduate    238
Dropout     159
Name: count, dtype: int64


In [65]:
elem_sl_m_grad_percent = ((elem_sl_m_count.iloc[0]) / (elem_sl_m_count.iloc[0] + elem_sl_m_count.iloc[1])) * 100
elem_sl_m_drop_percent = ((elem_sl_m_count.iloc[1]) / (elem_sl_m_count.iloc[0] + elem_sl_m_count.iloc[1])) * 100

elem_m_grad_percent = ((elem_m_count.iloc[0]) / (elem_m_count.iloc[0] + elem_m_count.iloc[1])) * 100
elem_m_drop_percent = ((elem_m_count.iloc[1]) / (elem_m_count.iloc[0] + elem_m_count.iloc[1])) * 100

midd_m_grad_percent = ((midd_m_count.iloc[0]) / (midd_m_count.iloc[0] + midd_m_count.iloc[1])) * 100
midd_m_drop_percent = ((midd_m_count.iloc[1]) / (midd_m_count.iloc[0] + midd_m_count.iloc[1])) * 100

high_m_grad_percent = ((high_m_count.iloc[0]) / (high_m_count.iloc[0] + high_m_count.iloc[1])) * 100
high_m_drop_percent = ((high_m_count.iloc[1]) / (high_m_count.iloc[0] + high_m_count.iloc[1])) * 100

bach_m_grad_percent = ((bach_m_count.iloc[0]) / (bach_m_count.iloc[0] + bach_m_count.iloc[1])) * 100
bach_m_drop_percent = ((bach_m_count.iloc[1]) / (bach_m_count.iloc[0] + bach_m_count.iloc[1])) * 100

In [66]:
print(f"Students whose Moms completed Some Level of Elementary School and Graduated: {elem_sl_m_grad_percent}%")
print(f"Students whose Moms completed Some Level of Elementary School and Dropped Out: {elem_sl_m_drop_percent}%")
print()
print(f"Students whose Moms completed Elementary School and Graduated: {elem_m_grad_percent}%")
print(f"Students whose Moms completed Elementary School and Dropped Out: {elem_m_drop_percent}%")
print()
print(f"Students whose Moms completed Middle School and Graduated: {midd_m_grad_percent}%")
print(f"Students whose Moms completed Middle School and Dropped Out: {midd_m_drop_percent}%")
print()
print(f"Students whose Moms completed High School and Graduated: {high_m_grad_percent}%")
print(f"Students whose Moms completed High School and Dropped Out: {high_m_drop_percent}%")
print()
print(f"Students whose Moms completed their Bachelors and Graduated: {bach_m_grad_percent}%")
print(f"Students whose Moms completed their Bachelors and Dropped Out: {bach_m_drop_percent}%")

Students whose Moms completed Some Level of Elementary School and Graduated: 69.34156378600824%
Students whose Moms completed Some Level of Elementary School and Dropped Out: 30.65843621399177%

Students whose Moms completed Elementary School and Graduated: 54.383886255924175%
Students whose Moms completed Elementary School and Dropped Out: 45.61611374407583%

Students whose Moms completed Middle School and Graduated: 63.7125748502994%
Students whose Moms completed Middle School and Dropped Out: 36.287425149700596%

Students whose Moms completed High School and Graduated: 65.24249422632793%
Students whose Moms completed High School and Dropped Out: 34.75750577367205%

Students whose Moms completed their Bachelors and Graduated: 59.949622166246854%
Students whose Moms completed their Bachelors and Dropped Out: 40.050377833753146%


In [68]:
# Dataframe with just the 'Some Level Elementary School' category for Fathers
clean_sd_elemen_sl_f = clean_sd[(clean_sd["Father_edu_code"] == 2)]

# Dataframe with just the 'Elementary School (Completed)' category for Fathers
clean_sd_elementary_f = clean_sd[(clean_sd["Father_edu_code"] == 3)]

# Dataframe with just the 'Middle School (Completed)' category for Fathers
clean_sd_middle_f = clean_sd[(clean_sd["Father_edu_code"] == 4)]

# Dataframe with just the 'High School (Completed)' category for Fathers
clean_sd_high_f = clean_sd[(clean_sd["Father_edu_code"] == 5) ]

# Dataframe with just the 'Bachelors (Completed)' category for Fathers
clean_sd_bachelor_f = clean_sd[(clean_sd["Father_edu_code"] == 7)]

In [69]:
elem_sl_f_count = clean_sd_elemen_sl_f['Target'].value_counts()
elem_f_count = clean_sd_elementary_f['Target'].value_counts()
midd_f_count = clean_sd_middle_f['Target'].value_counts()
high_f_count = clean_sd_high_f['Target'].value_counts()
bach_f_count = clean_sd_bachelor_f['Target'].value_counts()

In [70]:
print("Some Level Elementary School Count (Father)")
print(elem_sl_f_count)
print()
print("Elementary School Count (Father)")
print(elem_f_count)
print()
print("Middle School Count (Father)")
print(midd_f_count)
print()
print("High School Count (Father)")
print(high_f_count)
print()
print("Bachelors Count (Father)")
print(bach_f_count)

Some Level Elementary School Count (Father)
Target
Graduate    417
Dropout     186
Name: count, dtype: int64

Elementary School Count (Father)
Target
Graduate    585
Dropout     437
Name: count, dtype: int64

Middle School Count (Father)
Target
Graduate    550
Dropout     286
Name: count, dtype: int64

High School Count (Father)
Target
Graduate    451
Dropout     283
Name: count, dtype: int64

Bachelors Count (Father)
Target
Graduate    151
Dropout     112
Name: count, dtype: int64


In [74]:
elem_sl_f_grad_percent = ((elem_sl_f_count.iloc[0]) / (elem_sl_f_count.iloc[0] + elem_sl_f_count.iloc[1])) * 100
elem_sl_f_drop_percent = ((elem_sl_f_count.iloc[1]) / (elem_sl_f_count.iloc[0] + elem_sl_f_count.iloc[1])) * 100

elem_f_grad_percent = ((elem_f_count.iloc[0]) / (elem_f_count.iloc[0] + elem_f_count.iloc[1])) * 100
elem_f_drop_percent = ((elem_f_count.iloc[1]) / (elem_f_count.iloc[0] + elem_f_count.iloc[1])) * 100

midd_f_grad_percent = ((midd_f_count.iloc[0]) / (midd_f_count.iloc[0] + midd_f_count.iloc[1])) * 100
midd_f_drop_percent = ((midd_f_count.iloc[1]) / (midd_f_count.iloc[0] + midd_f_count.iloc[1])) * 100

high_f_grad_percent = ((high_f_count.iloc[0]) / (high_f_count.iloc[0] + high_f_count.iloc[1])) * 100
high_f_drop_percent = ((high_f_count.iloc[1]) / (high_f_count.iloc[0] + high_f_count.iloc[1])) * 100

bach_f_grad_percent = ((bach_f_count.iloc[0]) / (bach_f_count.iloc[0] + bach_f_count.iloc[1])) * 100
bach_f_drop_percent = ((bach_f_count.iloc[1]) / (bach_f_count.iloc[0] + bach_f_count.iloc[1])) * 100

In [75]:
print(f"Students whose Fathers completed Some Level of Elementary School and Graduated: {elem_sl_f_grad_percent}%")
print(f"Students whose Fathers completed Some Level of Elementary School and Dropped Out: {elem_sl_f_drop_percent}%")
print()
print(f"Students whose Fathers completed Elementary School and Graduated: {elem_f_grad_percent}%")
print(f"Students whose Fathers completed Elementary School and Dropped Out: {elem_f_drop_percent}%")
print()
print(f"Students whose Fathers completed Middle School and Graduated: {midd_f_grad_percent}%")
print(f"Students whose Fathers completed Middle School and Dropped Out: {midd_f_drop_percent}%")
print()
print(f"Students whose Fathers completed High School and Graduated: {high_f_grad_percent}%")
print(f"Students whose Fathers completed High School and Dropped Out: {high_f_drop_percent}%")
print()
print(f"Students whose Fathers completed their Bachelors and Graduated: {bach_f_grad_percent}%")
print(f"Students whose Fathers completed their Bachelors and Dropped Out: {bach_f_drop_percent}%")

Students whose Fathers completed Some Level of Elementary School and Graduated: 69.15422885572139%
Students whose Fathers completed Some Level of Elementary School and Dropped Out: 30.845771144278604%

Students whose Fathers completed Elementary School and Graduated: 57.24070450097848%
Students whose Fathers completed Elementary School and Dropped Out: 42.759295499021526%

Students whose Fathers completed Middle School and Graduated: 65.78947368421053%
Students whose Fathers completed Middle School and Dropped Out: 34.21052631578947%

Students whose Fathers completed High School and Graduated: 61.444141689373296%
Students whose Fathers completed High School and Dropped Out: 38.555858310626704%

Students whose Fathers completed their Bachelors and Graduated: 57.414448669201526%
Students whose Fathers completed their Bachelors and Dropped Out: 42.585551330798474%
