# Dashboard Historical student data

## 1. Most common pathways taken:


In [30]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from dash import Dash, dcc, html, Input, Output

### Data cleaning: (This could be extracted into an object, or into a function that is called once and outputs a clean file. While the process is not yet defined, I will just add the cleaning functions to this file.)

In [31]:
# Cleaning and testing function
def Enrollments_cleaning(df: pd.DataFrame) -> pd.DataFrame:
    """Return a cleaned copy of the raw enrollments table.

    The 'Full Name' column is dropped and missing values in the date,
    outcome and cohort columns are replaced with the string 'NA'. The
    result is validated with ``Test_enrollments_cleaning`` before it is
    returned.

    Args:
        df: Raw enrollments data. It is not modified.

    Returns:
        A new DataFrame without 'Full Name' and with the listed columns
        free of NaN values.

    Raises:
        KeyError: If 'Full Name' or one of the filled columns is missing.
        AssertionError: If NaN values remain after cleaning.
    """
    placeholder = 'NA'
    # Columns whose missing values are replaced by the placeholder.
    columns_with_gaps = (
        'Projected Start Date',
        'Actual Start Date',
        'Projected End Date',
        'Actual End Date',
        'Outcome',
        'ATP Cohort',
    )

    cleaned = df.drop(columns=['Full Name'])
    for name in columns_with_gaps:
        cleaned[name] = cleaned[name].fillna(placeholder)

    # The check lives inside the cleaning function because it cannot be moved
    # to a separate folder structure until the testing approach is defined.
    Test_enrollments_cleaning(cleaned)

    return cleaned

def Test_enrollments_cleaning(clean_df: pd.DataFrame) -> None:
    """Check that a cleaned enrollments DataFrame has no missing values.

    Args:
        clean_df: DataFrame to validate. This parameter could later be
            replaced by an in-function call of the data cleaner.

    Raises:
        AssertionError: If any cell of ``clean_df`` is missing (NaN/NaT/None).
    """
    # Convert to a plain bool and test it directly: the bitwise ``~`` operator
    # turns a Python ``True`` into -2, which is truthy and would let the
    # check pass silently.
    has_missing = bool(clean_df.isna().any().any())

    # Raised explicitly instead of via ``assert`` so the check still runs
    # when Python is started with -O.
    if has_missing:
        raise AssertionError('The Dataframe has na values.')

In [32]:
# Forward slashes keep this relative path valid on Windows, macOS and Linux.
enrollments = pd.read_excel('Data/Raw/ARC Enrollments.xlsx')
# Drop the name column, fill the missing values and validate the result.
enrollments = Enrollments_cleaning(enrollments)
enrollments

Unnamed: 0,Auto Id,KY Region,Assessment ID,EnrollmentId,Enrollment Service Name,Service,Projected Start Date,Actual Start Date,Projected End Date,Actual End Date,Outcome,ATP Cohort
0,202109-5224,SOAR,OA-003348,Enrollment-1386,ES-0011193,Career Readiness Workshop,2021-11-11 00:00:00,,,,,
1,202109-5224,SOAR,OA-003348,Enrollment-1386,ES-0013492,Software Development 1,2022-01-05 00:00:00,2022-01-05 00:00:00,2022-04-06 00:00:00,2022-04-06 00:00:00,Successfully Completed,2022-01-01 00:00:00
2,202109-5224,SOAR,OA-003348,Enrollment-1386,ES-0014187,Career Readiness Workshop,2022-03-07 00:00:00,,,,,
3,202109-5224,SOAR,OA-003348,Enrollment-1386,ES-0015022,Software Development 2,2022-05-04 00:00:00,2022-05-04 00:00:00,2022-07-29 00:00:00,2022-07-29 00:00:00,Successfully Completed,2022-05-01 00:00:00
4,202109-5224,SOAR,OA-003348,Enrollment-1386,ES-0015075,Web Development 1,2021-09-08 00:00:00,2021-09-08 00:00:00,2021-12-14 00:00:00,2021-12-14 00:00:00,Successfully Completed,2021-09-01 00:00:00
...,...,...,...,...,...,...,...,...,...,...,...,...
2028,202504-21723,SOAR,OA-022760,Enrollment-14196,ES-0035149,Intro to Programming Core,2025-05-12 00:00:00,2025-05-12 00:00:00,2025-06-27 00:00:00,,,2025-05-01 00:00:00
2029,202505-22788,SOAR,OA-023710,Enrollment-14213,ES-0035212,Intro to Programming Core,2025-05-14 00:00:00,2025-05-13 00:00:00,2025-06-17 00:00:00,,,2025-05-01 00:00:00
2030,202408-16568,SOAR,OA-017961,Enrollment-14833,ES-0036429,Intro to Programming Core,2025-05-12 00:00:00,,,,Did Not Complete,2025-05-01 00:00:00
2031,202408-16568,SOAR,OA-017961,Enrollment-14833,ES-0036430,Supportive Services Referral,,,,,,


### 1.1 Most common path by period
- Periods are going to be defined by ATP Cohort

In [None]:
# Starter pathways are the only services taken into consideration when
# working out which pathway a student chose in each period.
Starter_pathways = [
    'Software Development 1', 'Web Development 1', 'Data Analysis 1',
    'Web Development M1', 'Data Analysis M1', 'Software Development M1',
    'Quality Assurance M1', 'User Experience M1',
]

In [34]:
# Which services are behind the 'NA' values in 'ATP Cohort'?
is_na_cohort = enrollments['ATP Cohort'] == 'NA'
enrollments.loc[is_na_cohort].value_counts('Service')

# Pathways are barely represented among the 'NA' cohort rows (these rows
# probably have more to do with support entries):
# only 6 of the 460 'NA' cohort values are pathways.

Service
Career Readiness Workshop           224
One-on-one Job Readiness             87
Remote Jobs Workshop (EDA Grant)     30
Referral to External Service         20
Supportive Services Referral         20
Tech Communications Workshop         20
Employer Event (Code:You)            14
Demo Day Participant                 12
Remote Jobs Workshop (non EDA)        8
Technical Project Management          6
Resume Review and Optimization        6
Interview Guidance and Practice       5
Data Analysis M4                      2
Web Development M4                    2
Revised Resume Review                 2
Software Development M4               1
Web Development 1                     1
Name: count, dtype: int64

In [35]:
# Distinct cohorts in order of first appearance, without the 'NA' placeholder.
# Filtering (instead of list.remove) does not raise ValueError when 'NA' is
# absent from the column.
cohorts = [
    cohort for cohort in enrollments['ATP Cohort'].unique() if cohort != 'NA'
]
cohorts


[Timestamp('2022-01-01 00:00:00'),
 Timestamp('2022-05-01 00:00:00'),
 Timestamp('2021-09-01 00:00:00'),
 Timestamp('2023-01-01 00:00:00'),
 Timestamp('2023-05-01 00:00:00'),
 Timestamp('2022-09-01 00:00:00'),
 Timestamp('2024-05-01 00:00:00'),
 Timestamp('2024-08-01 00:00:00'),
 Timestamp('2023-08-01 00:00:00'),
 Timestamp('2024-01-01 00:00:00'),
 Timestamp('2025-01-01 00:00:00'),
 Timestamp('2025-05-01 00:00:00')]

In [36]:
# Keep only the enrollments whose service is one of the starter pathways.
mask_starter_pathways = enrollments['Service'].isin(Starter_pathways)
starter_only_enrollments = enrollments.loc[mask_starter_pathways]

In [None]:
app = Dash(__name__)

# Constants
options = cohorts
# Pie-slice colour per starter pathway; the numbered and "M1" entries of the
# same pathway share a colour.
pathway_color = {
    'Software Development 1': 'green',
    'Web Development 1': 'blue',
    'Data Analysis 1': 'red',
    'Web Development M1': 'blue',
    'Data Analysis M1': 'red',
    'Software Development M1': 'green',
    'Quality Assurance M1': 'yellow',
    'User Experience M1': 'purple',
}

# Display
app.layout = html.Div([
    # Style keys are camelCase, matching 'backgroundColor' below.
    html.H2('Cohorts', style={'textAlign': 'center'}),
    html.P('Select Cohort:'),
    dcc.Dropdown(
        id="dropdown",
        options=options,
        # Preselect the first cohort; fall back to no selection when the
        # cohort list is empty so building the layout does not raise
        # IndexError.
        value=options[0] if options else None,
        clearable=False,
    ),
    dcc.Graph(id="graph"),
], style={'backgroundColor': 'white'})

# Graph
@app.callback(
    Output("graph", "figure"),
    Input("dropdown", "value"))
def tt(time):
    """Build the starter-pathway pie chart for the selected cohort.

    Args:
        time: Value currently selected in the "dropdown" component. It is
            converted with ``pd.Timestamp`` before being compared against
            the 'ATP Cohort' column.

    Returns:
        A Plotly Express pie figure with one slice per 'Service', sized by
        the number of matching rows in ``starter_only_enrollments`` and
        coloured according to ``pathway_color``.
    """
    in_cohort = starter_only_enrollments['ATP Cohort'] == pd.Timestamp(time)
    # name='count' fixes the column name explicitly instead of relying on the
    # default name of the value_counts result.
    df = (
        starter_only_enrollments[in_cohort]
        .value_counts('Service')
        .reset_index(name='count')
    )
    fig = px.pie(
        df,
        names='Service',
        values='count',
        color='Service',
        color_discrete_map=pathway_color,
    )
    return fig

# Run the Dash app with debug mode enabled.
app.run(debug=True)

# TODO: Add the number of students in each cohort
# TODO: Fix the options on the selection
# TODO: Make the colors better