# Major Fully Online Analysis

Looking at just this last Fall 2023, I examined the proportion of students who were fully online versus those who were not fully online. Then I did some more nuanced research.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Load credit hour data. This is 20th-Day credit hour data
# from Fall 2020 to Spring 2024
df = pd.read_csv('FA20 to SP24 20th Day CrHr.csv')

# Lower-case the header so the column names used below match the export
# regardless of how it was capitalised.
df.columns = df.columns.str.lower()

# Composite key: the term code followed by the student id, giving one key
# per student per term. Built with vectorised string concatenation instead
# of a row-by-row Python loop; casting both parts to str keeps this working
# even if pandas parses the id column as numeric.
df['term_id'] = df['term'].astype(str) + df['id'].astype(str)

# Keep only the columns used by the analysis, in a fixed order.
df = df[['id', 'term', 'term_id', 'totcr', 'stype', 'rescode', 'resd_desc', 'degree', 'majr_desc1',
         'gender', 'mrtl', 'ethn_desc', 'cnty', 'pt', 'loc', 'crn', 'sub', 'crs', 'title',
         'cr', 're', 'div', 'crs cd']]

In [None]:
# Per-semester enrollment summary.
#   seats filled = number of rows with an id in each term
#   headcount    = number of distinct term_id keys in each term
# Each measure is computed once for Criminal Justice majors and once for
# the whole dataset, then the four tables are joined on the term code.

filt_df = df.loc[df['majr_desc1'] == 'Criminal Justice']

sem_seats_filled_cj = (
    filt_df.groupby('term', as_index=False)['id']
           .count()
           .rename(columns={'id': 'seats_filled'})
)

sem_seats_filled_all = (
    df.groupby('term', as_index=False)['id']
      .count()
      .rename(columns={'id': 'seats_filled'})
)

# Collapse to one row per term_id first so each student is counted once
# per term, then count ids within each term.
sem_hc_cj = (
    filt_df.groupby('term_id', as_index=False)
           .first()
           .groupby('term', as_index=False)['id']
           .count()
)

sem_hc_all = (
    df.groupby('term_id', as_index=False)
      .first()
      .groupby('term', as_index=False)['id']
      .count()
)

# The `_x` / `_y` suffixes are the ones pandas adds when two merged frames
# share a column name: `_x` is the left (all students) side and `_y` the
# right (Criminal Justice) side.
summary_labels = {
    'term': 'Semester',
    'seats_filled_x': 'All Seats Filled',
    'seats_filled_y': 'CJ Seats Filled',
    'id_x': 'All Headcount',
    'id_y': 'CJ Headcount',
}

(
    sem_seats_filled_all
    .merge(sem_seats_filled_cj, how='left', on='term')
    .merge(sem_hc_all, how='left', on='term')
    .merge(sem_hc_cj, how='left', on='term')
    .rename(columns=summary_labels)
)

In [None]:
# Create filters for analysis
# NOTE(review): mask1 is built but never applied in this cell, so despite
# its name `cj_majors` contains every major in the selected terms. Confirm
# whether `df[mask1 & mask2]` was intended here.
mask1 = df['majr_desc1'] == 'Criminal Justice'
mask2 = df['term'].isin([202080, 202110, 202180, 202210, 202280, 202310])

# Apply filters
cj_majors = df[mask2].reset_index(drop=True)

# A class row counts as online only when its location code is 'V'
# (virtual); every other location code is treated as in person.
is_online = cj_majors['loc'] == 'V'

# A student-term (term_id) is fully online only if all of its class rows
# are online. A single group-by replaces re-filtering the whole frame once
# per term_id; sort=False keeps the keys in order of first appearance.
all_online = is_online.groupby(cj_majors['term_id'], sort=False).all()

# Dictionary mapping each term_id to its online-status label
online_only = {
    term_id: "Fully Online" if fully_online else "Not Fully Online"
    for term_id, fully_online in all_online.items()
}

I vetted the code above by checking 20 separate student IDs to make sure that those labeled "Fully Online" were, indeed, fully online. If you repeat this check, note that some students labeled "Fully Online" will initially appear to have an in-person class. This is because they are in an online class that has a Zoom session. The labeling is therefore still correct: they are fully online.

In [None]:
# Show the online_only lookup as a two-column table: one row per dictionary
# key with its stored value. The keys are term_id values (term code plus
# student id), displayed under the 'id' heading.
status_table = pd.DataFrame.from_dict(online_only, orient='index').reset_index()
status_table.rename(columns={'index': 'id', 0: 'Online Status'})

In [None]:
# Number of term_id keys (one per student per term) held in online_only
len(online_only)

In [None]:
# Count the entries in online_only that are labelled "Fully Online"
record = sum(status == 'Fully Online' for status in online_only.values())

# Share of all entries that are fully online. Yields NaN instead of raising
# ZeroDivisionError when the filters above matched no rows.
record / len(online_only) if online_only else float('nan')

### Sub Analysis of <u>Proportion</u> Of Classes For Each CJ Major That Are Fully Online

In [None]:
# Restrict to Criminal Justice majors in the Fall 2020 - Spring 2023 terms
mask1 = df['majr_desc1'] == 'Criminal Justice'
mask2 = df['term'].isin([202080, 202110, 202180, 202210, 202280, 202310])

cj_majors = df[mask1 & mask2].reset_index(drop=True)

# A class row counts as online only when its location code is 'V'
# (virtual); every other location code is treated as in person.
is_online = cj_majors['loc'] == 'V'

# The mean of a boolean column is the fraction of True values, so this is
# the share of each student-term's class rows that are online. A single
# group-by replaces re-filtering the whole frame once per term_id;
# sort=False keeps the keys in order of first appearance.
share_online = is_online.groupby(cj_majors['term_id'], sort=False).mean()

# NOTE: online_only now maps term_id -> proportion in [0, 1], not a label
online_only = {term_id: float(share) for term_id, share in share_online.items()}

In [None]:
# Reference the term_id -> proportion mapping for inspection. Because the
# cell ends on an assignment rather than an expression, the notebook echoes
# nothing - presumably a deliberate way to suppress the long output.
online_only
output = None

In [None]:
def _online_bucket(share):
    """Return the quartile label for a proportion of online classes.

    Intervals are closed on the right: 0.25 falls in '0% - 25%', 0.50 in
    '25% - 50%', 0.75 in '50% - 75%'. Any value that matches none of the
    first three intervals is labelled '75% - 100%'.
    """
    if 0.0 <= share <= 0.25:
        return '0% - 25%'
    if 0.25 < share <= 0.50:
        return '25% - 50%'
    if 0.50 < share <= 0.75:
        return '50% - 75%'
    return '75% - 100%'


# Bucket label for each student-term's proportion of online classes
proportions = [_online_bucket(share) for share in online_only.values()]

# One row per student-term. 'id' here is only the row position (0, 1, 2, ...),
# used as something to count below; it is not the student id.
strat_online_cj = pd.DataFrame({
    'id': range(len(proportions)),
    'Proportion Online': proportions,
})

# Count how many student-terms fall in each bucket (buckets with no
# student-terms do not appear)
prop_online_cj = (
    strat_online_cj.groupby('Proportion Online', as_index=False)['id']
                   .count()
                   .rename(columns={'id': 'count'})
)

# Percentage of student-terms in each bucket. Scale to percent first and
# round last, so the stored values are clean two-decimal percentages rather
# than float artifacts such as 12.340000000000002.
prop_online_cj['% Rep'] = (
    prop_online_cj['count'] / prop_online_cj['count'].sum() * 100
).round(2)

# Show dataframe
prop_online_cj

In [None]:
import matplotlib.pyplot as plt

# Bar chart: one bar per online-proportion bucket, with height equal to the
# percentage of student-term records that fall in that bucket.
bars = plt.bar(prop_online_cj['Proportion Online'],
               prop_online_cj['% Rep'],
               color='#800080',
               edgecolor='#996515')
plt.xlabel('Proportion Of Classes Online')
# The bar heights come from '% Rep', which is a share of students (per
# term), not a share of classes, so the axis is labelled accordingly.
plt.ylabel('Percent Of Students')
plt.title('FA20 to SP23 Proportion Of Classes Online')

# Add data labels centred just above each bar
for bar in bars:
    height = bar.get_height()
    plt.text(bar.get_x() + bar.get_width() / 2,
             height,
             f"{round(height, 4)}%",
             ha='center',
             va='bottom')

# Show plot
plt.show()