#### This Jupyter notebook (Python) generates plots based on job register data provided by the American Astronomical Society (AAS) for the AAS Employment Committee. The intent of this notebook is to identify trends in the job register data that can be used to inform the AAS community about how the job market is evolving. This notebook is in development. It currently uses the Table class from the astropy Python module. Future versions may switch to pandas instead.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from astropy.table import Table #to install module you can (hopefully) run '!pip install astropy' 

In [None]:
#%matplotlib ipympl # for interactive plots

In [None]:
#location of the jobs register data file. BE SURE TO UPDATE THIS LOCATION, FILENAME AND FILE FORMAT BASED ON YOUR LOCAL FILE. CSV is recommended.
#######################################

table_loc = '1024-2024-JR-report-2.csv'

#read the file into an astropy table
main_table_raw = Table.read(table_loc, format='csv')

#######################################


#the file saves from google sheets with many extra rows that are empty (masked), so remove those rows from the table we work with.
#the first column 'i' is read in as '\ufeffi' when the file starts with a byte-order mark, so the column is looked up by
#position instead of by name; that way a file saved without the mark works too.
first_col = main_table_raw[main_table_raw.colnames[0]]

#NOTE(review): a plain (non-masked) astropy column is expected to have no 'mask' attribute -- in that case there are no
#empty rows to drop and the whole table is kept.
first_col_mask = getattr(first_col, 'mask', None)
if first_col_mask is None:
    main_table = Table(main_table_raw)
else:
    main_table = Table(main_table_raw[~first_col_mask]) #keep only rows whose first column is not masked

main_table.show_in_notebook(jsviewer=True) #if not using a jupyter notebook then this is the only command that wont work.
#main_table.show_in_browser(jsviewer=True) #doesn't seem to work yet for browsers. Will double check.

In [None]:
#list the column names of the table (as the last expression in a notebook cell, the list is displayed as the cell output)
main_table.colnames

In [None]:
def make_fig_1(table_data, start_date, end_date, filename_root, file_type = 'png'):
    """
    Make Fig 1: a horizontal bar plot showing the number of job listings by position category (e.g., postdoc, grad student, faculty, etc).

    table_data -- astropy Table object based on jobs register data file; the 'deadline' and 'category' columns are used
    start_date, end_date -- interval during which you want to plot data using application deadline (ISO date format e.g., '2025-01-24').
                            Rows are kept when start_date <= deadline < end_date.
    filename_root -- string for naming the output file, which is saved as 'fig1_<filename_root>.<file_type>'
    file_type -- file format (extension) for the saved plot, e.g. 'png' or 'pdf'

    Returns an empty tuple (kept so existing callers see the same return value).
    """

    deadlines = table_data['deadline']
    first_deadline = min(deadlines)
    last_deadline = max(deadlines)

    #check if input time is within bounds of table min/max dates. If not, adjust time to match min/max dates in table.
    #these adjusted dates are what the plot title reports.
    date1 = first_deadline if start_date < first_deadline else start_date
    date2 = last_deadline if end_date > last_deadline else end_date

    #filter the table based on date -- format is 2024-04-28
    #the dates as given by the caller are used here (not the adjusted ones): when end_date lies past the latest deadline,
    #comparing with '<' against the adjusted date2 would drop the rows that have that latest deadline.
    time_table = table_data[ (deadlines >= start_date) & (deadlines < end_date) ]

    #setup the bar plot by finding the count for each unique string (category)
    category, category_count = np.unique(time_table['category'], return_counts = True)

    #create the plot
    fig1, ax1 = plt.subplots(1,1)
    ax1.barh(category, category_count, align = 'center')
    #NOTE(review): the bars count rows of the table (advertisements), so 'Number of Applications' may be the wrong label -- confirm.
    ax1.set_xlabel('Number of Applications')
    ax1.set_title(f'Job Advertisments with Deadlines from {date1} to {date2} ')

    #fig1.subplots_adjust(left=0.4) #necessary for fitting the long strings on left side of plot?
    fig1.set_size_inches(12,4)

    #save the figure; the file extension selects the output format. A leading '.' in file_type is tolerated.
    extension = file_type.lstrip('.')
    fig1.savefig(f'fig1_{filename_root}.{extension}')

    return ()


#generate Fig 1 from the cleaned table for deadlines between the two dates below
make_fig_1(
    table_data=main_table,
    start_date='2022-03-01',
    end_date='2027-03-01',
    filename_root='test_v1',
    file_type='png',
)

In [None]:
def make_fig_2(table_data, start_date, end_date, filename_root, file_type = 'png'):
    """
    Make Fig 2: a horizontal bar plot showing the number of job listings by employer category (e.g., academic, industry, observatory, etc).

    table_data -- astropy Table object based on jobs register data file; the 'deadline' and 'instclass' columns are used
    start_date, end_date -- interval during which you want to plot data using application deadline (ISO date format e.g., '2025-01-24').
                            Rows are kept when start_date <= deadline < end_date.
    filename_root -- string for naming the output file, which is saved as 'fig2_<filename_root>.<file_type>'
    file_type -- file format (extension) for the saved plot, e.g. 'png' or 'pdf'

    Returns an empty tuple (kept so existing callers see the same return value).
    """

    deadlines = table_data['deadline']
    first_deadline = min(deadlines)
    last_deadline = max(deadlines)

    #check if input time is within bounds of table min/max dates. If not, adjust time to match min/max dates in table.
    #these adjusted dates are what the plot title reports.
    date1 = first_deadline if start_date < first_deadline else start_date
    date2 = last_deadline if end_date > last_deadline else end_date

    #filter the table based on date -- format is 2024-04-28
    #the dates as given by the caller are used here (not the adjusted ones): when end_date lies past the latest deadline,
    #comparing with '<' against the adjusted date2 would drop the rows that have that latest deadline.
    time_table = table_data[ (deadlines >= start_date) & (deadlines < end_date) ]

    #setup the bar plot by finding the count for each unique string (employer category)
    category, category_count = np.unique(time_table['instclass'], return_counts = True)

    #create the plot
    fig2, ax2 = plt.subplots(1,1)
    ax2.barh(category, category_count, align = 'center')
    #NOTE(review): the bars count rows of the table (advertisements), so 'Number of Applications' may be the wrong label -- confirm.
    ax2.set_xlabel('Number of Applications')
    ax2.set_title(f'Job Advertisments with Deadlines from {date1} to {date2} ')

    #fig2.subplots_adjust(left=0.4) #necessary for fitting the long strings on left side of plot?
    fig2.set_size_inches(12,4)

    #save the figure; the file extension selects the output format. A leading '.' in file_type is tolerated.
    extension = file_type.lstrip('.')
    fig2.savefig(f'fig2_{filename_root}.{extension}')

    return ()


#generate Fig 2 from the cleaned table for deadlines between the two dates below
make_fig_2(
    table_data=main_table,
    start_date='2022-03-01',
    end_date='2027-03-01',
    filename_root='test_v1',
    file_type='png',
)




In [None]:
def make_fig_3(table_data, start_date, end_date, filename_root, file_type = 'png'):
    """
    Make Fig 3: a horizontal bar plot showing the number of job listings by position location (e.g., in-person, hybrid, remote).

    table_data -- astropy Table object based on jobs register data file; the 'deadline' and 'work_arrangement' columns are used
    start_date, end_date -- interval during which you want to plot data using application deadline (ISO date format e.g., '2025-01-24').
                            Rows are kept when start_date <= deadline < end_date.
    filename_root -- string for naming the output file, which is saved as 'fig3_<filename_root>.<file_type>'
    file_type -- file format (extension) for the saved plot, e.g. 'png' or 'pdf'

    Returns an empty tuple (kept so existing callers see the same return value).
    """

    deadlines = table_data['deadline']
    first_deadline = min(deadlines)
    last_deadline = max(deadlines)

    #check if input time is within bounds of table min/max dates. If not, adjust time to match min/max dates in table.
    #these adjusted dates are what the plot title reports.
    date1 = first_deadline if start_date < first_deadline else start_date
    date2 = last_deadline if end_date > last_deadline else end_date

    #filter the table based on date -- format is 2024-04-28
    #the dates as given by the caller are used here (not the adjusted ones): when end_date lies past the latest deadline,
    #comparing with '<' against the adjusted date2 would drop the rows that have that latest deadline.
    time_table = table_data[ (deadlines >= start_date) & (deadlines < end_date) ]

    #setup the bar plot by finding the count for each unique string (work arrangement)
    category, category_count = np.unique(time_table['work_arrangement'], return_counts = True)

    #create the plot
    fig3, ax3 = plt.subplots(1,1)
    ax3.barh(category, category_count, align = 'center')
    #NOTE(review): the bars count rows of the table (advertisements), so 'Number of Applications' may be the wrong label -- confirm.
    ax3.set_xlabel('Number of Applications')
    ax3.set_title(f'Job Advertisments with Deadlines from {date1} to {date2} ')

    #fig3.subplots_adjust(left=0.4) #necessary for fitting the long strings on left side of plot?
    fig3.set_size_inches(12,4)

    #save the figure; the file extension selects the output format. A leading '.' in file_type is tolerated.
    extension = file_type.lstrip('.')
    fig3.savefig(f'fig3_{filename_root}.{extension}')

    return ()


#generate Fig 3 from the cleaned table for deadlines between the two dates below
make_fig_3(
    table_data=main_table,
    start_date='2022-03-01',
    end_date='2027-03-01',
    filename_root='test_v1',
    file_type='png',
)




In [None]:
def make_fig_4(table_data, start_date, end_date, filename_root, file_type = 'png'):
    """
    Make Fig 4: a horizontal bar plot showing the number of job listings for each value of the 'num_positions' column
    (the number of positions available in an advertisement).

    table_data -- astropy Table object based on jobs register data file; the 'deadline' and 'num_positions' columns are used
    start_date, end_date -- interval during which you want to plot data using application deadline (ISO date format e.g., '2025-01-24').
                            Rows are kept when start_date <= deadline < end_date.
    filename_root -- string for naming the output file, which is saved as 'fig4_<filename_root>.<file_type>'
    file_type -- file format (extension) for the saved plot, e.g. 'png' or 'pdf'

    Returns an empty tuple (kept so existing callers see the same return value).
    """

    deadlines = table_data['deadline']
    first_deadline = min(deadlines)
    last_deadline = max(deadlines)

    #check if input time is within bounds of table min/max dates. If not, adjust time to match min/max dates in table.
    #these adjusted dates are what the plot title reports.
    date1 = first_deadline if start_date < first_deadline else start_date
    date2 = last_deadline if end_date > last_deadline else end_date

    #filter the table based on date -- format is 2024-04-28
    #the dates as given by the caller are used here (not the adjusted ones): when end_date lies past the latest deadline,
    #comparing with '<' against the adjusted date2 would drop the rows that have that latest deadline.
    time_table = table_data[ (deadlines >= start_date) & (deadlines < end_date) ]

    #setup the bar plot by finding the count for each unique value of num_positions
    category, category_count = np.unique(time_table['num_positions'], return_counts = True)

    #create the plot
    fig4, ax4 = plt.subplots(1,1)
    ax4.barh(category, category_count, align = 'center')
    #NOTE(review): the bars count rows of the table (advertisements), so 'Number of Applications' may be the wrong label -- confirm.
    ax4.set_xlabel('Number of Applications')
    ax4.set_ylabel('Number of Positions Available in Advertisment')
    ax4.set_title(f'Job Advertisments with Deadlines from {date1} to {date2} ')

    #fig4.subplots_adjust(left=0.4) #necessary for fitting the long strings on left side of plot?
    fig4.set_size_inches(12,4)

    #save the figure; the file extension selects the output format. A leading '.' in file_type is tolerated.
    extension = file_type.lstrip('.')
    fig4.savefig(f'fig4_{filename_root}.{extension}')

    return ()


#generate Fig 4 from the cleaned table for deadlines between the two dates below
make_fig_4(
    table_data=main_table,
    start_date='2022-03-01',
    end_date='2027-03-01',
    filename_root='test_v1',
    file_type='png',
)