In [1]:
from pathlib import Path
from typing import Optional

import pandas as pd

from config import PROGRAM_COST_PATH, APPLICANT_DATA_PATH, NEEDED_COLUMNS

def get_latest_file(dir_path: str) -> Optional[Path]:
    """Return the most recently modified ``*.csv`` file directly inside ``dir_path``.

    Args:
        dir_path: Directory to search. Only direct children are considered
            (the glob pattern is not recursive).

    Returns:
        The path of the CSV file with the greatest modification time, or
        ``None`` when no CSV file is found.
    """
    # Keep regular files only: a sub-directory whose name ends in ".csv" also
    # matches the glob and could otherwise be picked as the "latest file".
    csv_files = [entry for entry in Path(dir_path).glob('*.csv') if entry.is_file()]

    # default=None covers the "nothing found" case without a separate branch.
    return max(csv_files, key=lambda f: f.stat().st_mtime, default=None)

def load_dataframe(file_path) -> pd.DataFrame:
    """Read a CSV file into a DataFrame, echoing the path that is loaded.

    Args:
        file_path: Path of the CSV file to read, or ``None`` when the
            upstream lookup found no file.

    Returns:
        The parsed contents of the CSV file.

    Raises:
        FileNotFoundError: If ``file_path`` is ``None``.
    """
    # Validate first so a failed lookup does not print a stray "None"
    # right before the exception is raised.
    if file_path is None:
        raise FileNotFoundError("No matching files found")

    # Echo the resolved path so the cell output records which file was used.
    print(file_path)
    return pd.read_csv(file_path)

def load_data_pipeline() -> pd.DataFrame:
    """Load the newest applicant and program-cost CSVs and join them.

    The most recently modified CSV in ``APPLICANT_DATA_PATH`` and in
    ``PROGRAM_COST_PATH`` is read, then the program-cost columns are attached
    to the applicant rows with a left join on ``'Program Name'``.

    Returns:
        The joined frame restricted to ``NEEDED_COLUMNS``.
    """
    applicants = load_dataframe(
        file_path=get_latest_file(dir_path=APPLICANT_DATA_PATH)
    )
    program_costs = load_dataframe(
        file_path=get_latest_file(dir_path=PROGRAM_COST_PATH)
    )

    # Left join: every applicant row is kept even without a matching program.
    combined = applicants.merge(program_costs, on='Program Name', how='left')
    return combined[NEEDED_COLUMNS]

# Build the merged applicant / program-cost frame; the cells below inspect it.
df = load_data_pipeline()

/Users/advisor/cea-budget-dashboard/data/Applicants/budget_dashboard-2026-01-29-09_35_34.csv
/Users/advisor/cea-budget-dashboard/data/ProgramCosts/ProgramCost.csv


In [2]:
# List the columns kept by the pipeline (it returns merged_df[NEEDED_COLUMNS]).
df.columns

Index(['User ID', 'Name', 'User Last Name', 'User First Name', 'Program Name',
       'Program Type', 'Program Group', 'Program Year', 'Program Term',
       'Application Status', 'Phase', 'Cost'],
      dtype='object')

In [3]:
# Preview the first rows of the merged frame.
# NOTE(review): the saved output shows applicant names and user IDs — clear or
# redact the output before sharing this notebook.
df.head()

Unnamed: 0,User ID,Name,User Last Name,User First Name,Program Name,Program Type,Program Group,Program Year,Program Term,Application Status,Phase,Cost
0,24446,Jackson Oscar,Jackson,Oscar,IES Abroad Cape Town - University of Cape Town,Outgoing,Semester and Year Programs (General),2026,Spring,Committed,Post Decision,20950.0
1,24450,Cristanetti-Walker Alessio,Cristanetti-Walker,Alessio,IES Abroad Milan - Business Studies,Outgoing,Semester and Year Programs (General),2026,Spring,Committed,While Abroad,21913.0
2,24452,Nguyen Kelly,Nguyen,Kelly,University of Sydney Exchange - Direct Enrollment,Outgoing,Exchange Programs,2026,Spring,Committed,Post Decision,12956.0
3,20742,Deveney Padraig,Deveney,Padraig,University of Sydney Exchange - Direct Enrollment,Outgoing,Exchange Programs,2026,Spring,Committed,Post Decision,12956.0
4,24398,Wang Victoria,Wang,Victoria,University of Sydney Exchange - Direct Enrollment,Outgoing,Exchange Programs,2026,Spring,Committed,Post Decision,12956.0


In [4]:
# Count rows per distinct 'Program Term' value.
df['Program Term'].value_counts()

Program Term
Spring       106
Fall          63
Full year     15
Name: count, dtype: int64

In [5]:
# Show only the rows whose 'Program Term' is 'Full year'.
# The saved output includes rows with a Cost of 0.0 and a row with a missing Cost.
df[df['Program Term'] == 'Full year']

Unnamed: 0,User ID,Name,User Last Name,User First Name,Program Name,Program Type,Program Group,Program Year,Program Term,Application Status,Phase,Cost
91,24542,Gliedman Alessandro,Gliedman,Alessandro,Yonsei University,Outgoing,Exchange Programs,2026,Full year,Pending,Pre Decision,0.0
92,24542,Gliedman Alessandro,Gliedman,Alessandro,Seoul National University,Outgoing,Exchange Programs,2026,Full year,Pending,Pre Decision,0.0
99,26032,Wang Luxi,Wang,Luxi,IES Abroad Amsterdam - Psychology & Sciences,Outgoing,Semester and Year Programs (General),2026,Full year,Pending,Pre Decision,23950.0
100,26033,Zhang Siqi,Zhang,Siqi,IES Abroad Rome - Language & Area Studies,Outgoing,Semester and Year Programs (General),2026,Full year,Pending,Pre Decision,21290.0
108,24460,Vernelus Bethsa,Vernelus,Bethsa,IES Abroad London - Health Practice & Policy,Outgoing,Semester and Year Programs (General),2026,Full year,Pending,Pre Decision,24636.0
112,24542,Gliedman Alessandro,Gliedman,Alessandro,Kyoto University,Outgoing,Exchange Programs,2026,Full year,Pending,Pre Decision,0.0
118,26111,Kelly Karis,Kelly,Karis,University College Dublin,Outgoing,Semester and Year Programs (General),2026,Full year,Pending,Pre Decision,7350.0
125,26187,Zhao Qiuwan,Zhao,Qiuwan,Queen Mary University of London,Outgoing,Semester and Year Programs (General),2026,Full year,Pending,Pre Decision,11000.0
138,24758,Perlera Janelly,Perlera,Janelly,Queen Mary University of London,Outgoing,Semester and Year Programs (General),2026,Full year,Pending,Pre Decision,11000.0
142,25874,Nguyen Nhat Linh,Nguyen,Nhat Linh,Waseda University,Outgoing,Exchange Programs,2026,Full year,Pending,Pre Decision,
