In [1]:
import requests
import json
import pandas as pd

In [2]:
def _fetch_score_map(url, payload, headers, value_key, decode_error_label, timeout):
    """POST ``payload`` to a scoring service and return ``{applicant_id: value}``.

    Parameters
    ----------
    url : str
        Endpoint of the scoring service.
    payload : str
        JSON-serialised list of applicant records.
    headers : dict
        HTTP headers sent with the request.
    value_key : str
        Key of each returned item that holds the value to keep
        (``'score'`` or ``'prediction'``).
    decode_error_label : str
        Prefix printed when the response body cannot be parsed.
    timeout : float
        Seconds to wait for the service before giving up.

    Returns
    -------
    dict or None
        Mapping of ``str(applicant_id)`` to the requested value, or ``None``
        (after printing a diagnostic) if the request failed, the service
        answered with an error status, or the body had an unexpected shape.
    """
    try:
        # Without a timeout, requests waits forever on a stalled service.
        response = requests.post(url, data=payload, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        print("Error:", exc)
        return None

    if not response.ok:
        print("Error:", response.status_code)
        return None

    try:
        envelope = json.loads(response.text)
        # The 'prediction' field is itself expected to be a JSON-encoded string.
        items = json.loads(envelope['prediction'])
        # Keys are normalised to str because callers look scores up with
        # str(applicant['Applicant ID']); an integer id from the service
        # would otherwise never match and silently fall back to the default.
        return {str(item['applicant_id']): item[value_key] for item in items}
    except (json.JSONDecodeError, KeyError, TypeError) as exc:
        # KeyError/TypeError cover a missing 'prediction' field or items
        # that are not shaped like {'applicant_id': ..., value_key: ...}.
        print(decode_error_label, exc)
        return None


def model_predict(data_original, timeout=30):
    """Score applicants with the resume scorer, then the candidate scorer.

    The input frame is not modified. Missing values are sent to the services
    as the string ``'N/A'``; the columns 'Veteran status',
    'Work authorization', 'Disability' and 'Ethnicity' are sent as integer
    strings and 'GPA' as a two-decimal string.

    Parameters
    ----------
    data_original : pandas.DataFrame
        Applicant records. Must contain an 'Applicant ID' column.
    timeout : float, optional
        Seconds to wait for each service call (default 30).

    Returns
    -------
    pandas.DataFrame or None
        The submitted records plus 'Resume score' (float) and
        'Interview prediction' (int) columns. Applicants the services did not
        return get 0 for both. 'GPA' is converted back to float, with missing
        GPAs as NaN. ``None`` is returned (after printing a diagnostic) if
        either service call fails.
    """
    # fillna returns a new frame, so the caller's DataFrame is left untouched.
    prepared = data_original.fillna('N/A')

    # Numeric flag columns are sent as integer strings (no decimals).
    for col in ['Veteran status', 'Work authorization', 'Disability', 'Ethnicity']:
        if col in prepared.columns:
            prepared[col] = prepared[col].apply(
                lambda x: str(int(x)) if x != 'N/A' else 'N/A'
            )

    # GPA is sent with exactly two decimal places.
    if 'GPA' in prepared.columns:
        prepared['GPA'] = prepared['GPA'].apply(
            lambda x: f"{x:.2f}" if x != 'N/A' else 'N/A'
        )

    records = prepared.to_dict(orient='records')
    headers = {'Content-Type': 'application/json'}

    # First API - resume scorer.
    resume_score_map = _fetch_score_map(
        'https://jennjwang.pythonanywhere.com',
        json.dumps(records),
        headers,
        'score',
        "Error decoding JSON:",
        timeout,
    )
    if resume_score_map is None:
        return None

    for applicant in records:
        applicant['Resume score'] = resume_score_map.get(str(applicant['Applicant ID']), 0)

    # Second API - candidate scorer; it receives the records including the
    # resume score that was just attached.
    final_score_map = _fetch_score_map(
        'https://heonlee.pythonanywhere.com',
        json.dumps(records),
        headers,
        'prediction',
        "Error decoding JSON for the second API:",
        timeout,
    )
    if final_score_map is None:
        return None

    for applicant in records:
        applicant['Interview prediction'] = final_score_map.get(str(applicant['Applicant ID']), 0)

    results = pd.DataFrame(records)
    if 'GPA' in results.columns:
        # Missing GPAs were sent as 'N/A'; coerce them to NaN rather than
        # letting a plain float cast raise ValueError.
        results['GPA'] = pd.to_numeric(results['GPA'], errors='coerce')
    results['Resume score'] = results['Resume score'].astype(float)
    results['Interview prediction'] = results['Interview prediction'].astype(int)

    return results

In [3]:
# Load the candidate dataset, run it through model_predict, and show the
# scored frame as this cell's output.
candidates_csv = '../data/candidate_data_4000.csv'
df = pd.read_csv(candidates_csv)
df_prediction = model_predict(data_original=df)
df_prediction

Unnamed: 0,Applicant ID,School Name,GPA,Degree,Location,Gender,Veteran status,Work authorization,Disability,Ethnicity,...,Start 1,End 1,Role 2,Start 2,End 2,Role 3,Start 3,End 3,Resume score,Interview prediction
0,1,Rhode Island School of Design,3.1,Bachelors,Miami,M,1,0,0,2,...,7/16,7/22,Junior SWE,11/19,11/22,Senior SWE,12/21,12/22,5.63,0
1,2,Bentley University,1.9,Phd,New York City,,0,0,1,0,...,11/13,11/18,ML Engineer,11/14,12/14,,,,5.02,0
2,3,SUNY New Paltz,1.7,Masters,Washington D.C.,M,1,1,1,1,...,4/22,6/22,,,,,,,3.69,0
3,4,Brown University,2.8,Bachelors,Los Angeles,F,1,0,1,2,...,6/22,11/22,Lawyer,1/12,4/12,Lawyer,8/13,8/14,3.30,0
4,5,SUNY Binghamton University,2.8,Masters,Los Angeles,,1,1,,4,...,6/11,,Junior SWE,6/10,7/10,Junior SWE,7/23,10/23,1.67,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3995,3996,Rhode Island School of Design,2.3,Masters,Detroit,F,1,0,,0,...,3/15,3/18,Lawyer,5/21,5/22,,,,9.32,0
3996,3997,SUNY Binghamton University,1.6,Phd,Providence,,0,0,,0,...,10/17,10/18,Data scientist,5/21,5/22,Junior SWE,5/16,5/22,7.02,0
3997,3998,SUNY New Paltz,1.2,Bachelors,New York City,F,0,1,,4,...,8/10,8/17,,,,,,,7.82,0
3998,3999,Brown University,1.8,Bachelors,Washington D.C.,,0,1,1,4,...,1/16,1/20,Chef,9/11,9/13,Senior SWE,9/14,9/22,5.32,0


In [4]:
# Save the predictions to the local drive (uncomment the two lines below to run)
# csv_file_path = '../results/candidate_data_4000_res_1.csv'  
# df_prediction.to_csv(csv_file_path, index=False)