# Narrative Generation usecase

Extract information about the use case for narrative generation (Section 4.7)
- Data: First 20 recommended jobs for user: UID = 13
- Recommendation system: Logistic Regression; EBM\_side\_info
- Source of potential application: KNN ranking data

Visualization
- LogReg: 1st recommended job
- EBM\_side\_info: 1st recommended job

In [1]:
# Silence all warnings for the remainder of the session so the cell
# outputs stay uncluttered.
import warnings
warnings.simplefilter('ignore')

In [4]:
# Run this cell to reproduce the notebook: the relative data paths used in
# the later cells are resolved from the parent of the notebook's directory.
import os

# Remember the directory the session started in the first time this cell
# runs. A bare os.chdir("..") climbs one more level on every re-run of the
# cell, which silently breaks every relative path used afterwards.
if "NOTEBOOK_DIR" not in globals():
    NOTEBOOK_DIR = os.getcwd()

# Gets the (starting) working directory
cwd = NOTEBOOK_DIR
print("Working directory:", cwd)
# Go up one directory from the starting working directory (safe to re-run)
os.chdir(os.path.dirname(NOTEBOOK_DIR))

Working directory: /Users/anhtth/Library/CloudStorage/OneDrive-UniversityofTwente/2023 UT- THESIS/1-Code/0.cb12_main/nb_xai_viz


In [3]:
import pandas as pd
import numpy as np

# Load actual interaction data (true label)

In [5]:
# Read the flattened train and test tables from the post-hoc XAI folder.
train_data_flat, test_data_flat = (
    pd.read_csv(csv_path)
    for csv_path in ('./xai_posthoc/train_data_flat.csv',
                     './xai_posthoc/test_data_flat.csv')
)

In [6]:
# Columns removed from the flat tables before they are used as X_train / X_test.
drop_cols_flat = [
    'UserID', 'JobID', 'label', 'u_idx', 'j_idx',
    'work_history_matrix', 'job_matrix',
    'mean_work_history_matrix', 'mean_job_matrix',
    'flat_work_history', 'flat_job',
]
# `columns=` already selects the column axis, so no separate `axis` argument
# is needed.
X_train, X_test = (
    frame.drop(columns=drop_cols_flat)
    for frame in (train_data_flat, test_data_flat)
)

In [7]:
# Label arrays saved as .npy files next to the flat tables.
# NOTE(review): presumably row-aligned with X_train / X_test — confirm.
y_train, y_test = map(
    np.load,
    ("./xai_posthoc/y_train_tabular.npy", "./xai_posthoc/y_test_tabular.npy"),
)

# Load detail information about the usecase 

In [8]:
# Job postings (processed) plus the cleaned user and work-history tables (interim).
job_set = pd.read_csv(filepath_or_buffer="./data_processed/jobset_clean.csv")
user_set = pd.read_csv(filepath_or_buffer="./data_interim/user_set_cleaned.csv")
work_history = pd.read_csv(filepath_or_buffer='./data_interim/work_history_cleaned.csv')

In [9]:
# User and work-history tables from the processed-data folder.
user_raw, history_raw = (
    pd.read_csv(raw_path)
    for raw_path in ("./data_processed/users_clean.csv",
                     "./data_processed/work_history.csv")
)

## User information: UID = 13

In [10]:
# Keep the first 20 rows (all columns) of the flat test table.
# NOTE(review): this is a positional slice, not a filter on UserID — confirm
# that it matches the intended "UID = 13" sample.
posthoc_sample = test_data_flat.iloc[:20]

In [11]:
# Preview the sample. NOTE(review): the recorded output lists several
# UserIDs (13, 514, 681), so the sample is not restricted to UID 13.
posthoc_sample

Unnamed: 0,UserID,JobID,label,City,State,Country,DegreeType,WorkHistoryCount,TotalYearsExperience,CurrentlyEmployed,...,job_matrix_90,job_matrix_91,job_matrix_92,job_matrix_93,job_matrix_94,job_matrix_95,job_matrix_96,job_matrix_97,job_matrix_98,job_matrix_99
0,13,821691,1,0.0,1.0,1.0,4,6,5.0,1,...,0.0,0.0,0.0,0.0,0.0,0.0,0.142635,0.0,0.0,0.0
1,13,329572,0,0.0,0.0,1.0,4,6,5.0,1,...,0.0,0.0,0.0,0.0,0.0,0.0,0.140024,0.0,0.0,0.0
2,514,131166,1,0.0,0.0,1.0,5,4,5.0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.12831,0.0,0.0,0.141762
3,514,620304,0,0.0,0.0,1.0,5,4,5.0,0,...,0.0,0.061386,0.0,0.1987,0.0,0.0,0.104008,0.0,0.0,0.057456
4,681,654542,1,0.0,1.0,1.0,1,2,4.0,0,...,0.0,0.344611,0.0,0.0,0.190686,0.102102,0.218956,0.0,0.0,0.0
5,681,625758,1,0.0,1.0,1.0,1,2,4.0,0,...,0.0,0.044421,0.132267,0.0,0.09832,0.0,0.0,0.0,0.0,0.0
6,681,15081,1,0.0,1.0,1.0,1,2,4.0,0,...,0.0,0.64832,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,681,291467,0,0.0,0.0,1.0,1,2,4.0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.164057,0.119839,0.0,0.090628
8,681,1035056,0,0.0,0.0,1.0,1,2,4.0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.170616
9,681,2083,0,0.0,0.0,1.0,1,2,4.0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [12]:
# posthoc_sample.to_csv('./xai_posthoc/posthoc_sample.csv', header=True, index=False)

In [14]:
# UserID of the use-case user; used to filter the user tables and to build
# the input/output file names in the cells that follow.
test_uid = 13

In [15]:
# Rows belonging to the use-case user: profile from the raw user table,
# work history from the cleaned work-history table.
usecase_info = user_raw.loc[user_raw['UserID'] == test_uid]
usecase_history = work_history.loc[work_history['UserID'] == test_uid]

In [16]:
# Show the profile row selected for the use-case user.
usecase_info

Unnamed: 0,UserID,WindowID,Split,City,State,Country,ZipCode,DegreeType,Major,GraduationDate,WorkHistoryCount,TotalYearsExperience,CurrentlyEmployed,ManagedOthers,ManagedHowMany
193642,13,6,Test,Philadelphia,PA,US,19143,Bachelor's,Psychological & Social Sciences,2011-12-01 00:00:00,6,5.0,Yes,No,0


In [17]:
# Flip the one-row profile so that each attribute becomes its own row.
# Caution: the result overwrites the same name, so re-running this cell
# flips the table back.
usecase_info = usecase_info.transpose()

In [19]:
# Show the transposed profile (one attribute per row).
usecase_info

Unnamed: 0,193642
UserID,13
WindowID,6
Split,Test
City,Philadelphia
State,PA
Country,US
ZipCode,19143
DegreeType,Bachelor's
Major,Psychological & Social Sciences
GraduationDate,2011-12-01 00:00:00


In [18]:
# Show the work-history rows selected for the use-case user.
usecase_history

Unnamed: 0,UserID,WindowID,Split,JobTitle
466215,13,6,Test,Pennsylvania Mentor
466216,13,6,Test,Student Worker
466217,13,6,Test,Internship in Adoption Unit
466218,13,6,Test,Student Worker - Continuing Education
466219,13,6,Test,Sales Associate


In [20]:
# Persist the user profile (with header and index) for the visualization step.
user_profile_csv = f'./xai_posthoc/uid{test_uid}_logreg_xai_viz_user_profile.csv'
usecase_info.to_csv(user_profile_csv, header=True, index=True)

In [21]:
# Persist the user's work history (with header and index) for the visualization step.
user_history_csv = f'./xai_posthoc/uid{test_uid}_logreg_xai_viz_user_history.csv'
usecase_history.to_csv(user_history_csv, header=True, index=True)

# Load recommendation data by LogReg

In [23]:
# Recommendation table exported for this user by the LogReg post-hoc notebook.
logreg_rec_csv = f'./nb_xai_viz/logreg_posthoc_recdata_full_uid{test_uid}.csv'
rec_result_logreg = pd.read_csv(logreg_rec_csv)

In [24]:
# Show the LogReg recommendation table loaded for the use-case user.
rec_result_logreg

Unnamed: 0,UserID,JobID,Y_prob,Y_pred,rank,City,State,Country,DegreeType,WorkHistoryCount,...,job_matrix_92,job_matrix_93,job_matrix_94,job_matrix_95,job_matrix_96,job_matrix_97,job_matrix_98,job_matrix_99,label,Y_pred_posthoc
0,13,248446,0.970798,1,0,0.0,1.0,1.0,4.0,6.0,...,0.0,0.136861,0.0,0.0,0.250736,0.104661,0.246267,0.0,0,1
1,13,367918,0.937227,1,1,0.0,1.0,1.0,4.0,6.0,...,0.597638,0.0,0.148084,0.0,0.226717,0.165611,0.0,0.0,0,1
2,13,580991,0.445008,0,2,0.0,0.0,1.0,4.0,6.0,...,0.0,0.0,0.10901,0.0,0.166895,0.365738,0.0,0.0,0,0
3,13,172155,0.351762,0,3,0.0,0.0,1.0,4.0,6.0,...,0.0,0.0,0.106406,0.0,0.244363,0.0,0.0,0.0,0,0
4,13,799192,0.340405,0,4,0.0,0.0,1.0,4.0,6.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.200566,0,0
5,13,241804,0.321821,0,5,0.0,0.0,1.0,4.0,6.0,...,0.0,0.0,0.0,0.0,0.075539,0.0,0.129837,0.0,0,0
6,13,872594,0.319501,0,6,0.0,0.0,1.0,4.0,6.0,...,0.0,0.0,0.171569,0.0,0.131337,0.383752,0.0,0.0,0,0
7,13,876016,0.309969,0,7,0.0,0.0,1.0,4.0,6.0,...,0.0,0.0,0.0,0.0,0.08017,0.0,0.0,0.0,0,0
8,13,602061,0.305032,0,8,0.0,0.0,1.0,4.0,6.0,...,0.0,0.0,0.191616,0.0,0.0,0.214295,0.252119,0.0,0,0
9,13,979883,0.289789,0,9,0.0,0.0,1.0,4.0,6.0,...,0.0,0.0,0.0,0.0,0.148949,0.0,0.0,0.164564,0,0


### Use case: 1st recommended job by logreg model for post-hoc visualization

In [25]:
# Fetch the job_set entry whose JobID matches row 0 of the LogReg table.
# NOTE(review): row 0 is presumably the top-ranked job — confirm the CSV is
# stored in rank order.
first_logreg_job_id = rec_result_logreg.loc[0]['JobID']
rec_result_logreg_job = job_set.loc[job_set['JobID'] == first_logreg_job_id]

In [26]:
# Show the job_set row matched for the first LogReg recommendation.
rec_result_logreg_job

Unnamed: 0,JobID,WindowID,Title,Description,Requirements,City,State,Country,Zip5,StartDate,EndDate
845924,248446,6,Secondary Social Studies Teacher,2012-2013 School Year Company Summary: Connect..., Highly qualified and certified to teach in P...,Bryn Mawr,PA,US,19010.0,2012-05-15 15:51:27.91,2012-06-14 23:59:00


In [27]:
# Write title, job id and description to a text file, one field per line.
# NOTE(review): a newline is used as the CSV delimiter; newer Python `csv`
# versions may reject '\n' as a delimiter — confirm on the target interpreter.
rec_result_logreg_job.to_csv(
    f'./xai_posthoc/uid{test_uid}_logreg_xai_viz_job_description.txt',
    columns=['Title', 'JobID', 'Description'],
    sep='\n',
    encoding='utf-8',
)

In [28]:
# Write title, job id and requirements to a text file, one field per line.
# NOTE(review): a newline is used as the CSV delimiter; newer Python `csv`
# versions may reject '\n' as a delimiter — confirm on the target interpreter.
rec_result_logreg_job.to_csv(
    f'./xai_posthoc/uid{test_uid}_logreg_xai_viz_job_requirement.txt',
    columns=['Title', 'JobID', 'Requirements'],
    sep='\n',
    encoding='utf-8',
)

# Load recommendation data by EBM

In [29]:
# Recommendation table exported for this user by the EBM local-explanation notebook.
ebm_rec_csv = f'./nb_xai_viz/ebm_local_recdata_full_uid{test_uid}.csv'
rec_result_ebm = pd.read_csv(ebm_rec_csv)

In [30]:
# Show the EBM recommendation table loaded for the use-case user.
rec_result_ebm

Unnamed: 0,UserID,JobID,Y_prob,Y_pred,rank,City,State,Country,DegreeType,CurrentlyEmployed,ManagedOthers,WorkHistoryTopic,WorkHistoryLevel,SeniorLevel,ReqTopic,DescTopic,TitTopic,label,Y_pred_posthoc
0,13,32115,0.665981,1,0,0.0,0.0,1.0,4.0,1.0,0.0,0.0,3.0,2.0,15,13,0,0.0,1
1,13,1081720,0.665981,1,1,0.0,0.0,1.0,4.0,1.0,0.0,0.0,3.0,2.0,15,13,0,0.0,1
2,13,494993,0.629645,1,2,0.0,0.0,1.0,4.0,1.0,0.0,0.0,3.0,2.0,6,18,0,0.0,1
3,13,755985,0.611904,1,3,0.0,0.0,1.0,4.0,1.0,0.0,0.0,3.0,2.0,15,18,0,0.0,1
4,13,602478,0.611904,1,4,0.0,0.0,1.0,4.0,1.0,0.0,0.0,3.0,2.0,15,18,0,0.0,1
5,13,299880,0.611904,1,5,0.0,0.0,1.0,4.0,1.0,0.0,0.0,3.0,2.0,15,18,0,0.0,1
6,13,873212,0.611904,1,6,0.0,0.0,1.0,4.0,1.0,0.0,0.0,3.0,2.0,15,18,0,0.0,1
7,13,757629,0.611904,1,7,0.0,0.0,1.0,4.0,1.0,0.0,0.0,3.0,2.0,15,18,0,0.0,1
8,13,628773,0.611904,1,8,0.0,0.0,1.0,4.0,1.0,0.0,0.0,3.0,2.0,15,18,0,0.0,1
9,13,110296,0.611904,1,9,1.0,1.0,1.0,4.0,1.0,0.0,0.0,3.0,2.0,15,18,0,0.0,1


In [31]:
# Fetch the job_set entry whose JobID matches row 0 of the EBM table.
# NOTE(review): row 0 is presumably the top-ranked job — confirm the CSV is
# stored in rank order.
first_ebm_job_id = rec_result_ebm.loc[0]['JobID']
rec_result_ebm_job = job_set.loc[job_set['JobID'] == first_ebm_job_id]

In [32]:
# Show the job_set row matched for the first EBM recommendation.
rec_result_ebm_job

Unnamed: 0,JobID,WindowID,Title,Description,Requirements,City,State,Country,Zip5,StartDate,EndDate
823340,32115,6,"Specialized Transportation Services, Inc",Specialized Transportation Service...,Please refer to the Job Description to view th...,Smyrna,TN,US,37167.0,2012-05-12 00:10:16.827,2012-06-11 23:59:59


In [33]:
# Write title, job id and description to a text file, one field per line.
# NOTE(review): a newline is used as the CSV delimiter; newer Python `csv`
# versions may reject '\n' as a delimiter — confirm on the target interpreter.
rec_result_ebm_job.to_csv(
    f'./xai_posthoc/uid{test_uid}_ebm_xai_viz_job_description.txt',
    columns=['Title', 'JobID', 'Description'],
    sep='\n',
    encoding='utf-8',
)

In [34]:
# Write title, job id and requirements to a text file, one field per line.
# NOTE(review): a newline is used as the CSV delimiter; newer Python `csv`
# versions may reject '\n' as a delimiter — confirm on the target interpreter.
rec_result_ebm_job.to_csv(
    f'./xai_posthoc/uid{test_uid}_ebm_xai_viz_job_requirement.txt',
    columns=['Title', 'JobID', 'Requirements'],
    sep='\n',
    encoding='utf-8',
)