# Fixed effect analysis

In [41]:
#Packages importation
import pandas as pd
import numpy as np
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text as text
import statsmodels.api as sm

In [2]:
#Data importation
jocas = pd.read_csv("~/skills/jocas_2022.csv", low_memory=False)
jocas.head(5)

Unnamed: 0.1,Unnamed: 0,url,date_first_seen_day,date_scraping,site_name,site_child,scraping_failure_status,id_jocas,date_first_disappeared_day,date_last_seen_day,...,partner_name,partner_status,teleworking_accepted,teleworking_type,teleworking_mentioned,experience_min,experience_max,education_level,education_field,rome_loc_firm
0,apec.2,https://www.apec.fr/cms/webservices/offre/publ...,2022-01-01,Sat Jan 1 21:21:59 2022,apec,,False,APEC_2022-01-01_2,,,...,,False,,,False,5.0,,,,True
1,apec.3,https://www.apec.fr/cms/webservices/offre/publ...,2022-01-01,Sat Jan 1 21:22:00 2022,apec,,False,APEC_2022-01-01_3,,,...,,False,,,False,5.0,,,,True
2,apec.4,https://www.apec.fr/cms/webservices/offre/publ...,2022-01-01,Sat Jan 1 21:22:01 2022,apec,,False,APEC_2022-01-01_4,,,...,,False,,,False,10.0,,,,True
3,apec.5,https://www.apec.fr/cms/webservices/offre/publ...,2022-01-01,Sat Jan 1 21:22:02 2022,apec,,False,APEC_2022-01-01_5,,,...,,False,,,False,3.0,,,,True
4,apec.6,https://www.apec.fr/cms/webservices/offre/publ...,2022-01-01,Sat Jan 1 21:22:04 2022,apec,,False,APEC_2022-01-01_6,,,...,,False,,,False,5.0,,,,True


In [3]:
jocas.shape

(1730680, 57)

In [42]:
#Random sample
jocas_s = jocas.sample(n=100000, random_state=0).copy()
jocas_s.shape

(100000, 57)

## Fixed effect regressions

### ROME fixed effects

In [6]:
# Creating dummies for each ROME code
fe_jocas = pd.get_dummies(jocas_s['job_rome_code'])
fe_jocas

Unnamed: 0,A1101,A1201,A1202,A1203,A1204,A1205,A1301,A1302,A1303,A1401,...,N4105,N4201,N4202,N4203,N4204,N4301,N4302,N4401,N4402,N4403
1398126,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
492654,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1221841,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1541712,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
573294,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
160225,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1156906,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
827290,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1691242,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [6]:
jocas_fe = pd.concat([jocas_s, fe_jocas], axis=1)
jocas_fe.shape

(100000, 545)

In [7]:
# Taking the log of the salary
jocas_fe['salary_min'] = np.log(jocas_fe['salary_min'])
jocas_fe['salary_min'].describe()

count    100000.000000
mean         10.401862
std           0.359100
min           9.864612
25%          10.165852
50%          10.308953
75%          10.596635
max          13.122363
Name: salary_min, dtype: float64

In [10]:
# Creating exogene and endogene variables for the regression
y = jocas_fe['salary_min']
X = fe_jocas
X = sm.add_constant(X)

In [11]:
# Creating a test and a train datasets
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [13]:
#ROME fixed effects only
model_rome = sm.OLS(y_train, x_train)
results_rome = model_rome.fit()

In [14]:
print(results_rome.summary())

                            OLS Regression Results                            
Dep. Variable:             salary_min   R-squared:                       0.450
Model:                            OLS   Adj. R-squared:                  0.447
Method:                 Least Squares   F-statistic:                     135.2
Date:                Thu, 27 Jul 2023   Prob (F-statistic):               0.00
Time:                        07:36:06   Log-Likelihood:                -7594.6
No. Observations:               80000   AIC:                         1.615e+04
Df Residuals:                   79518   BIC:                         2.063e+04
Df Model:                         481                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         10.6405      0.044    239.228      0.0

In [15]:
# RMSE
predicted_values_rome = results_rome.predict(x_test)
residuals_rome = y_test - predicted_values_rome
squared_residuals_rome = residuals_rome ** 2
mean_squared_error_rome = squared_residuals_rome.mean()
rmse_rome = np.sqrt(mean_squared_error_rome)
print(rmse_rome)

0.2692079402660899


In [16]:
# R squared on test sample
sse_test = ((y_test - predicted_values_rome) ** 2).sum()
sst_test = ((y_test - y_test.mean()) ** 2).sum()
r_squared_test_rome = 1 - (sse_test / sst_test)
print(r_squared_test_rome)

0.4426420174342035


### ROME and MSA fixed effects

In [43]:
ze = pd.read_csv("~/skills/zones_emploi_2022.csv", sep=';', encoding='latin',low_memory=False)
ze.head(5)

Unnamed: 0,CODGEO,LIBGEO,ZE2020,LIBZE2020,ZE2020_PARTIE_REG,DEP,REG
0,1001,L'Abergement-Clémenciat,8405,Bourg en Bresse,,1,84
1,1002,L'Abergement-de-Varey,8405,Bourg en Bresse,,1,84
2,1004,Ambérieu-en-Bugey,8405,Bourg en Bresse,,1,84
3,1005,Ambérieux-en-Dombes,8434,Villefranche-sur-Saône,,1,84
4,1006,Ambléon,8404,Belley,,1,84


In [44]:
jocas_s = jocas_s[jocas_s['location_zipcode'].notnull()]
jocas_s.shape

(89479, 57)

In [45]:
jocas_s['location_zipcode'] = jocas_s['location_zipcode'].round().astype(str).apply(lambda x: x.replace('.0',''))
jocas_s['location_zipcode'].head(5)

1221841    93500
1541712    38000
573294     59610
869754     33310
1224567    76000
Name: location_zipcode, dtype: object

In [46]:
jocas_s['location_label'].head(5)

1221841      PANTIN
1541712    Grenoble
573294     FOURMIES
869754      Lormont
1224567       ROUEN
Name: location_label, dtype: object

In [47]:
mapping_dict = dict(zip(ze['LIBGEO'].str.lower(), ze['ZE2020']))
jocas_s['ZE2020'] = None
jocas_s['location_label'] = jocas_s['location_label'].str.lower()
jocas_s['location_label'] = jocas_s['location_label'].astype('str').apply(lambda x: x.replace(' ','-'))
jocas_s['ZE2020'] = jocas_s['location_label'].map(mapping_dict)

In [48]:
jocas_s['ZE2020'] = jocas_s['ZE2020'].astype(str).apply(lambda x: x.replace('.0',''))
jocas_s['ZE2020'].head(5)

1221841    1109
1541712    8409
573294     3217
869754     7505
1224567    2815
Name: ZE2020, dtype: object

In [78]:
jocas_s['ZE2020'].nunique()

299

In [49]:
jocas_s = jocas_s[jocas_s['ZE2020'].astype('float').notnull()]
jocas_s.shape

(71324, 58)

In [50]:
# Creating a unique id by ROME*ZE
jocas_s['unique_id'] = jocas_s.job_rome_code.astype('str') + '_' + jocas_s.ZE2020.astype('str')

In [51]:
# Creating dummies for each ze
ze_jocas = pd.get_dummies(jocas_s['unique_id'])
ze_jocas

Unnamed: 0,A1101_1101,A1101_1106,A1101_1113,A1101_3216,A1101_3217,A1101_53,A1101_7501,A1101_7502,A1101_7516,A1101_7604,...,nan_4423,nan_53,nan_59,nan_60,nan_7532,nan_7625,nan_8401,nan_8413,nan_8421,nan_8433
1221841,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1541712,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
573294,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
869754,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1224567,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1005505,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
160225,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1156906,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
827290,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [52]:
jocas_ze = pd.concat([jocas_s, ze_jocas], axis=1)
jocas_ze.shape

(71324, 18243)

In [53]:
# Taking the lof of the salary
jocas_ze['salary_min'] = np.log(jocas_ze['salary_min'])
jocas_ze['salary_min'].describe()

count    71324.000000
mean        10.403844
std          0.363080
min          9.864612
25%         10.165852
50%         10.308953
75%         10.596635
max         13.122363
Name: salary_min, dtype: float64

In [54]:
# Creating exogene and endogene variables for the regression
y = jocas_ze['salary_min']
X = ze_jocas
X = sm.add_constant(X)

In [55]:
X.shape

(71324, 18185)

In [56]:
# Creating a tets and a train datasets
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [57]:
# Both ROME and ZE fixed effects
model_ze = sm.OLS(y_train, x_train)
results_ze = model_ze.fit()

In [58]:
print(results_ze.summary())

                            OLS Regression Results                            
Dep. Variable:             salary_min   R-squared:                       0.678
Model:                            OLS   Adj. R-squared:                  0.550
Method:                 Least Squares   F-statistic:                     5.277
Date:                Mon, 18 Sep 2023   Prob (F-statistic):               0.00
Time:                        08:51:55   Log-Likelihood:                 9121.5
No. Observations:               57059   AIC:                         1.431e+04
Df Residuals:                   40782   BIC:                         1.600e+05
Df Model:                       16276                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         10.3336      0.002   6576.290      0.0

In [59]:
# RMSE
predicted_values_ze = results_ze.predict(x_test)
residuals_ze = y_test - predicted_values_ze
squared_residuals_ze = residuals_ze ** 2
mean_squared_error_ze = squared_residuals_ze.mean()
rmse_ze = np.sqrt(mean_squared_error_ze)
print(rmse_ze)

0.27176458629604877


In [60]:
# R squared on test sample
sse_test = ((y_test - predicted_values_ze) ** 2).sum()
sst_test = ((y_test - y_test.mean()) ** 2).sum()
r_squared_test_ze = 1 - (sse_test / sst_test)
print(r_squared_test_ze)

0.4349615112823947


## NLP model

In [64]:
# Load the NLP model
nlp_model = tf.keras.models.load_model('french_model')

2023-09-19 07:43:26.108749: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'trimmer_trim_roundrobintrimmer_generate_mask_while_tile_multiples_trimmer_trim_roundrobintrimmer_generate_mask_strided_slice_0' with dtype int32
	 [[{{node trimmer_trim_roundrobintrimmer_generate_mask_while_tile_multiples_trimmer_trim_roundrobintrimmer_generate_mask_strided_slice_0}}]]
2023-09-19 07:43:26.108856: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'trimmer_trim_roundrobintrimmer_generate_mask_while_tile_multiples_trimmer_trim_roundrobintrimmer_generate_mask_strided_slice_0' with dtype int32
	 [[{{node trimmer_trim_roundrobintrimmer_

In [65]:
# Re-define the train and test datasets
y_test = residuals_ze
X = jocas_s['description_full']

x_train, x_test = train_test_split(X, test_size=0.2, random_state=0)
print(x_test.shape)
print(y_test.shape)

(14265,)
(14265,)


In [66]:
print(x_train.shape)
print(y_train.shape)

(57059,)
(57059,)


In [38]:
# Re-training the model with the residuals
checkpoint = tf.keras.callbacks.ModelCheckpoint("nlp_model", monitor="val_loss", mode='min', save_best_only=True)
nlp_model.compile(loss='mean_squared_error', optimizer='adam', metrics=[tf.keras.metrics.RootMeanSquaredError()])
nlp_model.fit(x_train, y_train, batch_size=8, epochs=4, validation_data=(x_test, y_test), callbacks=[checkpoint])

Epoch 1/4


2023-09-17 17:27:46.455803: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype string and shape [?]
	 [[{{node inputs}}]]
2023-09-17 17:27:46.681213: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype int32 and shape [?,128]
	 [[{{node inputs}}]]
2023-09-17 17:27:46.682146: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs_2' with dtype int32 and shape [?,128]
	 [[{{node inputs_2}}]]
2023-09-17 17:2



2023-09-17 17:38:41.389093: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype string and shape [?]
	 [[{{node inputs}}]]
2023-09-17 17:38:41.414708: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'input_2' with dtype string and shape [?]
	 [[{{node input_2}}]]
2023-09-17 17:38:41.472315: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype int32 and shape [?,128]
	 [[{{node inputs}}]]
2023-09-17 17:38:41.

INFO:tensorflow:Assets written to: nlp_model/assets


INFO:tensorflow:Assets written to: nlp_model/assets


Epoch 2/4
Epoch 3/4

2023-09-17 18:00:26.817247: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'input_2' with dtype string and shape [?]
	 [[{{node input_2}}]]
2023-09-17 18:00:27.016900: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'input_2' with dtype string and shape [?]
	 [[{{node input_2}}]]
2023-09-17 18:00:27.154338: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'input_2' with dtype string and shape [?]
	 [[{{node input_2}}]]
2023-09-17 18:00:27

INFO:tensorflow:Assets written to: nlp_model/assets


INFO:tensorflow:Assets written to: nlp_model/assets


Epoch 4/4


<keras.callbacks.History at 0x7f2a3833d6d0>

In [40]:
# R squared on the test sample
from sklearn.metrics import r2_score
resid_preds = nlp_model.predict(x_test)
r2_score(y_test, resid_preds)



-1054.6801029871742

## Training a new model with residuals

In [68]:
bert_preprocess = hub.load("https://tfhub.dev/jeongukjae/xlm_roberta_multi_cased_preprocess/1")
bert_embedding = hub.load("https://tfhub.dev/jeongukjae/xlm_roberta_multi_cased_L-12_H-768_A-12/1")

2023-09-19 07:46:08.196785: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'trimmer_trim_roundrobintrimmer_generate_mask_while_tile_multiples_trimmer_trim_roundrobintrimmer_generate_mask_strided_slice_0' with dtype int32
	 [[{{node trimmer_trim_roundrobintrimmer_generate_mask_while_tile_multiples_trimmer_trim_roundrobintrimmer_generate_mask_strided_slice_0}}]]
2023-09-19 07:46:08.196926: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'trimmer_trim_roundrobintrimmer_generate_mask_while_tile_multiples_trimmer_trim_roundrobintrimmer_generate_mask_strided_slice_0' with dtype int32
	 [[{{node trimmer_trim_roundrobintrimmer_

In [69]:
french_preprocessor = hub.KerasLayer(bert_preprocess,
                                    trainable=False)
french_vectorizer = hub.KerasLayer(bert_embedding,
                                  input_shape=[],
                                 dtype=tf.string,
                                 trainable=False)

In [71]:
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
from tensorflow.keras import Model
from tensorflow.keras.layers import Input, Embedding, Conv1D, GlobalMaxPooling1D, Flatten, BatchNormalization, Dense

In [72]:
input1 = Input(shape=[], dtype='string')
preprocessor_layer = french_preprocessor(input1)
vectorizer_layer = french_vectorizer(preprocessor_layer)
bert_layer = vectorizer_layer['sequence_output']
conv1 = Conv1D(filters=64, kernel_size=4, activation='relu')(bert_layer)
pool1 = GlobalMaxPooling1D()(conv1)
flat = Flatten()(pool1)
norm = BatchNormalization()(flat)
dense1 = Dense(64, activation='relu', kernel_initializer='RandomNormal')(norm)
dense2 = Dense(1, activation='relu', kernel_initializer='RandomNormal')(dense1)
fe_res_model = Model(inputs=input1, outputs=dense2)

print(fe_res_model.summary())

2023-09-19 07:47:51.604704: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype string and shape [?]
	 [[{{node inputs}}]]
2023-09-19 07:47:51.639684: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder' with dtype string and shape [?]
	 [[{{node Placeholder}}]]
2023-09-19 07:47:51.752200: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs_2' with dtype int32 and shape [?,128]
	 [[{{node inputs_2}}]]
2023-09-

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None,)]            0           []                               
                                                                                                  
 keras_layer (KerasLayer)       {'input_mask': (Non  0           ['input_1[0][0]']                
                                e, 128),                                                          
                                 'input_word_ids':                                                
                                (None, 128),                                                      
                                 'input_type_ids':                                                
                                (None, 128)}                                                  

2023-09-19 07:47:51.829715: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder_1' with dtype int32 and shape [?,128]
	 [[{{node Placeholder_1}}]]
2023-09-19 07:47:51.829812: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder_2' with dtype int32 and shape [?,128]
	 [[{{node Placeholder_2}}]]


In [65]:
# Re-define the train and test datasets
y_test = residuals_ze
X = jocas_s['description_full']

x_train, x_test = train_test_split(X, test_size=0.2, random_state=0)
print(x_test.shape)
print(y_test.shape)

(14265,)
(14265,)


In [None]:
#Training the model with the residuals
checkpoint = tf.keras.callbacks.ModelCheckpoint("fe_res_model", monitor="val_loss", mode='min', save_best_only=True)
fe_res_model.compile(loss='mean_squared_error', optimizer='adam', metrics=[tf.keras.metrics.RootMeanSquaredError()])
fe_res_model.fit(x_train, y_train, batch_size=8, epochs=4, validation_data=(x_test, y_test), callbacks=[checkpoint])

Epoch 1/4


2023-09-19 07:50:10.672178: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/roberta_encoder/StatefulPartitionedCall_grad/roberta_encoder/StatefulPartitionedCall_3' with dtype float and shape [?,?,768]
	 [[{{node gradients/roberta_encoder/StatefulPartitionedCall_grad/roberta_encoder/StatefulPartitionedCall_3}}]]
2023-09-19 07:50:10.672254: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/roberta_encoder/StatefulPartitionedCall_grad/roberta_encoder/StatefulPartitionedCall_4' with dtype float and shape [?,?,768]
	 [[{{node gradients/roberta_encoder/StatefulPartitionedCall_grad/roberta_encoder/StatefulPa



2023-09-19 09:29:10.042742: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype string and shape [?]
	 [[{{node inputs}}]]
2023-09-19 09:29:10.127407: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'input_1' with dtype string and shape [?]
	 [[{{node input_1}}]]
2023-09-19 09:29:10.205788: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs_2' with dtype int32 and shape [?,128]
	 [[{{node inputs_2}}]]
2023-09-19 09:29

INFO:tensorflow:Assets written to: fe_res_model/assets


INFO:tensorflow:Assets written to: fe_res_model/assets


Epoch 2/4

2023-09-19 11:07:46.219967: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'input_1' with dtype string and shape [?]
	 [[{{node input_1}}]]
2023-09-19 11:07:46.455167: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype string and shape [?]
	 [[{{node inputs}}]]
2023-09-19 11:07:46.480558: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'input_1' with dtype string and shape [?]
	 [[{{node input_1}}]]
2023-09-19 11:07:46.5

INFO:tensorflow:Assets written to: fe_res_model/assets


INFO:tensorflow:Assets written to: fe_res_model/assets


Epoch 3/4

In [None]:
tf.keras.saving.save_model(fe_res_model, 'fe_res_model_save', save_format='tf')

In [76]:
y_preds_fe = fe_res_model.predict(x_test)
#rmse = tf.keras.metrics.RootMeanSquaredError(y_test, y_preds)
y_result=pd.DataFrame()
y_result['ln_salary']=y_test
y_result['predictions']=y_preds_fe
#y_result['rmse']=rmse
y_result.head(10)



Unnamed: 0,ln_salary,predictions
1217971,-0.0205396,10.396832
817518,-0.02468044,10.284751
1024758,7.638334e-14,10.08466
148034,-0.1772337,10.626722
1376816,-0.207002,9.824197
1139573,-0.1749627,10.407157
927758,2.4869e-14,9.915341
813509,-0.2006707,10.059413
1459269,-0.1238004,10.352976
1561369,0.01527427,10.217519


In [77]:
from sklearn.metrics import r2_score
r2_score = r2_score(y_result['ln_salary'], y_result['predictions'])
print(r2_score)

-1460.631676225615
