In [1]:
import nltk
import numpy as np
import pandas as pd
from tensorflow import keras
from tensorflow.keras.layers import Dense
from sentence_transformers import SentenceTransformer

sbert_model = SentenceTransformer('bert-base-nli-mean-tokens')

In [2]:
def cosine(u, v):
    return np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v))

In [3]:
# You have to adjust the
df_arguments = pd.read_csv("arguments_train.csv")
df_keypoints = pd.read_csv("key_points_train.csv")
df_labels = pd.read_csv("labels_train.csv")

In [4]:
df_arguments

Unnamed: 0,arg_id,argument,topic,stance
0,arg_0_0,`people reach their limit when it comes to the...,Assisted suicide should be a criminal offence,-1
1,arg_0_1,A patient should be able to decide when they h...,Assisted suicide should be a criminal offence,-1
2,arg_0_2,a person has the right to end their suffering ...,Assisted suicide should be a criminal offence,-1
3,arg_0_3,a person should have the dignity to choose how...,Assisted suicide should be a criminal offence,-1
4,arg_0_4,a person should have the right to be able to c...,Assisted suicide should be a criminal offence,-1
...,...,...,...,...
5578,arg_27_218,we should subsidize vocational education to en...,We should subsidize vocational education,1
5579,arg_27_219,We should subsidize vocational education to su...,We should subsidize vocational education,1
5580,arg_27_220,While many who graduate from universities stru...,We should subsidize vocational education,1
5581,arg_27_221,with the rising cost of college tuition vocati...,We should subsidize vocational education,1


In [5]:
df_keypoints

Unnamed: 0,key_point_id,key_point,topic,stance
0,kp_0_0,Assisted suicide gives dignity to the person t...,Assisted suicide should be a criminal offence,-1
1,kp_0_1,Assisted suicide reduces suffering,Assisted suicide should be a criminal offence,-1
2,kp_0_2,People should have the freedom to choose to en...,Assisted suicide should be a criminal offence,-1
3,kp_0_3,The terminally ill would benefit from assisted...,Assisted suicide should be a criminal offence,-1
4,kp_0_4,Assisted suicide allows people to solicit some...,Assisted suicide should be a criminal offence,1
...,...,...,...,...
202,kp_27_3,subsidizing vocational education is expensive,We should subsidize vocational education,-1
203,kp_27_4,subsidizing vocational education promotes thos...,We should subsidize vocational education,1
204,kp_27_5,vocational education is a good career choice,We should subsidize vocational education,1
205,kp_27_6,vocational education better fits many students,We should subsidize vocational education,1


In [6]:
df_labels

Unnamed: 0,arg_id,key_point_id,label
0,arg_0_0,kp_0_0,0
1,arg_0_121,kp_0_4,0
2,arg_0_121,kp_0_5,0
3,arg_0_121,kp_0_6,1
4,arg_0_121,kp_0_7,0
...,...,...,...
20630,arg_27_221,kp_27_6,0
20631,arg_27_221,kp_27_7,0
20632,arg_27_222,kp_27_4,0
20633,arg_27_222,kp_27_5,1


In [7]:
df_labels_ones = df_labels[df_labels["label"] == 1]
df_labels_ones


Unnamed: 0,arg_id,key_point_id,label
3,arg_0_121,kp_0_6,1
9,arg_0_1,kp_0_2,1
11,arg_0_2,kp_0_1,1
14,arg_0_3,kp_0_2,1
18,arg_0_4,kp_0_2,1
...,...,...,...
20604,arg_27_89,kp_27_1,1
20605,arg_27_90,kp_27_2,1
20614,arg_27_92,kp_27_3,1
20623,arg_27_220,kp_27_5,1


In [8]:
df_labels_zeros = df_labels[df_labels["label"] == 0]
df_labels_zeros

Unnamed: 0,arg_id,key_point_id,label
0,arg_0_0,kp_0_0,0
1,arg_0_121,kp_0_4,0
2,arg_0_121,kp_0_5,0
4,arg_0_121,kp_0_7,0
5,arg_0_121,kp_0_8,0
...,...,...,...
20629,arg_27_221,kp_27_4,0
20630,arg_27_221,kp_27_6,0
20631,arg_27_221,kp_27_7,0
20632,arg_27_222,kp_27_4,0


In [9]:
df_labels_zeros_reduced = df_labels_zeros.sample(n=4260, random_state=1)
df_labels_zeros_reduced

Unnamed: 0,arg_id,key_point_id,label
1310,arg_1_157,kp_1_8,0
2953,arg_2_83,kp_2_5,0
3714,arg_3_58,kp_3_4,0
13003,arg_19_159,kp_19_4,0
11129,arg_14_109,kp_14_2,0
...,...,...,...
1474,arg_1_177,kp_1_6,0
1703,arg_1_76,kp_1_2,0
4804,arg_5_198,kp_5_5,0
11417,arg_16_44,kp_16_1,0


In [10]:
df_labels_merged0 = np.vstack((df_labels_ones, df_labels_zeros_reduced))
#垂直合并
df_labels_merged0

array([['arg_0_121', 'kp_0_6', 1],
       ['arg_0_1', 'kp_0_2', 1],
       ['arg_0_2', 'kp_0_1', 1],
       ...,
       ['arg_5_198', 'kp_5_5', 0],
       ['arg_16_44', 'kp_16_1', 0],
       ['arg_13_15', 'kp_13_2', 0]], dtype=object)

In [11]:
np.random.shuffle(df_labels_merged0)
#顺序打乱
df_labels_merged = pd.DataFrame(df_labels_merged0)
#重新转化为DataFrame
df_labels_merged.columns = ["arg_id", "key_point_id", "label"]
df_labels_merged
#df_labels_merged_labels = df_labels_merged[]
#df_labels_merged_labels

Unnamed: 0,arg_id,key_point_id,label
0,arg_12_199,kp_12_5,1
1,arg_6_3,kp_6_0,1
2,arg_25_213,kp_25_5,0
3,arg_11_0,kp_11_3,1
4,arg_24_83,kp_24_2,1
...,...,...,...
8515,arg_12_33,kp_12_1,0
8516,arg_3_40,kp_3_3,0
8517,arg_5_157,kp_5_9,0
8518,arg_6_36,kp_6_3,0


In [12]:
merged_dataset=df_labels_merged.merge(df_arguments, left_on="arg_id", right_on="arg_id")
# df_arguments.loc[df_arguments['arg_id'] == "arg_4_121"]
full_dataset_train=merged_dataset.merge(df_keypoints, left_on="key_point_id", right_on="key_point_id")
full_dataset_train

Unnamed: 0,arg_id,key_point_id,label,argument,topic_x,stance_x,key_point,topic_y,stance_y
0,arg_12_199,kp_12_5,1,the private military companies are owned by el...,We should ban private military companies,1,Private military companies main interest is pr...,We should ban private military companies,1
1,arg_12_181,kp_12_5,1,private military companies should be banned be...,We should ban private military companies,1,Private military companies main interest is pr...,We should ban private military companies,1
2,arg_12_190,kp_12_5,0,private miltary companies are open to abusing ...,We should ban private military companies,1,Private military companies main interest is pr...,We should ban private military companies,1
3,arg_12_163,kp_12_5,1,Private military companies have their bottom l...,We should ban private military companies,1,Private military companies main interest is pr...,We should ban private military companies,1
4,arg_12_219,kp_12_5,0,we should ban private military companies becau...,We should ban private military companies,1,Private military companies main interest is pr...,We should ban private military companies,1
...,...,...,...,...,...,...,...,...,...
8515,arg_17_131,kp_17_2,1,nuclear weapons create unnecessary political t...,We should fight for the abolition of nuclear w...,1,Nuclear weapons provoke or escalate a disastro...,We should fight for the abolition of nuclear w...,1
8516,arg_17_172,kp_17_2,0,there is no need for nuclear weapons anywhere ...,We should fight for the abolition of nuclear w...,1,Nuclear weapons provoke or escalate a disastro...,We should fight for the abolition of nuclear w...,1
8517,arg_17_164,kp_17_2,0,The production of nuclear weapons exposes many...,We should fight for the abolition of nuclear w...,1,Nuclear weapons provoke or escalate a disastro...,We should fight for the abolition of nuclear w...,1
8518,arg_17_104,kp_17_2,0,"If something exists, it is impossible to ensur...",We should fight for the abolition of nuclear w...,1,Nuclear weapons provoke or escalate a disastro...,We should fight for the abolition of nuclear w...,1


In [13]:
altdata_arguments = sbert_model.encode(full_dataset_train["argument"])
# altdata_arguments
altdata_keypoints = sbert_model.encode(full_dataset_train["key_point"])
# altdata_keypoints
training = np.array(np.hstack((altdata_arguments, altdata_keypoints)))
training.shape
#输出(8520, 1536)，是将8520个argument的arg和kp向量整合了（768*2）

(8520, 1536)

In [14]:
labels = np.array(full_dataset_train["label"])
labels.dtype
print(labels)
labels=np.asarray(labels).astype(np.int)
#training.dtype
#training.dtypes

[1 1 0 ... 0 0 1]


In [15]:
model = keras.Sequential()
model.add(Dense(40, activation='relu', input_dim=(1536)))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy')
model.fit(training, labels, epochs=1, verbose=1, batch_size = 1)
#model.summary()



<tensorflow.python.keras.callbacks.History at 0x1588f545520>

In [16]:
model.save('RegressionNNFullData.h5')

In [17]:
pred = model.predict(training)
pred

array([[0.3712124 ],
       [0.2504996 ],
       [0.473503  ],
       ...,
       [0.79017794],
       [0.8087212 ],
       [0.8820193 ]], dtype=float32)

In [18]:
labels

array([1, 1, 0, ..., 0, 0, 1])

In [19]:
pred = pred.flatten()
#转化为一维向量

res = np.vstack((pred, labels))
df_res = pd.DataFrame(res)
df_res = df_res.transpose()
df_res.columns = ["predicted", "true"]
df_res['predicted_label'] = np.where(df_res['predicted'] >= 0.5, 1, 0)
df_res['match'] = np.where(df_res['true'] == df_res['predicted_label'], 1, 0)
#新增了两列
print(df_res)
df_res['match'].sum()
accuracy = df_res['match'].sum() / df_res.shape[0]
accuracy

      predicted  true  predicted_label  match
0      0.371212   1.0                0      0
1      0.250500   1.0                0      0
2      0.473503   0.0                0      1
3      0.473503   1.0                0      0
4      0.467034   0.0                0      1
...         ...   ...              ...    ...
8515   0.852757   1.0                1      1
8516   0.885341   0.0                1      0
8517   0.790178   0.0                1      0
8518   0.808721   0.0                1      0
8519   0.882019   1.0                1      1

[8520 rows x 4 columns]


0.6884976525821597

In [20]:
df_res.describe()

Unnamed: 0,predicted,true,predicted_label,match
count,8520.0,8520.0,8520.0,8520.0
mean,0.484491,0.5,0.411268,0.688498
std,0.247514,0.500029,0.492092,0.463135
min,0.001333,0.0,0.0,0.0
25%,0.305613,0.0,0.0,0.0
50%,0.473503,0.5,0.0,1.0
75%,0.659628,1.0,1.0,1.0
max,0.998034,1.0,1.0,1.0


In [21]:
df_one = df_res[df_res["true"] == 1]
df_one.describe()

Unnamed: 0,predicted,true,predicted_label,match
count,4260.0,4260.0,4260.0,4260.0
mean,0.606075,1.0,0.599765,0.599765
std,0.215807,0.0,0.490003,0.490003
min,0.021341,1.0,0.0,0.0
25%,0.473503,1.0,0.0,0.0
50%,0.58931,1.0,1.0,1.0
75%,0.79097,1.0,1.0,1.0
max,0.998034,1.0,1.0,1.0


In [22]:
df_zeros = df_res[df_res["true"] == 0]
df_zeros.describe()

Unnamed: 0,predicted,true,predicted_label,match
count,4260.0,4260.0,4260.0,4260.0
mean,0.362907,0.0,0.22277,0.77723
std,0.215397,0.0,0.416154,0.416154
min,0.001333,0.0,0.0,0.0
25%,0.177942,0.0,0.0,1.0
50%,0.371603,0.0,0.0,1.0
75%,0.473503,0.0,0.0,1.0
max,0.9632,0.0,1.0,1.0


In [23]:
df_arguments_dev = pd.read_csv("arguments_dev.csv")
df_keypoints_dev = pd.read_csv("key_points_dev.csv")
df_labels_dev = pd.read_csv("labels_dev.csv")

In [24]:
df_labels_dev

Unnamed: 0,arg_id,key_point_id,label
0,arg_4_121,kp_4_5,1
1,arg_4_121,kp_4_6,0
2,arg_4_121,kp_4_7,0
3,arg_4_122,kp_4_8,0
4,arg_4_122,kp_4_5,1
...,...,...,...
3453,arg_15_226,kp_15_3,0
3454,arg_15_226,kp_15_4,0
3455,arg_15_226,kp_15_5,1
3456,arg_15_226,kp_15_6,0


In [25]:
df_arguments_dev

Unnamed: 0,arg_id,argument,topic,stance
0,arg_4_0,having a school uniform can reduce bullying as...,We should abandon the use of school uniform,-1
1,arg_4_1,it is a good way to instill discipline,We should abandon the use of school uniform,-1
2,arg_4_2,it is cheaper for parents to buy school unifor...,We should abandon the use of school uniform,-1
3,arg_4_3,one of the benefits of school uniforms is that...,We should abandon the use of school uniform,-1
4,arg_4_4,researchers found that in schools across the w...,We should abandon the use of school uniform,-1
...,...,...,...,...
927,arg_15_222,we should end it and give people that are the ...,We should end affirmative action,1
928,arg_15_223,we should end it so that every persons are equal,We should end affirmative action,1
929,arg_15_224,we should evaluate candidates based solely on ...,We should end affirmative action,1
930,arg_15_225,we should select people based on their merit o...,We should end affirmative action,1


In [26]:
df_keypoints_dev

Unnamed: 0,key_point_id,key_point,topic,stance
0,kp_4_0,Children can still express themselves using ot...,We should abandon the use of school uniform,-1
1,kp_4_1,School uniform reduces bullying,We should abandon the use of school uniform,-1
2,kp_4_2,School uniforms encourage discipline or focus ...,We should abandon the use of school uniform,-1
3,kp_4_3,School uniforms saves costs,We should abandon the use of school uniform,-1
4,kp_4_4,School uniforms create a sense of equality/unity,We should abandon the use of school uniform,-1
5,kp_4_5,School uniform is harming the student's self e...,We should abandon the use of school uniform,1
6,kp_4_6,School uniforms are expensive,We should abandon the use of school uniform,1
7,kp_4_7,School uniforms are often uncomfortable/sexist,We should abandon the use of school uniform,1
8,kp_4_8,School uniform harms learning/creativity,We should abandon the use of school uniform,1
9,kp_4_9,School uniforms increase conformity or harm in...,We should abandon the use of school uniform,1


In [27]:
merged_dataset_dev=df_labels_dev.merge(df_arguments_dev, left_on="arg_id", right_on="arg_id")
# df_arguments.loc[df_arguments['arg_id'] == "arg_4_121"]
full_dataset_dev=merged_dataset_dev.merge(df_keypoints_dev, left_on="key_point_id", right_on="key_point_id")
full_dataset_dev

Unnamed: 0,arg_id,key_point_id,label,argument,topic_x,stance_x,key_point,topic_y,stance_y
0,arg_4_121,kp_4_5,1,A real education is about giving students the ...,We should abandon the use of school uniform,1,School uniform is harming the student's self e...,We should abandon the use of school uniform,1
1,arg_4_122,kp_4_5,1,children express themselves through the clothe...,We should abandon the use of school uniform,1,School uniform is harming the student's self e...,We should abandon the use of school uniform,1
2,arg_4_123,kp_4_5,1,"children should be able to dress as they wish,...",We should abandon the use of school uniform,1,School uniform is harming the student's self e...,We should abandon the use of school uniform,1
3,arg_4_124,kp_4_5,1,children should be allowed to express themselves,We should abandon the use of school uniform,1,School uniform is harming the student's self e...,We should abandon the use of school uniform,1
4,arg_4_126,kp_4_5,1,freedom of expression extends to the right to ...,We should abandon the use of school uniform,1,School uniform is harming the student's self e...,We should abandon the use of school uniform,1
...,...,...,...,...,...,...,...,...,...
3453,arg_15_218,kp_15_7,1,we should end affirmative action because stude...,We should end affirmative action,1,Affirmative action reduces quality,We should end affirmative action,1
3454,arg_15_219,kp_15_7,0,we should end affirmative action because this ...,We should end affirmative action,1,Affirmative action reduces quality,We should end affirmative action,1
3455,arg_15_223,kp_15_7,0,we should end it so that every persons are equal,We should end affirmative action,1,Affirmative action reduces quality,We should end affirmative action,1
3456,arg_15_225,kp_15_7,0,we should select people based on their merit o...,We should end affirmative action,1,Affirmative action reduces quality,We should end affirmative action,1


In [28]:
altdata_arguments_dev = sbert_model.encode(full_dataset_dev["argument"])
# altdata_arguments
altdata_keypoints_dev = sbert_model.encode(full_dataset_dev["key_point"])
# altdata_keypoints
dev = np.hstack((altdata_arguments_dev, altdata_keypoints_dev))
dev.shape

(3458, 1536)

In [29]:
labels_dev = np.array(full_dataset_dev["label"])

In [30]:
pred = model.predict(dev)
pred

array([[0.61408025],
       [0.28963453],
       [0.41223758],
       ...,
       [0.0833765 ],
       [0.46371314],
       [0.17582738]], dtype=float32)

In [31]:
pred = pred.flatten()

res = np.vstack((pred, labels_dev))
df_res = pd.DataFrame(res)
df_res = df_res.transpose()
df_res.columns = ["predicted", "true"]
df_res['predicted_label'] = np.where(df_res['predicted'] >= 0.5, 1, 0)
df_res['match'] = np.where(df_res['true'] == df_res['predicted_label'], 1, 0)
print(df_res)
df_res['match'].sum()
accuracy = df_res['match'].sum() / df_res.shape[0]
accuracy

      predicted  true  predicted_label  match
0      0.614080   1.0                1      1
1      0.289635   1.0                0      0
2      0.412238   1.0                0      0
3      0.178121   1.0                0      0
4      0.752372   1.0                1      1
...         ...   ...              ...    ...
3453   0.473503   1.0                0      0
3454   0.442855   0.0                0      1
3455   0.083376   0.0                0      1
3456   0.463713   0.0                0      1
3457   0.175827   0.0                0      1

[3458 rows x 4 columns]


0.7050318102949682

In [32]:
df_arguments_test = pd.read_csv("arguments_test.csv")
df_keypoints_test = pd.read_csv("key_points_test.csv")
df_labels_test = pd.read_csv("labels_test.csv")

In [33]:
df_labels_test

Unnamed: 0,arg_id,key_point_id,label
0,arg_0_112,kp_0_4,0
1,arg_0_112,kp_0_5,0
2,arg_0_112,kp_0_6,0
3,arg_0_112,kp_0_7,1
4,arg_0_0,kp_0_0,0
...,...,...,...
3421,arg_1_232,kp_1_9,0
3422,arg_0_111,kp_0_3,0
3423,arg_0_111,kp_0_0,1
3424,arg_0_111,kp_0_1,0


In [34]:
df_keypoints_test

Unnamed: 0,key_point_id,key_point,topic,stance
0,kp_0_0,"Routine child vaccinations, or their side effe...",Routine child vaccinations should be mandatory,-1
1,kp_0_1,Mandatory vaccination contradicts basic rights,Routine child vaccinations should be mandatory,-1
2,kp_0_2,The parents and not the state should decide,Routine child vaccinations should be mandatory,-1
3,kp_0_3,Routine child vaccinations are not necessary t...,Routine child vaccinations should be mandatory,-1
4,kp_0_4,Routine child vaccinations are effective,Routine child vaccinations should be mandatory,1
5,kp_0_5,Child vaccination saves lives,Routine child vaccinations should be mandatory,1
6,kp_0_6,Routine child vaccinations are necessary to pr...,Routine child vaccinations should be mandatory,1
7,kp_0_7,Routine child vaccinations should be mandatory...,Routine child vaccinations should be mandatory,1
8,kp_0_8,Children should not suffer from preventable di...,Routine child vaccinations should be mandatory,1
9,kp_1_0,Social media regulation is not effective,Social media platforms should be regulated by ...,-1


In [35]:
df_arguments_test

Unnamed: 0,arg_id,argument,topic,stance
0,arg_0_0,Routine child vaccinations isn't mandatory sin...,Routine child vaccinations should be mandatory,-1
1,arg_0_1,Routine child vaccinations should not be manda...,Routine child vaccinations should be mandatory,-1
2,arg_0_2,Routine child vaccinations should not be neces...,Routine child vaccinations should be mandatory,-1
3,arg_0_3,A vaccine that has not been sufficiently teste...,Routine child vaccinations should be mandatory,-1
4,arg_0_4,As long as vaccines are not free of side effec...,Routine child vaccinations should be mandatory,-1
...,...,...,...,...
718,arg_2_205,usa is an excellent country to live with wonde...,The USA is a good country to live in,1
719,arg_2_206,we all have the american dream. it is the best...,The USA is a good country to live in,1
720,arg_2_207,yes for its economic and labor stability,The USA is a good country to live in,1
721,arg_2_208,yes is the best country to live to live the ...,The USA is a good country to live in,1


In [36]:
merged_dataset_test=df_labels.merge(df_arguments_test, left_on="arg_id", right_on="arg_id")
# df_arguments.loc[df_arguments['arg_id'] == "arg_4_121"]
full_dataset_test=merged_dataset.merge(df_keypoints_test, left_on="key_point_id", right_on="key_point_id")
full_dataset_test

Unnamed: 0,arg_id,key_point_id,label,argument,topic_x,stance_x,key_point,topic_y,stance_y
0,arg_0_210,kp_0_7,0,many people do not have the mental capacity to...,Assisted suicide should be a criminal offence,1,Routine child vaccinations should be mandatory...,Routine child vaccinations should be mandatory,1
1,arg_0_177,kp_0_7,0,assisted suicide should be a criminal offence ...,Assisted suicide should be a criminal offence,1,Routine child vaccinations should be mandatory...,Routine child vaccinations should be mandatory,1
2,arg_0_150,kp_0_7,0,assisted suicide is murder of another and shou...,Assisted suicide should be a criminal offence,1,Routine child vaccinations should be mandatory...,Routine child vaccinations should be mandatory,1
3,arg_0_179,kp_0_7,0,assisted suicide should be a criminal offence ...,Assisted suicide should be a criminal offence,1,Routine child vaccinations should be mandatory...,Routine child vaccinations should be mandatory,1
4,arg_0_220,kp_0_7,0,people may easily be manipulated into being a ...,Assisted suicide should be a criminal offence,1,Routine child vaccinations should be mandatory...,Routine child vaccinations should be mandatory,1
...,...,...,...,...,...,...,...,...,...
1260,arg_0_94,kp_0_0,0,people should have the right to decide how and...,Assisted suicide should be a criminal offence,-1,"Routine child vaccinations, or their side effe...",Routine child vaccinations should be mandatory,-1
1261,arg_0_10,kp_0_0,1,assisted suicide allows terminally ill people ...,Assisted suicide should be a criminal offence,-1,"Routine child vaccinations, or their side effe...",Routine child vaccinations should be mandatory,-1
1262,arg_0_118,kp_0_0,1,We don't allow our pets to suffer but because ...,Assisted suicide should be a criminal offence,-1,"Routine child vaccinations, or their side effe...",Routine child vaccinations should be mandatory,-1
1263,arg_0_113,kp_0_0,0,the right to die is a personal choice and ever...,Assisted suicide should be a criminal offence,-1,"Routine child vaccinations, or their side effe...",Routine child vaccinations should be mandatory,-1


In [37]:
altdata_arguments_test = sbert_model.encode(full_dataset_test["argument"])
# altdata_arguments
altdata_keypoints_test = sbert_model.encode(full_dataset_test["key_point"])
# altdata_keypoints
training_test = np.hstack((altdata_arguments_test, altdata_keypoints_test))
training_test

array([[ 0.22633937,  0.554146  ,  0.19942318, ..., -0.16148338,
        -1.4278635 ,  0.53885126],
       [-0.04930202,  0.10250104, -0.18276763, ..., -0.16148338,
        -1.4278635 ,  0.53885126],
       [-0.12522715,  0.03689397, -0.09041947, ..., -0.16148338,
        -1.4278635 ,  0.53885126],
       ...,
       [ 0.15880401,  0.722718  ,  0.44542477, ..., -0.5389258 ,
        -1.2187271 ,  0.33487737],
       [ 0.05034905,  0.30275172,  0.89811814, ..., -0.5389258 ,
        -1.2187271 ,  0.33487737],
       [-0.54348224,  0.70539004,  0.6901144 , ..., -0.5389258 ,
        -1.2187271 ,  0.33487737]], dtype=float32)

In [38]:
labels_test = np.array(full_dataset_test["label"])

In [39]:
pred = model.predict(training_test)
pred

array([[0.6208413 ],
       [0.7763232 ],
       [0.78500783],
       ...,
       [0.80521667],
       [0.6045477 ],
       [0.73299015]], dtype=float32)

In [40]:
pred = pred.flatten()

res = np.vstack((pred, labels_test))
df_res = pd.DataFrame(res)
df_res = df_res.transpose()
df_res.columns = ["predicted", "true"]
df_res['predicted_label'] = np.where(df_res['predicted'] >= 0.5, 1, 0)
df_res['match'] = np.where(df_res['true'] == df_res['predicted_label'], 1, 0)
print(df_res)
accuracy = df_res['match'].sum() / df_res.shape[0]
accuracy

     predicted true  predicted_label  match
0     0.620841    0                1      0
1     0.776323    0                1      0
2     0.785008    0                1      0
3     0.716067    0                1      0
4     0.701616    0                1      0
...        ...  ...              ...    ...
1260  0.780409    0                1      0
1261  0.768643    1                1      1
1262  0.805217    1                1      1
1263  0.604548    0                1      0
1264   0.73299    0                1      0

[1265 rows x 4 columns]


0.5399209486166008

In [41]:
import sklearn as skl

In [42]:
df_res["true"]

0       0
1       0
2       0
3       0
4       0
       ..
1260    0
1261    1
1262    1
1263    0
1264    0
Name: true, Length: 1265, dtype: object

In [43]:
skl.metrics.f1_score(np.asarray(df_res["true"]).astype(float), np.asarray(df_res["predicted_label"]).astype(float))

0.5174129353233831

In [44]:
df_res

Unnamed: 0,predicted,true,predicted_label,match
0,0.620841,0,1,0
1,0.776323,0,1,0
2,0.785008,0,1,0
3,0.716067,0,1,0
4,0.701616,0,1,0
...,...,...,...,...
1260,0.780409,0,1,0
1261,0.768643,1,1,1
1262,0.805217,1,1,1
1263,0.604548,0,1,0


In [45]:
df_res.describe()


Unnamed: 0,predicted_label,match
count,1265.0,1265.0
mean,0.457708,0.539921
std,0.498405,0.498601
min,0.0,0.0
25%,0.0,0.0
50%,0.0,1.0
75%,1.0,1.0
max,1.0,1.0


In [46]:
df_one = df_res[df_res["true"] == 1]
df_one.describe()

Unnamed: 0,predicted_label,match
count,627.0,627.0
mean,0.497608,0.497608
std,0.500393,0.500393
min,0.0,0.0
25%,0.0,0.0
50%,0.0,0.0
75%,1.0,1.0
max,1.0,1.0


In [47]:
df_zeros = df_res[df_res["true"] == 0]
df_zeros.describe()

Unnamed: 0,predicted_label,match
count,638.0,638.0
mean,0.418495,0.581505
std,0.493699,0.493699
min,0.0,0.0
25%,0.0,0.0
50%,0.0,1.0
75%,1.0,1.0
max,1.0,1.0
