In [None]:
import os
import random as rn
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, StandardScaler
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense

# Let NumPy print up to 200 characters per line before wrapping array output.
np.set_printoptions(linewidth=200)

# IPython magic: render Matplotlib figures inside the notebook's cell output.
%matplotlib inline

In [None]:
def set_random_seeds(seed=1234):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's global generator and
    TensorFlow's global generator, and exports ``PYTHONHASHSEED``.

    Parameters
    ----------
    seed : int, optional
        Seed applied to every generator. Defaults to 1234, the value that used
        to be hard-coded, so a bare ``set_random_seeds()`` behaves as before.

    Returns
    -------
    None

    Notes
    -----
    ``PYTHONHASHSEED`` is read when the interpreter starts, so assigning it
    here does not change hashing in the already-running kernel; it is only
    inherited by processes started afterwards.
    """
    # Environment values must be strings.
    os.environ['PYTHONHASHSEED'] = str(seed)
    rn.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)

In [None]:
# Read the raw episode table; decimal="," tells pandas that numbers in the
# file use a comma as the decimal separator.
df = pd.read_csv(filepath_or_buffer="../data/naked_and_afraid.csv", decimal=",")
df.head()

In [None]:
# Columns selected for both the female and the male frame.
shared_cols = ["Scheduled Duration", "Location", "Continent"]

# Participant-specific columns, listed in matching order for each sex.
female_cols = ["Female Age", "Start FPSR", "End FPSR", "F. Duration"]
male_cols = ["Male Age", "Start MPSR", "End MPSR", "M. Duration"]

# Sex-neutral names given to both frames so they can be stacked row-wise.
final_cols = [*shared_cols, "Age", "Start PSR", "End PSR", "Duration"]

In [None]:
# Female view of the data: keep the shared and female-specific columns and
# relabel them, position by position, with the sex-neutral names.
df_female = df[shared_cols + female_cols].rename(
    columns=dict(zip(shared_cols + female_cols, final_cols))
)

print(df_female.shape)
df_female.head()

In [None]:
# Male view of the data: keep the shared and male-specific columns and
# relabel them, position by position, with the sex-neutral names.
df_male = df[shared_cols + male_cols].rename(
    columns=dict(zip(shared_cols + male_cols, final_cols))
)

print(df_male.shape)
df_male.head()

In [None]:
# Stack the female rows on top of the male rows and derive two extra columns:
#   sex          - 1 for rows coming from df_female, 0 for rows from df_male
#   has_finished - True when the participant's duration equals the scheduled one
df_final = pd.concat([df_female, df_male], ignore_index=True).assign(
    sex=[1] * len(df_female) + [0] * len(df_male),
    has_finished=lambda frame: frame["Scheduled Duration"] == frame["Duration"],
)

# Discard every row with a missing value in any of the feature columns.
df_final = df_final.dropna(how="any", subset=final_cols)

print(df_final.shape)
df_final.sample(n=10, random_state=1234)

In [None]:
# One encoder per categorical column; both are kept so the same string-to-code
# mapping can be applied to new rows later.
loc_enc = LabelEncoder().fit(df_final["Location"].values)
cont_enc = LabelEncoder().fit(df_final["Continent"].values)

# Numeric copy of df_final: the two text columns are replaced by integer codes,
# df_final itself is left untouched.
df_samples = df_final.copy(deep=True).assign(
    Location=lambda frame: loc_enc.transform(frame["Location"]),
    Continent=lambda frame: cont_enc.transform(frame["Continent"]),
)

df_samples.sample(n=10, random_state=1234)

In [None]:
# Row count per sex code (1 = rows from the female frame, 0 = rows from the male frame).
df_samples["sex"].value_counts().plot.bar()

In [None]:
# Row count per completion flag (True = duration matched the scheduled duration).
df_samples["has_finished"].value_counts().plot.bar()

# Treinando o Modelo

In [None]:
# Labels kept aside as arrays; they are used later to group the embeddings.
y_sex = df_samples["sex"].values
y_has_finished = df_samples["has_finished"].values

# Feature matrix: the seven renamed columns plus the sex flag, standardised
# column-wise. The fitted scaler stays in `std` so new rows can be scaled
# the same way.
std = StandardScaler()
x = std.fit_transform(df_samples[final_cols + ["sex"]].values)

print(x.shape, x.dtype)

In [None]:
# Seed all generators before any weights are initialised.
set_random_seeds()

# Width of the input and of the reconstruction. Taken from the data instead of
# being hard-coded so the network keeps matching `x` if the feature columns
# ever change.
n_features = x.shape[1]

# Autoencoder: n_features -> 4 -> 2 -> 4 -> n_features. The 2-unit layer named
# "embeddings" is the bottleneck that is read out later by name.
model = Sequential([
    Dense(4, activation="relu", input_dim=n_features, name="enc_1"),
    Dense(2, activation="relu", name="embeddings"),
    Dense(4, activation="relu", name="dec_1"),
    Dense(n_features, activation="linear", name="decoded"),
])
model.summary()

model.compile(optimizer="adam", loss="mean_squared_error")

# The input is also the target: the network is trained to reconstruct `x`.
model.fit(x, x, batch_size=16, epochs=500, verbose=1)

In [None]:
# Encoder half of the trained network: same inputs, but the output is taken
# at the layer named "embeddings".
model_enc = Model(
    inputs=model.inputs,
    outputs=model.get_layer(name="embeddings").output,
)
model_enc.summary()

# Embedding of every training row.
y_embed = model_enc.predict(x)
y_embed.shape

In [None]:
# Row selectors for the four sex x completion groups.
women, men = y_sex == 1, y_sex == 0
finished, unfinished = y_has_finished, np.logical_not(y_has_finished)

mask_women_has_finished = np.logical_and(women, finished)
mask_women_not_finished = np.logical_and(women, unfinished)
mask_men_has_finished = np.logical_and(men, finished)
mask_men_not_finished = np.logical_and(men, unfinished)

# Embedded points of each group.
emb_whf, emb_wnf, emb_mhf, emb_mnf = (
    y_embed[mask]
    for mask in (
        mask_women_has_finished,
        mask_women_not_finished,
        mask_men_has_finished,
        mask_men_not_finished,
    )
)

# Marker encodes sex (circle = women, triangle = men); colour encodes completion.
scatter_groups = [
    (emb_whf, "o", "#54b3f6", "women - has finished"),
    (emb_wnf, "o", "#8563eb", "women - not finished"),
    (emb_mhf, "^", "#54b3f6", "men - has finished"),
    (emb_mnf, "^", "#8563eb", "men - not finished"),
]

fig, ax = plt.subplots(figsize=(10, 8))
for points, marker, colour, label in scatter_groups:
    ax.scatter(points[:, 0], points[:, 1], marker=marker, c=colour, s=100, label=label)

# Legend left disabled, as before; uncomment to show the series labels.
# ax.legend()
fig.tight_layout()

In [None]:
# Show the first rows of df_final (text columns still unencoded) for reference.
df_final.head(n=5)

In [None]:
# A single hand-written participant, keyed by column name so each value is
# visibly tied to its feature. The columns are exactly the ones the scaler and
# the network were fitted on (final_cols + ["sex"]), instead of relying on
# "has_finished" being the last column of df_final.
df_me = pd.DataFrame(
    [
        {
            "Scheduled Duration": 21,
            "Location": "Brazil",
            "Continent": "South America",
            "Age": 29,
            "Start PSR": 1.0,
            "End PSR": 1.0,
            "Duration": 1,
            "sex": 0,
        }
    ],
    columns=final_cols + ["sex"],
)
df_me

In [None]:
# Encode the text columns of the hand-written row with the encoders fitted on
# the dataset; df_me itself is left untouched.
df_sample_me = df_me.copy(deep=True).assign(
    Location=lambda frame: loc_enc.transform(frame["Location"]),
    Continent=lambda frame: cont_enc.transform(frame["Continent"]),
)

df_sample_me

In [None]:
# Scale the new row with the scaler fitted on the dataset, then project it
# with the encoder sub-model.
x_me = std.transform(df_sample_me.to_numpy())
emb_me = model_enc.predict(x_me)

emb_me.shape

In [None]:
# Row selectors for the four sex x completion groups.
women, men = y_sex == 1, y_sex == 0
finished, unfinished = y_has_finished, np.logical_not(y_has_finished)

mask_women_has_finished = np.logical_and(women, finished)
mask_women_not_finished = np.logical_and(women, unfinished)
mask_men_has_finished = np.logical_and(men, finished)
mask_men_not_finished = np.logical_and(men, unfinished)

# Embedded points of each group.
emb_whf, emb_wnf, emb_mhf, emb_mnf = (
    y_embed[mask]
    for mask in (
        mask_women_has_finished,
        mask_women_not_finished,
        mask_men_has_finished,
        mask_men_not_finished,
    )
)

# Marker encodes sex (circle = women, triangle = men); colour encodes
# completion. The hand-written row is drawn last as a red "X".
scatter_groups = [
    (emb_whf, "o", "#54b3f6", "women - has finished"),
    (emb_wnf, "o", "#8563eb", "women - not finished"),
    (emb_mhf, "^", "#54b3f6", "men - has finished"),
    (emb_mnf, "^", "#8563eb", "men - not finished"),
    (emb_me, "X", "red", "me"),
]

fig, ax = plt.subplots(figsize=(10, 8))
for points, marker, colour, label in scatter_groups:
    ax.scatter(points[:, 0], points[:, 1], marker=marker, c=colour, s=100, label=label)

# Legend left disabled, as before; uncomment to show the series labels.
# ax.legend()
fig.tight_layout()
fig.savefig("../plots/me.png", dpi=300)