In [2]:
import os
import pandas as pd
import requests as req
import json
import numpy as np
import re
from unidecode import unidecode

# Maximum number of characters kept per preprocessed name (longer names are
# truncated); also the number of rows in each one-hot encoded name matrix.
MAX_NAME_SIZE = 50

In [3]:
# Endpoint the character records are downloaded from.
DATA_URL = 'https://api.got.show/api/characters/'

# Bound the wait with a timeout and fail fast on an HTTP error status, so a
# hung server cannot block the kernel forever and an error page is never
# handed to the JSON parser as if it were data.
response = req.get(DATA_URL, timeout=30)
response.raise_for_status()
data = response.content

In [4]:
# Parse the downloaded JSON payload (a list of records) into a DataFrame,
# then display the number of rows that were loaded.
df = pd.read_json(data, orient='records')
len(df)

2028

In [5]:
# Keep only the columns used below: the name and the birth/death values.
df = df[['name', 'dateOfBirth', 'dateOfDeath']]

In [6]:
# Drop rows without a dateOfBirth; rows with a missing dateOfDeath are kept.
df_only_born = df.dropna(subset=['dateOfBirth'])
len(df_only_born)

478

In [7]:
def build_dict(names):
    """Return the sorted list of distinct characters found across ``names``.

    Args:
        names: iterable of strings.

    Returns:
        A list holding each character that occurs in any of the strings
        exactly once, in ascending order.
    """
    alphabet = {letter for name in names for letter in name}
    return sorted(alphabet)

def preprocess_name(name):
    """Normalise a character name into a compact lowercase token.

    The name is lowercased and passed through ``unidecode``, every character
    matching the regex ``\\W`` (anything that is not a letter, digit or
    underscore) is removed, and the result is cut to at most
    ``MAX_NAME_SIZE`` characters.

    Args:
        name: the raw name string.

    Returns:
        The cleaned, possibly truncated, name.
    """
    transliterated = unidecode(name.lower())
    word_chars_only = re.sub(r'\W', '', transliterated)

    # Slicing leaves strings that are already short enough untouched.
    return word_chars_only[:MAX_NAME_SIZE]

def preprocess_age(birth, death):
    """Return the age at death as an integer, or -1 when ``death`` is NaN.

    Args:
        birth: numeric birth value.
        death: numeric death value; NaN marks a missing value.

    Returns:
        ``int(death - birth)`` (truncated towards zero), or the sentinel -1
        if ``death`` is NaN.
    """
    has_death = not np.isnan(death)
    return int(death - birth) if has_death else -1

def names_to_letters_list(names):
    """Split every name into a list of its individual characters.

    Args:
        names: iterable of strings.

    Returns:
        A list with one entry per name, each entry being the list of that
        name's characters in order.
    """
    return [list(name) for name in names]

In [8]:
# Normalise the name of every row that has a dateOfBirth.
df_names = df_only_born['name'].apply(preprocess_name)

In [9]:
# Sorted list of the distinct characters occurring in the preprocessed names;
# a character's position in this list is its column in the one-hot encoding.
LETTERS_DICT = build_dict(list(df_names))

In [10]:
# One age per row of df_only_born; preprocess_age yields -1 where dateOfDeath
# is missing.
list_ages = [
    preprocess_age(birth, death)
    for birth, death in zip(df_only_born['dateOfBirth'], df_only_born['dateOfDeath'])
]

In [42]:
def onehot_encoder(name):
    """One-hot encode ``name`` into a fixed-size integer matrix.

    Args:
        name: preprocessed name; every character must be present in
            ``LETTERS_DICT`` and the name must not be longer than
            ``MAX_NAME_SIZE``.

    Returns:
        An array of shape ``(MAX_NAME_SIZE, len(LETTERS_DICT))`` in which row
        ``i`` has a single 1 in the column of the name's ``i``-th character.
        Rows past the end of the name stay all-zero.
    """
    matrix_shape = (MAX_NAME_SIZE, len(LETTERS_DICT))
    encoded = np.zeros(matrix_shape, dtype='int')

    for position, letter in enumerate(name):
        column = LETTERS_DICT.index(letter)
        encoded[position, column] = 1

    return encoded

In [44]:
# Encode every preprocessed name and stack the per-name matrices into a single
# array, then display its shape.
onehot_names = np.array([onehot_encoder(name) for name in df_names])
onehot_names.shape

(478, 50, 26)

In [71]:
from sklearn.preprocessing import MinMaxScaler
# Fit a min-max scaler on the ages, presented as one single-feature row per
# sample. list_ages contains the -1 sentinel produced by preprocess_age for
# rows with no dateOfDeath, so that sentinel is part of the fitted data.
mm_scaler = MinMaxScaler()
scaler = mm_scaler.fit([[age] for age in list_ages])

In [73]:
# Scale the same ages with the fitted scaler and display the resulting shape
# (one row per sample, one column).
ages_scaled = scaler.transform([[age] for age in list_ages])
ages_scaled = np.array(ages_scaled)
ages_scaled.shape

(478, 1)

# Machine Learning

In [75]:
# NOTE: the recorded run of this cell failed with
# "ModuleNotFoundError: No module named 'keras'"; keras has to be installed in
# the kernel's environment before the cell can execute.
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten

# Regression network: one-hot encoded name -> min-max scaled age.
model = Sequential()
# Dense only acts on the last axis, so without flattening the network would
# emit one value per character position, (batch, MAX_NAME_SIZE, 1), which
# cannot be matched against the (n_samples, 1) targets in ages_scaled.
model.add(Flatten(input_shape=(MAX_NAME_SIZE, len(LETTERS_DICT))))
model.add(Dense(units=10, activation='relu'))
model.add(Dense(units=10, activation='tanh'))
# Softmax over a single unit is always exactly 1.0, so the model could never
# learn anything. Sigmoid produces a value in the same [0, 1] range as the
# min-max scaled targets.
model.add(Dense(units=1, activation='sigmoid'))

model.summary()

ModuleNotFoundError: No module named 'keras'