In [4]:
import pandas as pd
import numpy as np

import tensorflow as tf

from tensorflow import feature_column
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split

In [255]:
# Load the raw dataset from a CSV file (path is relative to the working directory).
# NOTE(review): the preview printed for this frame shows star-catalogue columns
# (Temperature, Luminosity, ...), whereas later cells read hotel-review columns
# such as 'user_country' and 'score' — confirm this is the intended file.
data = pd.read_csv('stars.csv')

In [256]:
# Preview the first five rows to sanity-check the loaded frame.
data.head(n=5)

Unnamed: 0,Temperature (K),Luminosity(L/Lo),Radius(R/Ro),Absolute magnitude(Mv),Star type,Star color,Spectral Class
0,3068,0.0024,0.17,16.12,0,Red,M
1,3042,0.0005,0.1542,16.6,0,Red,M
2,2600,0.0003,0.102,18.7,0,Red,M
3,2800,0.0002,0.16,16.65,0,Red,M
4,1939,0.000138,0.103,20.06,0,Red,M


In [253]:
# Normalise the column headers into snake_case identifiers:
# trim whitespace, lower-case, ' ' -> '_', drop parentheses, '/' -> '-'.
# regex=False forces literal matching everywhere: '(' and ')' are not valid
# regular expressions on their own, so they must never be compiled as patterns
# (the default for `regex` differs between pandas versions).
data.columns = (
    data.columns
    .str.strip()
    .str.lower()
    .str.replace(' ', '_', regex=False)
    .str.replace('(', '', regex=False)
    .str.replace(')', '', regex=False)
    .str.replace('/', '-', regex=False)
)
# NOTE(review): the clean-up steps below are disabled, yet the batch printed
# further down shows no 'hotel_name' feature and scores in the range 0..4,
# which suggests they were applied in the live kernel at some point — confirm
# whether they must run on a fresh kernel.
# data.pop('hotel_name')
# data['score'] = data['score'] - 1
# data['user_country'][103] = 'India'
# data['member_years'][75] = 0

In [202]:
# Collect the distinct values observed in each categorical column; these arrays
# are used as vocabularies when the categorical feature columns are built.
(
    user_country, period_of_stay, pool, traveler_type, gym, tennis_court, spa,
    casino, free_internet, hotel_stars, user_continent, review_month,
    review_weekday,
) = (
    data[name].unique()
    for name in (
        'user_country', 'period_of_stay', 'pool', 'traveler_type', 'gym',
        'tennis_court', 'spa', 'casino', 'free_internet', 'hotel_stars',
        'user_continent', 'review_month', 'review_weekday',
    )
)

In [203]:
# Hold out 20% of the rows for testing, then 20% of the remainder for validation.
# A fixed random_state makes both splits repeatable across kernel restarts;
# without it every run trains and evaluates on a different partition.
SPLIT_SEED = 42
train, test = train_test_split(data, test_size=0.2, random_state=SPLIT_SEED)
train, val = train_test_split(train, test_size=0.2, random_state=SPLIT_SEED)
print(len(train), 'train examples')
print(len(val), 'validation examples')
print(len(test), 'test examples')

322 train examples
81 validation examples
101 test examples


In [204]:
def df_to_dataset(df, batch_size=32):
    """Turn a DataFrame into a batched ``tf.data.Dataset`` of (features, label) pairs.

    Args:
        df: Frame containing a 'score' column plus the feature columns.
            It is copied first, so the caller's frame is left unmodified.
        batch_size: Number of rows grouped into each dataset element.

    Returns:
        A dataset whose elements are ``(features, labels)``, where ``features``
        is a dict keyed by column name and ``labels`` holds the 'score' values.
    """
    features = df.copy()
    # pop() removes the label column from the copy and returns it.
    targets = features.pop('score')
    slices = tf.data.Dataset.from_tensor_slices((dict(features), targets))
    return slices.batch(batch_size)

In [205]:
# Wrap each split in a batched dataset (using df_to_dataset's default batch size).
train_ds, val_ds, test_ds = map(df_to_dataset, (train, val, test))

In [207]:
# Peek at a single batch to check the feature keys and label values.
for feature_batch, label_batch in train_ds.take(1):
    feature_names = list(feature_batch)  # iterating a dict yields its keys
    print('Every feature:', feature_names)
    print('A batch of Spa:', feature_batch['spa'])
    print('A batch of Scores:', label_batch)

Every feature: ['user_country', 'nr_reviews', 'nr_hotel_reviews', 'helpful_votes', 'period_of_stay', 'traveler_type', 'pool', 'gym', 'tennis_court', 'spa', 'casino', 'free_internet', 'hotel_stars', 'nr_rooms', 'user_continent', 'member_years', 'review_month', 'review_weekday']
A batch of Spa: tf.Tensor(
[b'YES' b'YES' b'YES' b'YES' b'YES' b'YES' b'YES' b'NO' b'YES' b'YES'
 b'YES' b'YES' b'YES' b'YES' b'YES' b'YES' b'NO' b'NO' b'YES' b'NO' b'YES'
 b'YES' b'YES' b'YES' b'YES' b'NO' b'YES' b'NO' b'YES' b'YES' b'YES' b'NO'], shape=(32,), dtype=string)
A batch of Scores: tf.Tensor([4 0 3 3 4 2 3 1 3 3 3 4 4 4 4 3 4 4 4 4 4 2 1 2 4 3 1 4 4 3 4 4], shape=(32,), dtype=int64)


In [208]:
# Keep the feature dict of the first training batch as a sample input.
first_batch = next(iter(train_ds))
example = first_batch[0]


def demo(feature_column):
    """Apply ``feature_column`` to the sample batch ``example`` and print the result.

    Args:
        feature_column: Feature column(s) handed to ``layers.DenseFeatures``.
            Note that inside this function the name shadows the imported
            ``feature_column`` module.
    """
    dense_features = layers.DenseFeatures(feature_column)
    transformed = dense_features(example)
    print(transformed.numpy())

In [209]:
# Display the structure of one dataset element (a TensorSpec with shape and dtype per component).
train_ds.element_spec

({'user_country': TensorSpec(shape=(None,), dtype=tf.string, name=None),
  'nr_reviews': TensorSpec(shape=(None,), dtype=tf.int64, name=None),
  'nr_hotel_reviews': TensorSpec(shape=(None,), dtype=tf.int64, name=None),
  'helpful_votes': TensorSpec(shape=(None,), dtype=tf.int64, name=None),
  'period_of_stay': TensorSpec(shape=(None,), dtype=tf.string, name=None),
  'traveler_type': TensorSpec(shape=(None,), dtype=tf.string, name=None),
  'pool': TensorSpec(shape=(None,), dtype=tf.string, name=None),
  'gym': TensorSpec(shape=(None,), dtype=tf.string, name=None),
  'tennis_court': TensorSpec(shape=(None,), dtype=tf.string, name=None),
  'spa': TensorSpec(shape=(None,), dtype=tf.string, name=None),
  'casino': TensorSpec(shape=(None,), dtype=tf.string, name=None),
  'free_internet': TensorSpec(shape=(None,), dtype=tf.string, name=None),
  'hotel_stars': TensorSpec(shape=(None,), dtype=tf.int64, name=None),
  'nr_rooms': TensorSpec(shape=(None,), dtype=tf.int64, name=None),
  'user_conti

In [210]:
# Categorical inputs; each one is turned into an indicator (one-hot) column
# over the vocabulary collected for it.
col_names = [
    'user_country', 'period_of_stay', 'pool', 'traveler_type', 'gym',
    'tennis_court', 'spa', 'casino', 'free_internet', 'hotel_stars',
    'user_continent', 'review_month', 'review_weekday',
]

# Vocabularies, listed in the same order as col_names.
vocab_list = [
    user_country, period_of_stay, pool, traveler_type, gym,
    tennis_court, spa, casino, free_internet, hotel_stars,
    user_continent, review_month, review_weekday,
]

feature_columns = []

for key, vocabulary in zip(col_names, vocab_list):
    categorical = feature_column.categorical_column_with_vocabulary_list(
        key, vocabulary
    )
    one_hot_col = feature_column.indicator_column(categorical)
    print(one_hot_col)
    feature_columns.append(one_hot_col)

# Remaining inputs are registered as plain numeric columns.
feature_columns.extend(
    feature_column.numeric_column(name)
    for name in ['nr_reviews', 'nr_hotel_reviews', 'helpful_votes', 'nr_rooms', 'member_years']
)

IndicatorColumn(categorical_column=VocabularyListCategoricalColumn(key='user_country', vocabulary_list=('USA', 'UK', 'Canada', 'India', 'Australia', 'New Zeland', 'Ireland', 'Egypt', 'Finland', 'Kenya', 'Jordan', 'Netherlands', 'Syria', 'Scotland', 'South Africa', 'Swiss', 'United Arab Emirates', 'Hungary', 'China', 'Greece', 'Mexico', 'Croatia', 'Germany', 'Malaysia', 'Thailand', 'Phillippines', 'Israel', 'Belgium', 'Puerto Rico', 'Switzerland', 'Norway', 'France', 'Spain', 'Singapore', 'Brazil', 'Costa Rica', 'Iran', 'Saudi Arabia', 'Honduras', 'Denmark', 'Taiwan', 'Hawaii', 'Kuwait', 'Czech Republic', 'Japan', 'Korea', 'Italy'), dtype=tf.string, default_value=-1, num_oov_buckets=0))
IndicatorColumn(categorical_column=VocabularyListCategoricalColumn(key='period_of_stay', vocabulary_list=('Dec-Feb', 'Mar-May', 'Jun-Aug', 'Sep-Nov'), dtype=tf.string, default_value=-1, num_oov_buckets=0))
IndicatorColumn(categorical_column=VocabularyListCategoricalColumn(key='pool', vocabulary_list=('NO

In [211]:
# Input layer that converts the raw feature dict into a single dense tensor.
feature_layer = layers.DenseFeatures(feature_columns)

In [244]:
# Small fully connected classifier: encoded features -> two hidden ReLU layers
# -> 5 output units (matching the score labels 0..4 seen in the batch preview).
# The final layer has no activation, so the model emits raw, unnormalised scores.
# No reset call precedes the construction: `model` does not exist yet on a fresh
# kernel, and a newly built Sequential has freshly initialised weights anyway.
model = tf.keras.Sequential([
    feature_layer,
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(5),
])

In [245]:
# The network's last layer is a plain Dense(5) without softmax, i.e. it outputs
# logits. The loss must therefore normalise them itself (from_logits=True);
# the functional sparse_categorical_crossentropy defaults to from_logits=False
# and would treat the raw logits as if they were probabilities.
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

In [246]:
# Monitor training on the dedicated validation split. The test split is kept
# out of the training loop so it can still provide an unbiased final estimate.
model.fit(train_ds, validation_data=val_ds, epochs=20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7fbef497c390>