In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file in it.
for root, _, names in os.walk('/kaggle/input'):
    for path in (os.path.join(root, name) for name in names):
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [15]:
# Load spotify.csv and discard rows that have no value in the target column.
# The result is reassigned (no in-place mutation), so re-running this cell
# always starts again from the file on disk.
data = (
    pd.read_csv("../input/dl-course-data/spotify.csv")
    .dropna(axis=0, subset=["track_popularity"])
)

# Input columns, grouped by the preprocessing they need.
features_num = ['danceability', 'energy', 'key', 'loudness', 'mode',
                'speechiness', 'acousticness', 'instrumentalness',
                'liveness', 'valence', 'tempo', 'duration_ms']
features_cat = ['playlist_genre']

features = features_num + features_cat

# 80/20 split: sample the training rows with a fixed seed, then take every
# row whose index was not sampled as the validation set.
df_train = data.sample(frac=0.8, random_state=0)
df_valid = data.drop(df_train.index, axis=0)

# `features` never contains the target column, so selecting it is sufficient;
# no separate drop of "track_popularity" is required.
x_train_org = df_train[features]
x_valid_org = df_valid[features]
y_train_org = df_train["track_popularity"]
y_valid_org = df_valid["track_popularity"]

In [17]:
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.impute import SimpleImputer

# Numeric columns: fill missing values with the column median.
numerical_transformer = Pipeline(steps=[
    ("impute", SimpleImputer(strategy="median"))
])

# Categorical columns: fill missing values with the most frequent category,
# then one-hot encode. Categories not seen during fit are encoded as all zeros
# instead of raising.
categorical_transformer = Pipeline(steps=[
    ("impute", SimpleImputer(strategy="most_frequent")),
    ("oh", OneHotEncoder(handle_unknown="ignore"))
])

# Apply each pipeline to its own column group and stack the results.
# The one-hot branch yields a sparse matrix; with the default
# sparse_threshold the stacked output can silently become sparse when the
# overall density is low. sparse_threshold=0 guarantees a dense array, which is
# what the downstream Keras model is fed.
preprocessor = ColumnTransformer(
    transformers=[
        ("num", numerical_transformer, features_num),
        ("cat", categorical_transformer, features_cat)
    ],
    sparse_threshold=0,
)

In [16]:
# Fit the preprocessor on the training rows only, then apply the already-fitted
# transformer to the validation rows.
x_train = preprocessor.fit_transform(x_train_org)
x_valid = preprocessor.transform(x_valid_org)

# Divide both target series by the same factor of 100.
y_train, y_valid = (target.div(100) for target in (y_train_org, y_valid_org))

In [22]:
from tensorflow import keras
from tensorflow.keras import layers, callbacks

# Seed the Python, NumPy and backend random generators so weight
# initialisation, dropout masks and batch shuffling are repeatable between
# runs (the train/validation split is already seeded). Some GPU kernels may
# still be non-deterministic.
keras.utils.set_random_seed(0)

# Stop training once the validation loss stops improving and roll back to the
# best weights. `min_delta` is expressed in the units of the monitored loss;
# the targets were divided by 100 before training, so 0.01 here corresponds to
# one unit of the raw target.
early_stopping_callback = callbacks.EarlyStopping(
    monitor="val_loss",
    min_delta=0.01,
    patience=20,
    restore_best_weights=True)

# Three wide hidden layers with batch normalisation and dropout, followed by a
# single linear output for regression.
model = keras.Sequential([
    # keras.Input(shape=...) replaces InputLayer(input_shape=...), whose
    # `input_shape` keyword is deprecated in Keras 3.
    keras.Input(shape=(x_train.shape[1],)),
    layers.BatchNormalization(),
    layers.Dense(units=1024, activation="relu"),
    layers.BatchNormalization(),
    layers.Dropout(0.3), # in front of the layer we want to apply it to
    layers.Dense(units=1024, activation="relu"),
    layers.BatchNormalization(),
    layers.Dropout(0.3),
    layers.Dense(units=1024, activation="relu"),
    layers.Dense(units=1, activation="linear"),
])

# Mean absolute error as the training objective, optimised with Adam.
model.compile(
    loss="mae",
    optimizer="adam"
)

# 500 epochs is only an upper bound; the early-stopping callback normally ends
# training sooner.
history = model.fit(
    x_train, y_train,
    validation_data=(x_valid, y_valid),
    epochs=500,
    batch_size=150,
    callbacks=[early_stopping_callback]
)

In [25]:
# Tabulate the metrics recorded by `fit` so they can be plotted and queried.
history_df = pd.DataFrame(history.history)

# Plot the training and validation loss curves on the same axes.
history_df[["loss", "val_loss"]].plot()

# Report the lowest validation loss recorded during training.
print(history_df.val_loss.min())