<a href="https://colab.research.google.com/github/aladinor/notebooks/blob/origin%2Fmaster/gamma_params_retrieval.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
%load_ext tensorboard
# Use some functions from tensorflow_docs
!pip install -q git+https://github.com/tensorflow/docs

  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for tensorflow-docs (setup.py) ... [?25l[?25hdone


In [2]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt
import pandas as pd
from imblearn.over_sampling import RandomOverSampler
from collections import Counter
import tensorflow_docs as tfdocs
import tensorflow_docs.plots
import tensorflow_docs.modeling
from tensorflow.keras import regularizers
import pandas as pd
import numpy as np
from google.colab import drive
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [3]:
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
  # raise SystemError('GPU device not found')
  print('Found GPU at: {}'.format(device_name))
else:
  print("GPU not found")

GPU not found


In [4]:
df = pd.read_parquet('gdrive/My Drive/Colab Notebooks/all_data.parquet')
df['dfr'] = df['dbz_t_ku'] - df['dbz_t_ka']

In [5]:
df['dm_class'] = (df.dm > 1.0).astype(int)
df.shape

(8027, 45)

In [6]:
X, Y = df[['dbz_t_ku', 'dfr']], df[['log10_nw', 'dm', 'mu']]

In [7]:
scaler_x = StandardScaler()
scaler_y = StandardScaler()
X_scaled = scaler_x.fit_transform(X)
Y_scaled = scaler_y.fit_transform(Y)

In [8]:
Y_scaled = np.append(Y_scaled, df.dm_class.values[:, np.newaxis], axis=1)

In [9]:
X_train, X_test, y_train, y_test = train_test_split(X_scaled, Y_scaled, test_size=0.33, random_state=42)

In [10]:
def build_model():
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Dense(8, activation="relu",
                                    input_shape=[X_train.shape[1]]))
    model.add(tf.keras.layers.Dense(8, activation="relu"))
    model.add(tf.keras.layers.Dense(8, activation="relu"))
    model.add(tf.keras.layers.Dense(8, activation="relu"))
    model.add(tf.keras.layers.Dense(8, activation="relu"))
    model.add(tf.keras.layers.Dense(8, activation="relu"))
    model.add(tf.keras.layers.Dense(8, activation="relu"))
    model.add(tf.keras.layers.Dense(3))
    # optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)
    optimizer = tf.keras.optimizers.Adamax(learning_rate=0.001)
    # optimizer = tf.keras.optimizers.RMSprop(0.001)
    # optimizer = tf.keras.optimizers.Adadelta(0.1)
    # optimizer = tf.keras.optimizers.AdamW(0.001)
    # optimizer = tf.keras.optimizers.SGD(0.001)
    model.compile(loss="mse",
              optimizer=optimizer,
              metrics=['mae', 'mse'])
    return model

In [11]:
model = build_model()

In [None]:
class PrintDot(keras.callbacks.Callback):
  def on_epoch_end(self, epoch, logs):
    if epoch % 100 == 0: print('')
    print('.', end='')

EPOCHS = 1000

# The patience parameter is the amount of epochs to check for improvement
early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=20)


history = model.fit(X_train, y_train[:, :-1],
                    validation_data=(X_test, y_test[:,:-1]),
                    epochs=EPOCHS,
                    batch_size=32, verbose=0,
                    callbacks=[PrintDot(), early_stop])


....................................................................................................
....................................................................................................
......................................................................................

In [None]:
hist = pd.DataFrame(history.history)

In [None]:
def plot_history(history):
  fig, (ax, ax1) = plt.subplots(1, 2, figsize=(12, 5))
  hist['epoch'] = history.epoch


  ax.set_xlabel('Epoch')
  ax.set_ylabel('Mean Abs Error')
  ax.plot(hist['epoch'], hist['mae'],
           label='Train Error')
  try:
    ax.plot(hist['epoch'], hist['val_mae'],
             label = 'Val Error')
  except KeyError:
    pass
  # plt.ylim([0,5])
  ax.legend()

  ax1.set_xlabel('Epoch')
  ax1.set_ylabel('Mean Square Error')
  ax1.plot(hist['epoch'], hist['mse'],
           label='Train Error')
  try:
    ax1.plot(hist['epoch'], hist['val_mse'],
              label = 'Val Error')
  except KeyError:
    pass
  # plt.ylim([0,3])
  ax1.legend()
  plt.show()


plot_history(history)

In [None]:
yhat = model.predict(X_test[:, :2])
yhat = scaler_y.inverse_transform(yhat)
y_test_unscaled = scaler_y.inverse_transform(y_test[:, :-1])

In [None]:
def metrics_pd(x, xpred):
    rmse = np.sqrt(np.sum((xpred - x) ** 2 / len(x)))
    mae = np.sum(np.abs(xpred - x)) / len(x)
    corr = np.corrcoef(x, xpred)**2.
    return rmse, mae, corr[0, 1]

In [None]:
# 'log10_nw', 'dm', 'mu'
fig, (ax, ax1, ax2) = plt.subplots(1, 3, figsize=(13, 4))
ax.scatter(y_test_unscaled[:, 0], yhat[:, 0], s=1)
ax.set_xlabel(r"$Log_{10}(Nw) \ - \ True $")
ax.set_ylabel(r"$Log_{10}(Nw) \ - \ Est. $")
x = np.linspace(*ax.get_xlim())
ax.plot(x, x, c='k', ls='--', lw=0.8)
rmse, mae, corr1 = metrics_pd(y_test[:, 0], yhat[:, 0])
ax.text(3, 10, r"$r^2$" + f"={corr1:.2f}")
ax.text(3, 9.5, r"$RMSE$" + f"={rmse:.2f}")
ax.text(3, 9, r"$MAE$" + f"={mae:.2f}")

ax1.scatter(y_test_unscaled[:, 1], yhat[:, 1], s=1)
ax1.set_xlabel(r"$Dm \ - \ True $")
ax1.set_ylabel(r"$Dm \ - \ Est. $")
x = np.linspace(*ax1.get_xlim())
ax1.plot(x, x, c='k', ls='--', lw=0.8)
rmse, mae, corr1 = metrics_pd(y_test[:, 1], yhat[:, 1])
ax1.text(0.3, 2.4, r"$r^2$" + f"={corr1:.2f}")
ax1.text(0.3, 2.2, r"$RMSE$" + f"={rmse:.2f}")
ax1.text(0.3, 2.0, r"$MAE$" + f"={mae:.2f}")

ax2.scatter(y_test_unscaled[:, 2], yhat[:, 2], s=1)
ax2.set_xlabel(r"$\mu  \ - \ True $")
ax2.set_ylabel(r"$\mu \ - \ Est. $")
rmse, mae, corr1 = metrics_pd(y_test[:, 2], yhat[:, 2])
ax2.text(0, 120, r"$r^2$" + f"={corr1:.2f}")
ax2.text(0, 110, r"$RMSE$" + f"={rmse:.2f}")
ax2.text(0, 100, r"$MAE$" + f"={mae:.2f}")
x = np.linspace(*ax2.get_xlim())
ax2.plot(x, x, c='k', ls='--', lw=0.8)

plt.tight_layout()

## Trying to balance the data

In [None]:
lower, greater = np.bincount(df['dm_class'])
total = greater + lower
print('Examples:\n    Total: {}\n    greater: {} ({:.2f}% of total)\n'.format(
    total, greater, 100 * greater / total))

In [None]:
X, Y = df[['dbz_t_ku', 'dfr', 'log10_nw', 'dm', 'mu']], df['dm_class']

In [None]:
scaler_x = StandardScaler()
X_scaled = scaler_x.fit_transform(X)

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X_scaled, Y, test_size=0.33, random_state=42)

In [None]:
over_sampler = RandomOverSampler(random_state=42)
X_res, y_res = over_sampler.fit_resample(X_train, y_train)
print(f"Training target statistics: {Counter(y_res)}")
print(f"Testing target statistics: {Counter(y_test)}")

In [None]:
def build_model():
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Dense(8, activation="relu",
                                    input_shape=[X_train.shape[1]-3]))
    model.add(tf.keras.layers.Dense(8, activation="relu"))
    model.add(tf.keras.layers.Dense(8, activation="relu"))
    model.add(tf.keras.layers.Dense(8, activation="relu"))
    model.add(tf.keras.layers.Dense(8, activation="relu"))
    model.add(tf.keras.layers.Dense(8, activation="relu"))
    model.add(tf.keras.layers.Dense(8, activation="relu"))
    model.add(tf.keras.layers.Dense(3))
    # optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)
    optimizer = tf.keras.optimizers.Adamax(learning_rate=0.001)
    # optimizer = tf.keras.optimizers.RMSprop(0.001)
    # optimizer = tf.keras.optimizers.Adadelta(0.1)
    # optimizer = tf.keras.optimizers.AdamW(0.001)
    # optimizer = tf.keras.optimizers.SGD(0.001)
    model.compile(loss="mse",
              optimizer=optimizer,
              metrics=['mae', 'mse'])
    return model

In [None]:
model = build_model()

In [None]:
EPOCHS = 500
history = model.fit(X_res[:, :2], X_res[:, 2:],
                    validation_data=(X_res[:, :2], X_res[:, 2:]),
                    epochs=EPOCHS,
                    batch_size=32, verbose=0,
                    callbacks=[PrintDot(), early_stop])

In [None]:
hist = pd.DataFrame(history.history)
plot_history(history)

In [None]:
yhat = model.predict(X_test[:, :2])
yhat = scaler_y.inverse_transform(yhat)
y_test_unscaled = scaler_y.inverse_transform(X_test[:, 2:])

In [None]:
# 'log10_nw', 'dm', 'mu'
fig, (ax, ax1, ax2) = plt.subplots(1, 3, figsize=(13, 4))
ax.scatter(y_test_unscaled[:, 0], yhat[:, 0], s=1)
ax.set_xlabel(r"$Log_{10}(Nw) \ - \ True $")
ax.set_ylabel(r"$Log_{10}(Nw) \ - \ Est. $")
x = np.linspace(*ax.get_xlim())
ax.plot(x, x, c='k', ls='--', lw=0.8)
rmse, mae, corr1 = metrics_pd(y_test_unscaled[:, 0], yhat[:, 0])
ax.text(3, 10, r"$r^2$" + f"={corr1:.2f}")
ax.text(3, 9.5, r"$RMSE$" + f"={rmse:.2f}")
ax.text(3, 9, r"$MAE$" + f"={mae:.2f}")

ax1.scatter(y_test_unscaled[:, 1], yhat[:, 1], s=1)
ax1.set_xlabel(r"$Dm \ - \ True $")
ax1.set_ylabel(r"$Dm \ - \ Est. $")
x = np.linspace(*ax1.get_xlim())
ax1.plot(x, x, c='k', ls='--', lw=0.8)
rmse, mae, corr1 = metrics_pd(y_test_unscaled[:, 1], yhat[:, 1])
ax1.text(0.3, 2.4, r"$r^2$" + f"={corr1:.2f}")
ax1.text(0.3, 2.2, r"$RMSE$" + f"={rmse:.2f}")
ax1.text(0.3, 2.0, r"$MAE$" + f"={mae:.2f}")

ax2.scatter(y_test_unscaled[:, 2], yhat[:, 2], s=1)
ax2.set_xlabel(r"$\mu  \ - \ True $")
ax2.set_ylabel(r"$\mu \ - \ Est. $")
rmse, mae, corr1 = metrics_pd(y_test_unscaled[:, 2], yhat[:, 2])
ax2.text(0, 120, r"$r^2$" + f"={corr1:.2f}")
ax2.text(0, 110, r"$RMSE$" + f"={rmse:.2f}")
ax2.text(0, 100, r"$MAE$" + f"={mae:.2f}")
x = np.linspace(*ax2.get_xlim())
ax2.plot(x, x, c='k', ls='--', lw=0.8)

plt.tight_layout()

In [None]:
bar_colors = ['tab:red', 'tab:blue']
fig, ax = plt.subplots()
ax.bar(['Lower', "Greater"], np.bincount(y_train), color=bar_colors)
ax.set_ylabel("Counts")

In [None]:
bar_colors = ['tab:red', 'tab:blue']
fig, ax = plt.subplots()
ax.bar(['Lower', "Greater"], np.bincount(y_res), color=bar_colors)
ax.set_ylabel("Counts")

## Multi-class classification

Lets see if instead of predicting the parameters of the NG psd, we can predict which class it belong to considering our previous kmeans classifier.

In [None]:
X, Y = df[['dbz_t_ku', 'dfr']], df['kmeans_6']
scaler_x = StandardScaler()
X_scaled = scaler_x.fit_transform(X)

In [None]:
X_train, X_test, train_labels, test_labels = train_test_split(X_scaled, Y, test_size=0.33, random_state=42)

In [None]:
over_sampler = RandomOverSampler(random_state=42)
X_train_res, train_lab_res = over_sampler.fit_resample(X_train, train_labels)
print(f"Training target statistics: {Counter(train_lab_res)}")
print(f"Testing target statistics: {Counter(test_labels)}")

In [None]:
def build_model2():
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Dense(2, activation="relu",
                                    input_shape=[X_train.shape[1]]))
    model.add(tf.keras.layers.Dense(8, activation="relu"))
    # model.add(tf.keras.layers.Dense(8, activation="relu"))
    # model.add(tf.keras.layers.Dense(8, activation="relu"))
    model.add(tf.keras.layers.Dense(8, activation="relu"))
    model.add(tf.keras.layers.Dense(8, activation="relu"))
    model.add(tf.keras.layers.Dense(8, activation="relu"))
    model.add(tf.keras.layers.Dense(6,  activation="softmax"))
    # optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)
    # optimizer = tf.keras.optimizers.Adamax(learning_rate=0.001)
    # optimizer = tf.keras.optimizers.RMSprop(0.001)
    # optimizer = tf.keras.optimizers.Adadelta(0.1)
    # optimizer = tf.keras.optimizers.AdamW(0.001)
    optimizer = tf.keras.optimizers.SGD(0.001)
    model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                  optimizer=optimizer,
                  metrics=['accuracy'])
    return model

In [None]:
model = build_model2()

In [None]:
EPOCHS = 500
history = model.fit(X_train_res, train_lab_res,
                    validation_data=(X_test, test_labels),
                    epochs=EPOCHS,
                    batch_size=32, verbose=0,
                    callbacks=[PrintDot(), early_stop])

In [None]:
hist = pd.DataFrame(history.history)
hist

In [None]:
hist.plot()

In [None]:
lrs = 1e-3 * (10**(np.arange(101)/20))
plt.semilogx(lrs, hist["loss"])
plt.semilogx(lrs, hist["val_loss"]) # want the x-axis to be log-scale
plt.xlabel("Learning rate")
plt.ylabel("Loss")
plt.title("Finding the ideal learning rate");