In [None]:
#1. Open a Jupyter notebook to complete the activity.
#2. Import the TensorFlow and pandas libraries.
#3. Load in the superconductivity.csv dataset.
#4. Drop any rows that have null values.
#5. Set the target values to true when values of the critical_temp column are above 77.36 and false when below. The feature dataset is the remaining columns in the dataset.
#6. Rescale the feature dataset using a standard scaler.
#7. Initialize a model of the Keras Sequential class.
#8. Add an input layer, three hidden layers of sizes 32, 16, and 8, and an output layer with a sigmoid activation function of size 1 to the model.
#9. Compile the model with an RMSprop optimizer with a learning rate equal to 0.0001 and binary cross-entropy for the loss and compute the accuracy metric.
#10. Add a callback to write logs to TensorBoard. (optional)
#11. Fit the model to the training data for 50 epochs and a validation split equal to 0%.
#12. Evaluate the model on the training data.

In [2]:
import tensorflow as tf
import pandas as pd

In [3]:
# Load data
df = pd.read_csv("https://raw.githubusercontent.com/fenago/datasets/main/superconductivity.csv")

In [4]:
# View a sample
df.sample(5)

Unnamed: 0,number_of_elements,mean_atomic_mass,wtd_mean_atomic_mass,gmean_atomic_mass,wtd_gmean_atomic_mass,entropy_atomic_mass,wtd_entropy_atomic_mass,range_atomic_mass,wtd_range_atomic_mass,std_atomic_mass,...,wtd_mean_Valence,gmean_Valence,wtd_gmean_Valence,entropy_Valence,wtd_entropy_Valence,range_Valence,wtd_range_Valence,std_Valence,wtd_std_Valence,critical_temp
6352,5,101.62801,55.981263,78.379583,36.45523,1.438484,1.430809,134.3606,19.032623,52.867028,...,2.092664,2.491462,2.0728,1.564957,1.277334,2,1.011583,0.8,0.339066,41.0
14818,5,81.789084,52.617798,45.923551,30.44545,1.211225,1.183367,154.11932,22.737765,69.089853,...,2.833333,2.930156,2.749459,1.586785,1.411949,2,0.95,0.632456,0.687184,8.2
13024,3,130.298627,116.146323,123.093539,110.063924,1.039711,0.758662,102.17762,67.107147,45.991695,...,5.25,5.646216,5.233176,1.095078,0.69115,1,3.6,0.471405,0.433013,7.9
9073,5,88.679574,57.453544,70.154267,35.982178,1.445332,1.00661,122.90607,36.455794,46.485352,...,2.267857,2.168944,2.229448,1.594167,1.050239,1,1.125714,0.4,0.442843,10.4
5707,4,51.81085,42.722456,43.468565,33.871021,1.241927,1.315143,71.6206,8.83856,26.646429,...,2.0,2.0,2.0,1.386294,1.290165,0,0.56,0.0,0.0,109.0


In [5]:
# Clear all empty values
df.dropna(inplace=True)

In [6]:
# Create target (y) and features (x)
target = df['critical_temp'].apply(lambda x: 1 if x>77.36 else 0)
features = df.drop('critical_temp', axis=1)
print(f'dimensions (x, y): {features.shape, target.shape}')

dimensions (x, y): ((21263, 81), (21263,))


In [7]:
# Rescale input data
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
feature_array = scaler.fit_transform(features)
features = pd.DataFrame(feature_array, columns=features.columns)

In [8]:
# Create NN object
model = tf.keras.Sequential()

# Add input layer: size coresponds to the number of x components
model.add(tf.keras.layers.InputLayer(input_shape=(features.shape[1],), name='Input_layer'))

# Add hidden layers
model.add(tf.keras.layers.Dense(32, name='Dense_layer_1'))
model.add(tf.keras.layers.Dense(16, name='Dense_layer_2'))
model.add(tf.keras.layers.Dense(8, name='Dense_layer_3'))

# Add output layer: size corresponds to the number of y components
model.add(tf.keras.layers.Dense(1, name='Output_layer', activation='sigmoid'))

In [9]:
# Set back propagation learner/optimizer 
model.compile(tf.optimizers.RMSprop(0.0001), loss='binary_crossentropy', metrics=['accuracy'])

In [10]:
# Actual training
model.fit(x=features.to_numpy(), y=target.to_numpy(), epochs=50, validation_split=0.5)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<tensorflow.python.keras.callbacks.History at 0x7ff390576040>

In [11]:
loss, accuracy = model.evaluate(features.to_numpy(), target.to_numpy())
print(f'loss: {loss}, accuracy: {accuracy}')

loss: 0.23993106186389923, accuracy: 0.8772045373916626
