# Deep Learning with TensorFlow

#### Install required packages

In [1]:
# Install the data and plotting packages into the kernel's environment (%pip targets
# the running kernel). NOTE(review): no versions are pinned, so results may differ
# between environments.
%pip install pandas numpy scikit-learn matplotlib seaborn

Collecting seaborn
  Using cached seaborn-0.13.2-py3-none-any.whl.metadata (5.4 kB)
Using cached seaborn-0.13.2-py3-none-any.whl (294 kB)
Installing collected packages: seaborn
Successfully installed seaborn-0.13.2
Note: you may need to restart the kernel to use updated packages.


## Explore the dataset

In [2]:
# Load the dataset
import pandas as pd

# Read the full penguins table, then discard every row that has a missing value
penguins = pd.read_csv('./../../data/penguins.csv')
penguins = penguins.dropna()

# DataFrame.size is the number of cells (rows x columns), not the number of rows
print(penguins.size)

1710


In [3]:
# Preview the first rows of the loaded data
penguins.head()

Unnamed: 0,CulmenLength,CulmenDepth,FlipperLength,BodyMass,Species
0,39.1,18.7,181.0,3750.0,0
1,39.5,17.4,186.0,3800.0,0
2,40.3,18.0,195.0,3250.0,0
4,36.7,19.3,193.0,3450.0,0
5,39.3,20.6,190.0,3650.0,0


In [4]:
# Feature rescaling: divide FlipperLength by 10 and BodyMass by 100.
# NOTE: this overwrites the columns in place, so re-running the cell rescales them again.
penguins['FlipperLength'] = penguins['FlipperLength'].div(10)
penguins['BodyMass'] = penguins['BodyMass'].div(100)

In [5]:
# Preview again to check the rescaled FlipperLength and BodyMass columns
penguins.head()

Unnamed: 0,CulmenLength,CulmenDepth,FlipperLength,BodyMass,Species
0,39.1,18.7,18.1,37.5,0
1,39.5,17.4,18.6,38.0,0
2,40.3,18.0,19.5,32.5,0
4,36.7,19.3,19.3,34.5,0
5,39.3,20.6,19.0,36.5,0


# Handle Dataset Size

The dataset is too small for useful deep learning, so we increase its size with **oversampling** (duplicating the existing rows).

In [6]:
# Cell count (rows x columns) before oversampling
print(penguins.size)

# Oversampling: three successive doublings are the same as stacking 8 copies of the
# frame with a fresh 0..N-1 index.
# NOTE(review): rows are duplicated before the train/test split, so identical rows
# can end up in both sets; re-running this cell multiplies the data again.
penguins = pd.concat([penguins] * 8, ignore_index=True)

# Cell count after oversampling
print(penguins.size)

1710
13680


In [7]:
# Draw 10 random observations (unseeded, so the rows differ on every run) and show them
sample = penguins.sample(n=10)
sample

Unnamed: 0,CulmenLength,CulmenDepth,FlipperLength,BodyMass,Species
1955,44.5,14.7,21.4,48.5,1
1409,36.0,18.5,18.6,31.0,0
402,41.3,21.1,19.5,44.0,0
456,42.7,18.3,19.6,40.75,0
818,41.1,17.5,19.0,39.0,0
2050,50.8,19.0,21.0,41.0,2
2282,45.2,16.4,22.3,59.5,1
1053,37.9,18.6,17.2,31.5,0
1635,44.5,15.7,21.7,48.75,1
966,46.0,18.9,19.5,41.5,2


In [8]:
# Target column: list the distinct species codes present in the data
penguins['Species'].unique()

array([0, 1, 2])

In [9]:
# Distinct values of the Species label column
penguins['Species'].unique()

array([0, 1, 2])

In [10]:
# Human-readable species names, indexed by the integer code in the Species column
penguin_classes = ['Adelie', 'Gentoo', 'Chinstrap']

# Header: the five data columns followed by the decoded species name
print(penguins.columns[0:5].values, 'SpeciesName')

# Print 10 random rows with their species name.
# Each row is a Series labelled by column name, so positional access must go through
# .iloc; plain row[0] relies on a deprecated integer-key fallback and emits a FutureWarning.
for index, row in penguins.sample(10).iterrows():
    print('[', row.iloc[0], row.iloc[1], row.iloc[2], row.iloc[3], int(row.iloc[4]), ']',
          penguin_classes[int(row.iloc[-1])])

['CulmenLength' 'CulmenDepth' 'FlipperLength' 'BodyMass' 'Species'] SpeciesName
[ 36.2 17.3 18.7 33.0 0 ] Adelie
[ 43.2 19.0 19.7 47.75 0 ] Adelie
[ 50.9 19.1 19.6 35.5 2 ] Chinstrap
[ 34.4 18.4 18.4 33.25 0 ] Adelie
[ 48.2 15.6 22.1 51.0 1 ] Gentoo
[ 47.3 15.3 22.2 52.5 1 ] Gentoo
[ 48.4 14.4 20.3 46.25 1 ] Gentoo
[ 45.5 13.9 21.0 42.0 1 ] Gentoo
[ 47.5 15.0 21.8 49.5 1 ] Gentoo
[ 47.2 15.5 21.5 49.75 1 ] Gentoo


  print('[',row[0], row[1], row[2],row[3], int(row[4]), ']',penguin_classes[int(row[-1])])


In [11]:
# Distinct values of the Species label column
penguins['Species'].unique()

array([0, 1, 2])

## Split the dataset

In [12]:
from sklearn.model_selection import train_test_split

# Input columns and the target column
features = ['CulmenLength','CulmenDepth','FlipperLength','BodyMass']
label = 'Species'

# Hold out 30% of the rows for testing; random_state fixes the shuffle
x_train, x_test, y_train, y_test = train_test_split(
    penguins[features].values,
    penguins[label].values,
    test_size=0.30,
    random_state=0,
)

print ('Training Set: %d, Test Set: %d \n' % (len(x_train), len(x_test)))
print("Sample of features and labels:")

# Show the first 24 training rows with their label code and species name
for n in range(24):
    print(x_train[n], y_train[n], '(' + penguin_classes[y_train[n]] + ')')

Training Set: 1915, Test Set: 821 

Sample of features and labels:
[42.5 20.7 19.7 45. ] 0 (Adelie)
[45.6 20.3 19.1 46. ] 0 (Adelie)
[42.6 13.7 21.3 49.5] 1 (Gentoo)
[40.3 18.5 19.6 43.5] 0 (Adelie)
[39.7 17.7 19.3 32. ] 0 (Adelie)
[46.4 17.8 19.1 37. ] 2 (Chinstrap)
[38.8 20.  19.  39.5] 0 (Adelie)
[48.8 16.2 22.2 60. ] 1 (Gentoo)
[46.2 17.5 18.7 36.5] 2 (Chinstrap)
[59.6 17.  23.  60.5] 1 (Gentoo)
[41.5 18.5 20.1 40. ] 0 (Adelie)
[39.5 16.7 17.8 32.5] 0 (Adelie)
[50.5 15.9 22.5 54. ] 1 (Gentoo)
[45.6  19.4  19.4  35.25] 2 (Chinstrap)
[37.9  18.6  19.3  29.25] 0 (Adelie)
[51.1 16.3 22.  60. ] 1 (Gentoo)
[52.8 20.  20.5 45.5] 2 (Chinstrap)
[39.6 17.2 19.6 35.5] 0 (Adelie)
[51.1 16.3 22.  60. ] 1 (Gentoo)
[36.7 18.8 18.7 38. ] 0 (Adelie)
[49.1 15.  22.8 55. ] 1 (Gentoo)
[37.6 19.1 19.4 37.5] 0 (Adelie)
[36.  17.8 19.5 34.5] 0 (Adelie)
[39.1 18.7 18.1 37.5] 0 (Adelie)


## Install and import tensorflow

In [13]:
%pip install --no-cache-dir tensorflow

Collecting tensorflow
  Downloading tensorflow-2.19.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.1 kB)
Collecting absl-py>=1.0.0 (from tensorflow)
  Downloading absl_py-2.3.0-py3-none-any.whl.metadata (2.4 kB)
Collecting astunparse>=1.6.0 (from tensorflow)
  Downloading astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)
Collecting flatbuffers>=24.3.25 (from tensorflow)
  Downloading flatbuffers-25.2.10-py2.py3-none-any.whl.metadata (875 bytes)
Collecting gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 (from tensorflow)
  Downloading gast-0.6.0-py3-none-any.whl.metadata (1.3 kB)
Collecting google-pasta>=0.1.1 (from tensorflow)
  Downloading google_pasta-0.2.0-py3-none-any.whl.metadata (814 bytes)
Collecting libclang>=13.0.0 (from tensorflow)
  Downloading libclang-18.1.1-py2.py3-none-manylinux2010_x86_64.whl.metadata (5.2 kB)
Collecting opt-einsum>=2.3.2 (from tensorflow)
  Downloading opt_einsum-3.4.0-py3-none-any.whl.metadata (6.3 kB)
Collecting protobuf!=4.21.0,!

In [16]:
# Import necessary libraries: standard library first, then TensorFlow and its Keras API
import importlib.metadata

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import models, optimizers, utils
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential

# Fix TensorFlow's global random seed for reproducibility
tf.random.set_seed(0)

print("Libraries imported.")

Libraries imported.


In [17]:
# Report the TensorFlow and Keras versions in use
print('TensorFlow version:', tf.__version__)

# Keras is looked up as an installed distribution; fall back to a message if there is none
try:
    keras_version = importlib.metadata.version("keras")
except importlib.metadata.PackageNotFoundError:
    print("Keras is not installed separately, using TensorFlow's built-in Keras.")
else:
    print('Keras version:', keras_version)

TensorFlow version: 2.19.0
Keras version: 3.10.0


In [18]:
# Cast the feature arrays to float32
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')

# One-hot encode the integer labels. The width is fixed to the number of known
# classes so train and test always get the same number of columns, even if one
# split happens not to contain the highest class code.
# NOTE: run this cell once per split; re-running it would encode the one-hot arrays again.
y_train = utils.to_categorical(y_train, num_classes=len(penguin_classes))
y_test = utils.to_categorical(y_test, num_classes=len(penguin_classes))
print('Ready...')

Ready...
