# Using Tensorflow Dataset and Modeling


# Imports

In [None]:
# !pip uninstall tf-keras
# !pip install tensorflow==2.16.1

In [2]:
import keras
import tensorflow as tf
print("Keras Current Version:", keras.__version__, "Tensorflow Current Version:", tf.__version__)

Keras Current Version: 3.5.0 Tensorflow Current Version: 2.17.0


In [4]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense
import tensorflow as tf

# Data Preparation

## Task 1: proprocess_data Fonksiyonunda MinMaxScaler'ı Kullanınız.

- Import bölümünde MinMaxScaler'ı import etmeyi unutmayınız.
- preprocess_data fonksiyonu içinde scaler olarak MinMaxScaler'ı kullanınız.
- Sonrasınra train validasyon ayrımını yapınız.

In [49]:
def preprocess_data(filepath):
    data = pd.read_csv(filepath)
    # Buraya MinMaxScaler ekleyiniz.
    scaler = MinMaxScaler()
    X = scaler.fit_transform(data.drop('Outcome', axis=1))
    y = data['Outcome'].values
    return X, y


X, y = preprocess_data('data_sets/diabetes.csv')

# train validasyon ayrımını yapınız.
X_train, X_val, y_train, y_val = train_test_split(X,y,test_size=0.2,random_state=42)

print("X_train shape:", X_train.shape)

X_train shape: (614, 8)


# Create Tensorflow Dataset


## Task 2: Train Seti ve Validasyon Seti için Tensorflow Dataset Oluşturunuz

In [73]:
train_dataset = tf.data.Dataset.from_tensor_slices((X_train,y_train))
val_dataset = tf.data.Dataset.from_tensor_slices((X_val,y_val))

## Task 3: val_dataset'in 1 Gözlemini İnceleyiniz

In [76]:
for features,label in val_dataset.take(1):
    print("f:", features.numpy(), "l:",label.numpy())

f: [0.35294118 0.49246231 0.47540984 0.33333333 0.22458629 0.50670641
 0.15029889 0.36666667] l: 0


## Task 4: buffer_size'ı tüm veri seti boyutu olarak ve batch'ı 32 olacak şekilde train ve validasyon setlerini biçimlendiriniz. Validasyon seti için sadece batch uyguladığımızı unutmayın.

In [78]:
train_dataset = train_dataset.shuffle(buffer_size=len(X_train)).batch(32)
val_dataset = val_dataset.batch(32)

## Task 5: Validasyon Seti için 1 Gözlemi İnceleyiniz. 10. batch'in feature değerleri nelerdir? Label değeri nedir? Amelasyon ile tek tek sayarak inceleyiniz.

In [82]:
for features,label in val_dataset.take(1):
    print("f:", features.numpy(),"l:",label.numpy())
# daha onceden take(1) dedigimizde tek deger gelirken batch(32) yaptıktan sonra take(1) ile 32 deger birden geldi

f: [[0.35294118 0.49246231 0.47540984 0.33333333 0.22458629 0.50670641
  0.15029889 0.36666667]
 [0.11764706 0.56281407 0.6147541  0.32323232 0.         0.53204173
  0.02988898 0.        ]
 [0.11764706 0.54271357 0.52459016 0.         0.         0.45901639
  0.03415884 0.        ]
 [0.47058824 0.53768844 0.6557377  0.         0.         0.36661699
  0.33219471 0.21666667]
 [0.41176471 0.68341709 0.73770492 0.         0.         0.44560358
  0.05636208 0.48333333]
 [0.35294118 0.51758794 0.59016393 0.32323232 0.22458629 0.56184799
  0.10503843 0.56666667]
 [0.05882353 0.35678392 0.39344262 0.18181818 0.08983452 0.30402385
  0.10461144 0.01666667]
 [0.         0.5879397  0.         0.         0.         0.50372578
  0.3646456  0.38333333]
 [0.23529412 0.77386935 0.59016393 0.29292929 0.14893617 0.46646796
  0.11101623 0.26666667]
 [0.29411765 0.73869347 0.63934426 0.         0.         0.50223547
  0.05977797 0.73333333]
 [0.58823529 0.55778894 0.57377049 0.27272727 0.         0.40983607

# Model

## Task 6: Dersteki modeli 100 epoch sayısı ile tekrar eğitiniz ve loss ve accuracy değerlerini yorumlayınız. Tüm işlemleri tek bir hücrede yapınız.

In [88]:
model = Sequential()
model.add(Input(shape=(X_train.shape[1],)))
model.add(Dense(32,activation="relu"))
model.add(Dense(16,activation="relu"))
model.add(Dense(1,activation="sigmoid"))
model.compile(loss="binary_crossentropy",metrics=["accuracy"])

In [90]:
model.fit(train_dataset,epochs=100,validation_data=val_dataset)

Epoch 1/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.5076 - loss: 0.6953 - val_accuracy: 0.6429 - val_loss: 0.6729
Epoch 2/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6380 - loss: 0.6689 - val_accuracy: 0.6429 - val_loss: 0.6552
Epoch 3/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6700 - loss: 0.6450 - val_accuracy: 0.6429 - val_loss: 0.6477
Epoch 4/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6349 - loss: 0.6476 - val_accuracy: 0.6429 - val_loss: 0.6417
Epoch 5/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6565 - loss: 0.6325 - val_accuracy: 0.6429 - val_loss: 0.6347
Epoch 6/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6779 - loss: 0.6138 - val_accuracy: 0.6429 - val_loss: 0.6272
Epoch 7/100
[1m20/20[0m [32m━━━

<keras.src.callbacks.history.History at 0x21302b591f0>

In [None]:
# derste elde edilen değerler
# val_loss: 0.59
# val_accuracy: 0.70

# benim  yaptığım
# val_loss: 0.51
# val_acc: 0.76 

In [65]:
# Task 6 sonucu:
# Validation Loss: 0.51
# Validation Accuracy: 0.76
# Yorum:veri daha cok iterasyon yaptığı için daha iyi öğrendi ancak daha da fazla iterasyon overfittinge neden olabilir

## Task 7: 5 katmanlı ve nöron sayıları sırasıyla 32, 64, 128, 256, 512 olan bir model kurunuz ve sonuçları değerlendiriniz. Diğer özelliklerde bir değişiklik olmamalı.

In [31]:
model = Sequential()
model.add(Input(shape=(X_train.shape[1],)))
model.add(Dense(32,activation="relu"))
model.add(Dense(64,activation="relu"))
model.add(Dense(128,activation="relu"))
model.add(Dense(256,activation="relu"))
model.add(Dense(512,activation="relu"))
model.add(Dense(1,activation="sigmoid"))

model.compile(loss="binary_crossentropy",metrics=["accuracy"])

In [33]:
model.fit(train_dataset,epochs=100,validation_data=val_dataset)

Epoch 1/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 0.6430 - loss: 0.6726 - val_accuracy: 0.6429 - val_loss: 0.6210
Epoch 2/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6790 - loss: 0.6245 - val_accuracy: 0.6753 - val_loss: 0.5814
Epoch 3/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6913 - loss: 0.5827 - val_accuracy: 0.5260 - val_loss: 0.6987
Epoch 4/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6773 - loss: 0.5770 - val_accuracy: 0.6429 - val_loss: 0.8587
Epoch 5/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6851 - loss: 0.6044 - val_accuracy: 0.7078 - val_loss: 0.5496
Epoch 6/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7519 - loss: 0.5159 - val_accuracy: 0.7338 - val_loss: 0.5376
Epoch 7/100
[1m20/20[0m [32m━━━

<keras.src.callbacks.history.History at 0x21305121220>

In [35]:
# Yorum: loss degerimiz cok arttı, dogrulugumuz dustu

# Neden?: agırlıklarımız derinliklerde kayboldu ve verilerimize loss degerine sebep oldukları
# icin ceza kesemedi. modelin karmaşıklığını arttırdık

## Task 8: Acaba Epoch Sayısını Arttırsak bir Faydası Olur mu? 1000 ile deneyelim. Sonuçları yorumlayınız.

In [37]:
model = Sequential()
model.add(Input(shape=(X_train.shape[1],)))
model.add(Dense(32,activation="relu"))
model.add(Dense(64,activation="relu"))
model.add(Dense(128,activation="relu"))
model.add(Dense(256,activation="relu"))
model.add(Dense(512,activation="relu"))
model.add(Dense(1,activation="sigmoid"))

model.compile(loss="binary_crossentropy",metrics=["accuracy"])


In [None]:
model.fit(train_dataset,epochs=1000,validation_data=val_dataset)

Epoch 1/1000
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.6222 - loss: 0.6687 - val_accuracy: 0.6429 - val_loss: 0.6682
Epoch 2/1000
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6715 - loss: 0.6144 - val_accuracy: 0.6104 - val_loss: 0.6751
Epoch 3/1000
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7138 - loss: 0.5735 - val_accuracy: 0.5779 - val_loss: 0.6812
Epoch 4/1000
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6745 - loss: 0.5788 - val_accuracy: 0.6558 - val_loss: 0.5867
Epoch 5/1000
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7446 - loss: 0.5209 - val_accuracy: 0.6948 - val_loss: 0.5639
Epoch 6/1000
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7575 - loss: 0.5178 - val_accuracy: 0.6104 - val_loss: 0.6528
Epoch 7/1000
[1m20/20[0m 

In [92]:
# Yorum: daha da kötü sonuçlar aldım. sebebi modelimin overfitting yaşaması ve train setimi ezberlemesi
# epoch ve model parametrelerini sağlıklı olarak belirlemek çok çok önemli onu anladık