<a href="https://colab.research.google.com/github/benson1231/DeepLearning/blob/main/NeuralNetwork.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import drive
drive.mount('/content/drive')
data_path = "drive/MyDrive/MachineLearning/insurance.csv"

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import Normalizer

# 載入數據集
dataset = pd.read_csv(data_path)
# 選擇前6列作為特徵
features = dataset.iloc[:,0:6]
# 選擇最後一列作為目標標籤
labels = dataset.iloc[:,-1]

# 對類別型變數進行one-hot編碼
features = pd.get_dummies(features)
# 將數據分為訓練集和測試集
features_train, features_test, labels_train, labels_test = train_test_split(features, labels, test_size=0.33, random_state=42)

# 使用ColumnTransformer，對指定列進行標準化
my_ct = ColumnTransformer([('scale', StandardScaler(), ['age', 'bmi', 'children'])], remainder='passthrough')
# 對訓練集應用標準化，並轉換為DataFrame格式
features_train_scale = my_ct.fit_transform(features_train)
# 對測試集應用訓練好的標準化轉換，並轉換為DataFrame格式
features_test_scale = my_ct.transform(features_test)

# ColumnTransformer返回的是numpy陣列，需轉換回pandas DataFrame
features_train_scale = pd.DataFrame(features_train_scale, columns = features_train.columns)
features_test_scale = pd.DataFrame(features_test_scale, columns = features_test.columns)

# 列印數據統計摘要
print(features_train_scale.describe())
print(features_test_scale.describe())

               age         sex         bmi  children  smoker region_0rtheast  \
count   896.000000  896.000000  896.000000       896     896             896   
unique   47.000000  455.000000    6.000000         2       2               2   
top      -1.494934    0.293843   -0.912607         0       0           False   
freq     53.000000   12.000000  383.000000       459     708             666   

       region_0rthwest region_southeast region_southwest  
count              896              896              896  
unique               2                2                2  
top              False            False            False  
freq               670              667              685  
               age         sex         bmi  children  smoker region_0rtheast  \
count   442.000000  442.000000  442.000000       442     442             442   
unique   47.000000  312.000000    6.000000         2       2               2   
top      -1.424533   -0.484495   -0.912607         1       0    

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers


def design_model(features):
  model = Sequential()
  return model

dataset = pd.read_csv(data_path) #load the dataset
features = dataset.iloc[:,0:6] #choose first 7 columns as features
labels = dataset.iloc[:,-1] #choose the final column for prediction

features = pd.get_dummies(features) #one-hot encoding for categorical variables
features_train, features_test, labels_train, labels_test = train_test_split(features, labels, test_size=0.33, random_state=42) #split the data into training and test data

#standardize
ct = ColumnTransformer([('standardize', StandardScaler(), ['age', 'bmi', 'children'])], remainder='passthrough')
features_train = ct.fit_transform(features_train)
features_test = ct.transform(features_test)

#invoke the function for our model design
model = design_model(features_train)

#print the layers
print(model.layers)

[]


In [None]:
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers


def design_model(features):
  model = Sequential(name = "my_first_model")
  #get the number of features/dimensions in the data
  num_features = features.shape[1]
  #without hard-coding
  input = layers.InputLayer(input_shape=(num_features,))
  #adding the input layer
  model.add(input)
  return model


dataset = pd.read_csv(data_path) #load the dataset
features = dataset.iloc[:,0:6] #choose first 7 columns as features
labels = dataset.iloc[:,-1] #choose the final column for prediction

features = pd.get_dummies(features) #one-hot encoding for categorical variables
features_train, features_test, labels_train, labels_test = train_test_split(features, labels, test_size=0.33, random_state=42) #split the data into training and test data

#standardize
ct = ColumnTransformer([('standardize', StandardScaler(), ['age', 'bmi', 'children'])], remainder='passthrough')
features_train = ct.fit_transform(features_train)
features_test = ct.transform(features_test)

#invoke the function for our model design
model = design_model(features_train)
print(model.summary())



None


In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import InputLayer, Dense
from tensorflow.keras.optimizers import Adam

tf.random.set_seed(35)  # for reproducibility

def design_model(features):
    model = Sequential(name="my_first_model")
    # Input layer
    input = InputLayer(input_shape=(features.shape[1],))
    model.add(input)
    # Hidden layer with 128 neurons
    model.add(Dense(128, activation='relu'))
    # Output layer
    model.add(Dense(1))
    # Compile model
    opt = Adam(learning_rate=0.01)
    model.compile(loss='mse', metrics=['mae'], optimizer=opt)
    return model

# 讀取資料
dataset = pd.read_csv(data_path)

# 特徵與目標變數
features = dataset.iloc[:, 0:6]  # 前6列為特徵
labels = dataset.iloc[:, -1]  # 最後一列為目標變數

# One-hot 編碼處理類別型變數
features = pd.get_dummies(features)

# 資料分割為訓練集和測試集
features_train, features_test, labels_train, labels_test = train_test_split(
    features, labels, test_size=0.33, random_state=42)

# 標準化處理
ct = ColumnTransformer([('standardize', StandardScaler(), ['age', 'bmi', 'children'])],
                       remainder='passthrough')
features_train = ct.fit_transform(features_train)
features_test = ct.transform(features_test)

# 確保數據轉換為數值格式
features_train = np.array(features_train, dtype=np.float32)
features_test = np.array(features_test, dtype=np.float32)
labels_train = np.array(labels_train, dtype=np.float32)
labels_test = np.array(labels_test, dtype=np.float32)

# 設計模型
model = design_model(features_train)
print(model.summary())

# 訓練模型
model.fit(features_train, labels_train, epochs=40, batch_size=1, verbose=1)

# 評估模型
val_mse, val_mae = model.evaluate(features_test, labels_test, verbose=0)

print("MAE: ", val_mae)



None
Epoch 1/40
[1m896/896[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - loss: 284505792.0000 - mae: 11909.1807
Epoch 2/40
[1m896/896[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 105626720.0000 - mae: 7225.4492
Epoch 3/40
[1m896/896[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 77020864.0000 - mae: 6664.0410
Epoch 4/40
[1m896/896[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 59088804.0000 - mae: 5757.7139
Epoch 5/40
[1m896/896[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 47343892.0000 - mae: 5148.5635
Epoch 6/40
[1m896/896[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - loss: 41088344.0000 - mae: 4719.7939
Epoch 7/40
[1m896/896[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 38482092.0000 - mae: 4455.9580
Epoch 8/40
[1m896/896[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 37558920.0000 - mae: 43

In [None]:
!pip install keras



In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint as sp_randint
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.metrics import mean_squared_error
from sklearn.metrics import make_scorer
from model import design_model, features_train, labels_train

#------------- GRID SEARCH --------------
def do_grid_search():
  batch_size = [6, 64]
  epochs = [10, 50]
  model = KerasRegressor(build_fn=design_model)
  param_grid = dict(batch_size=batch_size, epochs=epochs)
  grid = GridSearchCV(estimator = model, param_grid=param_grid, scoring = make_scorer(mean_squared_error, greater_is_better=False),return_train_score = True)
  grid_result = grid.fit(features_train, labels_train, verbose = 0)
  print(grid_result)
  print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

  means = grid_result.cv_results_['mean_test_score']
  stds = grid_result.cv_results_['std_test_score']
  params = grid_result.cv_results_['params']
  for mean, stdev, param in zip(means, stds, params):
      print("%f (%f) with: %r" % (mean, stdev, param))

  print("Traininig")
  means = grid_result.cv_results_['mean_train_score']
  stds = grid_result.cv_results_['std_train_score']
  for mean, stdev, param in zip(means, stds, params):
      print("%f (%f) with: %r" % (mean, stdev, param))

#------------- RANDOMIZED SEARCH --------------
def do_randomized_search():
  param_grid = {'batch_size': sp_randint(2, 16), 'nb_epoch': sp_randint(10, 100)}
  model = KerasRegressor(build_fn=design_model)
  grid = RandomizedSearchCV(estimator = model, param_distributions=param_grid, scoring = make_scorer(mean_squared_error, greater_is_better=False), n_iter = 12)
  grid_result = grid.fit(features_train, labels_train, verbose = 0)
  print(grid_result)
  print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

  means = grid_result.cv_results_['mean_test_score']
  stds = grid_result.cv_results_['std_test_score']
  params = grid_result.cv_results_['params']
  for mean, stdev, param in zip(means, stds, params):
      print("%f (%f) with: %r" % (mean, stdev, param))

print("-------------- GRID SEARCH --------------------")
do_grid_search()
print("-------------- RANDOMIZED SEARCH --------------------")
do_randomized_search()


ModuleNotFoundError: No module named 'keras.wrappers'

In [4]:
from model import features_train, labels_train, fit_model
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
from tensorflow.keras.callbacks import EarlyStopping
from plotting import plot
import matplotlib.pyplot as plt

def design_model_dropout(X, learning_rate):
    model = Sequential(name="my_first_model")
    input = tf.keras.Input(shape=(X.shape[1],))
    model.add(input)
    model.add(layers.Dense(128, activation='relu'))
    model.add(layers.Dropout(0.1))
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dropout(0.2))
    model.add(layers.Dense(24, activation='relu'))
    #------your code here!------
    model.add(layers.Dropout(0.2))

    model.add(layers.Dense(1))
    opt = tf.keras.optimizers.Adam(learning_rate = learning_rate)
    model.compile(loss='mse', metrics=['mae'], optimizer=opt)
    return model

def design_model_no_dropout(X, learning_rate):
    model = Sequential(name="my_first_model")
    input = layers.InputLayer(input_shape=(X.shape[1],))
    model.add(input)
    model.add(layers.Dense(128, activation='relu'))
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(24, activation='relu'))
    model.add(layers.Dense(1))
    opt = tf.keras.optimizers.Adam(learning_rate = learning_rate)
    model.compile(loss='mse', metrics=['mae'], optimizer=opt)
    return model

#using the early stopping in fit_model
learning_rate = 0.001
num_epochs = 200
#train the model without dropout
history1 = fit_model(design_model_no_dropout(features_train, learning_rate), features_train, labels_train, learning_rate, num_epochs)
#train the model with dropout
history2 = fit_model(design_model_dropout(features_train, learning_rate), features_train, labels_train, learning_rate, num_epochs)

plot(history1, 'static/images/no_dropout.png')

plot(history2, 'static/images/with_dropout.png')

import app #don't worry about this. This is to show you the plot in the browser.

ModuleNotFoundError: No module named 'model'

In [5]:
import tensorflow as tf

# 確認是否使用 GPU
print("GPU Available: ", tf.config.list_physical_devices('GPU'))

# 測試 GPU 運算
a = tf.constant([[1.0, 2.0], [3.0, 4.0]])
b = tf.constant([[1.0, 1.0], [0.0, 1.0]])
c = tf.matmul(a, b)
print(c)


GPU Available:  []
tf.Tensor(
[[1. 3.]
 [3. 7.]], shape=(2, 2), dtype=float32)


In [None]:
b