<a href="https://colab.research.google.com/github/cedjustin/AI/blob/master/sprint14.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [184]:
# import the dependencies
from keras.models import Sequential
from keras.layers import Dense, Activation, Input
from keras.optimizers import Adam
from keras.losses import BinaryCrossentropy, CategoricalCrossentropy, MeanSquaredError, SparseCategoricalCrossentropy
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow as tf
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from keras.datasets import mnist

[Problem 1] Sharing and executing the official tutorial model

Link to the [tutorial](https://www.tensorflow.org/tutorials/quickstart/beginner)

The results
- loss: 0.0776
- accuracy: 0.9758

In [194]:
mnist = tf.keras.datasets.mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

model = tf.keras.models.Sequential([
  tf.keras.layers.Flatten(input_shape=(28, 28)),
  tf.keras.layers.Dense(128, activation='relu'),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(10)
])

predictions = model(x_train[:1]).numpy()
predictions

array([[ 0.22898045,  0.71390325,  0.9598954 ,  0.23749118, -0.65243226,
        -0.10377167, -0.24392913,  0.28863388, -0.40824512,  0.21461129]],
      dtype=float32)

In [170]:
tf.nn.softmax(predictions).numpy()

array([[0.14133231, 0.06326488, 0.07247043, 0.13620217, 0.10552537,
        0.10426141, 0.07537098, 0.11691067, 0.09922493, 0.08543681]],
      dtype=float32)

In [171]:
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

In [172]:
loss_fn(y_train[:1], predictions).numpy()

2.260854

In [173]:
model.compile(optimizer='adam',
              loss=loss_fn,
              metrics=['accuracy'])

In [174]:
model.fit(x_train, y_train, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7fb77d9a6c50>

In [175]:
model.evaluate(x_test,  y_test, verbose=2)

313/313 - 0s - loss: 0.0776 - accuracy: 0.9758


[0.0776393860578537, 0.9757999777793884]

[Problem 3] Learning Iris (binary classification) with Keras

In [95]:
#Load dataset
df = pd.read_csv("Iris.csv")
df = df[df["Species"]!="Iris-setosa"]
#Condition extraction from data frame
y = df['Species']
X = df.loc[:, ["SepalLengthCm", "SepalWidthCm", "PetalLengthCm", "PetalWidthCm"]]
# NumPy 配列に変換
X = np.array(X)
y = np.array(y)
# Convert label to number
y[y == "Iris-versicolor"] = 0
y[y == "Iris-virginica"] = 1
y = y.astype(np.int64)[:, np.newaxis]

#Split into train and test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [96]:
def classification_evaluation(X_train, y_train, X_test, crossentropy, verbose, output_units):
  model = Sequential()
  model.add(Dense(50, activation=tf.nn.relu))
  model.add(Dense(100, activation=tf.nn.relu))
  model.add(Dense(output_units, activation=tf.nn.sigmoid))
  model.compile(optimizer=Adam(learning_rate=0.001), loss=crossentropy(), metrics=["accuracy"])
  model.fit(X_train, y_train, shuffle=True, batch_size=10, epochs=300, verbose=verbose)
  score = model.evaluate(X_train, y_train, verbose=0)
  print('Train loss:', score[0])
  print('Train accuracy:', score[1])
  pred_proba = model.predict(X_test,batch_size=10)[:, 0]
  rounded_preds = np.where(pred_proba > 0.5, 1, 0)
  print(rounded_preds)

In [97]:
classification_evaluation(X_train, y_train, X_test, BinaryCrossentropy, 0, 1)

Train loss: 0.030822638422250748
Train accuracy: 0.987500011920929
[0 1 0 1 1 1 0 1 1 1 1 1 1 0 0 0 1 0 1 0]


[Problem 4] Learn Iris (multi-level classification) with Keras

In [198]:
#Load dataset
df = pd.read_csv("Iris.csv")
#Condition extraction from data frame
y = df["Species"]
X = df.loc[:, ["SepalLengthCm", "SepalWidthCm", "PetalLengthCm", "PetalWidthCm"]]
# NumPy 配列に変換
X = np.array(X)
y = np.array(y)
# Convert label to number
y[y == "Iris-setosa"] = 0
y[y == "Iris-versicolor"] = 1
y[y == "Iris-virginica"] = 2
y = y.astype(np.int64)[:, np.newaxis]

#Split into train and test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
enc = OneHotEncoder(handle_unknown='ignore', sparse=False)
y_train_one_hot = enc.fit_transform(y_train)
y_test_one_hot = enc.transform(y_test)

In [200]:
  model = Sequential()
  model.add(Dense(50, activation=tf.nn.relu))
  model.add(Dense(100, activation=tf.nn.relu))
  model.add(Dense(3, activation=tf.nn.softmax))
  model.compile(optimizer=Adam(learning_rate=0.001), loss=CategoricalCrossentropy(), metrics=["accuracy"])
  model.fit(X_train, y_train_one_hot, shuffle=True, batch_size=10, epochs=300, verbose=0)
  score = model.evaluate(X_train, y_train_one_hot, verbose=0)
  print('Train loss:', score[0])
  print('Train accuracy:', score[1])
  pred_proba = model.predict(X_test,batch_size=10)[:, 0]
  rounded_preds = np.where(pred_proba > 0.5, 1, 0)
  print(rounded_preds)

Train loss: 0.05995071679353714
Train accuracy: 0.9750000238418579
[0 0 1 0 1 0 1 0 0 0 0 0 0 0 0 1 0 0 1 1 0 0 1 1 0 1 1 0 0 1]


[Problem 5] Learning House Prices with Keras

In [126]:
house_df = pd.read_csv("house_prices.csv")
house_df.head()

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,OverallQual,OverallCond,YearBuilt,YearRemodAdd,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,MasVnrArea,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinSF1,BsmtFinType2,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,Heating,...,CentralAir,Electrical,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,KitchenQual,TotRmsAbvGrd,Functional,Fireplaces,FireplaceQu,GarageType,GarageYrBlt,GarageFinish,GarageCars,GarageArea,GarageQual,GarageCond,PavedDrive,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
0,1,60,RL,65.0,8450,Pave,,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,7,5,2003,2003,Gable,CompShg,VinylSd,VinylSd,BrkFace,196.0,Gd,TA,PConc,Gd,TA,No,GLQ,706,Unf,0,150,856,GasA,...,Y,SBrkr,856,854,0,1710,1,0,2,1,3,1,Gd,8,Typ,0,,Attchd,2003.0,RFn,2,548,TA,TA,Y,0,61,0,0,0,0,,,,0,2,2008,WD,Normal,208500
1,2,20,RL,80.0,9600,Pave,,Reg,Lvl,AllPub,FR2,Gtl,Veenker,Feedr,Norm,1Fam,1Story,6,8,1976,1976,Gable,CompShg,MetalSd,MetalSd,,0.0,TA,TA,CBlock,Gd,TA,Gd,ALQ,978,Unf,0,284,1262,GasA,...,Y,SBrkr,1262,0,0,1262,0,1,2,0,3,1,TA,6,Typ,1,TA,Attchd,1976.0,RFn,2,460,TA,TA,Y,298,0,0,0,0,0,,,,0,5,2007,WD,Normal,181500
2,3,60,RL,68.0,11250,Pave,,IR1,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,7,5,2001,2002,Gable,CompShg,VinylSd,VinylSd,BrkFace,162.0,Gd,TA,PConc,Gd,TA,Mn,GLQ,486,Unf,0,434,920,GasA,...,Y,SBrkr,920,866,0,1786,1,0,2,1,3,1,Gd,6,Typ,1,TA,Attchd,2001.0,RFn,2,608,TA,TA,Y,0,42,0,0,0,0,,,,0,9,2008,WD,Normal,223500
3,4,70,RL,60.0,9550,Pave,,IR1,Lvl,AllPub,Corner,Gtl,Crawfor,Norm,Norm,1Fam,2Story,7,5,1915,1970,Gable,CompShg,Wd Sdng,Wd Shng,,0.0,TA,TA,BrkTil,TA,Gd,No,ALQ,216,Unf,0,540,756,GasA,...,Y,SBrkr,961,756,0,1717,1,0,1,0,3,1,Gd,7,Typ,1,Gd,Detchd,1998.0,Unf,3,642,TA,TA,Y,0,35,272,0,0,0,,,,0,2,2006,WD,Abnorml,140000
4,5,60,RL,84.0,14260,Pave,,IR1,Lvl,AllPub,FR2,Gtl,NoRidge,Norm,Norm,1Fam,2Story,8,5,2000,2000,Gable,CompShg,VinylSd,VinylSd,BrkFace,350.0,Gd,TA,PConc,Gd,TA,Av,GLQ,655,Unf,0,490,1145,GasA,...,Y,SBrkr,1145,1053,0,2198,1,0,2,1,4,1,Gd,9,Typ,1,TA,Attchd,2000.0,RFn,3,836,TA,TA,Y,192,84,0,0,0,0,,,,0,12,2008,WD,Normal,250000


In [143]:
h_X = house_df[['GrLivArea','YearBuilt']].to_numpy()
h_y = house_df['SalePrice'].to_numpy()

In [144]:
h_X.shape, h_y.shape

((1460, 2), (1460,))

In [145]:
h_X_train, h_X_test, h_y_train, h_y_test = train_test_split(h_X, h_y, train_size = 0.2, random_state=0)

In [148]:
model = Sequential()
model.add(Dense(50, activation=tf.nn.relu))
model.add(Dense(100, activation=tf.nn.relu))
model.add(Dense(1, activation=tf.keras.activations.linear))
model.compile(optimizer=Adam(learning_rate=0.001), loss=MeanSquaredError(), metrics=["mse"])
model.fit(h_X_train, h_y_train, shuffle=True, batch_size=10, epochs=20, verbose=0)
score = model.evaluate(h_X_train, h_y_train, verbose=0)
print('Loss:', score[0])
print('MSE:', score[1])
pred_proba = model.predict(h_X_test,batch_size=10)
print(pred_proba.ravel())
print(h_y_test.ravel())

Loss: 2821825024.0
MSE: 2821825024.0
[235580.05 179788.69 158150.69 ... 179225.88 209339.27 161379.23]
[200624 133000 110000 ... 133000 222500 162500]


[Problem 6] Learning MNIST with Keras

In [186]:
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train = X_train[0:60000,:]
y_train = y_train[0:60000]
X_test = X_test[0:60000,:]
y_test = y_test[0:60000]
X_train.shape

(60000, 28, 28)

In [187]:
# flattening the subsets
X_train = X_train.reshape(-1,784)
X_test = X_test.reshape(-1,784)

In [188]:
# pre processing
X_train = X_train.astype(np.float)
X_test = X_test.astype(np.float)
X_train /= 255
X_test /= 255

In [189]:
# splitting our subsets into train and validation subsets
m_X_train, m_X_test, m_y_train, m_y_test = train_test_split(X_train, y_train, test_size=0.2, random_state= 0)

In [190]:
enc = OneHotEncoder(handle_unknown='ignore', sparse=False)
y_train_one_hot = enc.fit_transform(m_y_train[:, np.newaxis])
y_test_one_hot = enc.transform(m_y_test[:, np.newaxis])

In [197]:
model = Sequential()
model.add(Dense(50, activation=tf.nn.relu))
model.add(Dense(100, activation=tf.nn.relu))
model.add(Dense(10, activation=tf.nn.softmax))
model.compile(optimizer=Adam(learning_rate=0.001), loss=CategoricalCrossentropy(), metrics=["accuracy"])
model.fit(m_X_train, y_train_one_hot, shuffle=True, batch_size=10, epochs=300, verbose=0)
score = model.evaluate(m_X_train, y_train_one_hot, verbose=0)
print('Train loss:', score[0])
print('Train accuracy:', score[1])
pred_proba = model.predict(m_X_test,batch_size=10)
pred = np.argmax(pred_proba, 1)
print(pred)

Train loss: 0.007448898162692785
Train accuracy: 0.9994999766349792
[3 6 6 ... 5 1 6]
