In [25]:
import boto3
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf

## Defining the bucket
s3 = boto3.resource('s3')
bucket_name = 'danhtran358-data-445-bucket'
bucket = s3.Bucket(bucket_name)

## Defining the csv file
file_key = 'CarPrice_Assignment.csv'

bucket_object = bucket.Object(file_key)
file_object = bucket_object.get()
file_content_stream = file_object.get('Body')

## Reading the csv file
car_price = pd.read_csv(file_content_stream)
car_price.head()

Unnamed: 0,car_ID,symboling,CarName,fueltype,aspiration,doornumber,carbody,drivewheel,enginelocation,wheelbase,...,enginesize,fuelsystem,boreratio,stroke,compressionratio,horsepower,peakrpm,citympg,highwaympg,price
0,1,3,alfa-romero giulia,gas,std,two,convertible,rwd,front,88.6,...,130,mpfi,3.47,2.68,9.0,111,5000,21,27,13495.0
1,2,3,alfa-romero stelvio,gas,std,two,convertible,rwd,front,88.6,...,130,mpfi,3.47,2.68,9.0,111,5000,21,27,16500.0
2,3,1,alfa-romero Quadrifoglio,gas,std,two,hatchback,rwd,front,94.5,...,152,mpfi,2.68,3.47,9.0,154,5000,19,26,16500.0
3,4,2,audi 100 ls,gas,std,four,sedan,fwd,front,99.8,...,109,mpfi,3.19,3.4,10.0,102,5500,24,30,13950.0
4,5,2,audi 100ls,gas,std,four,sedan,4wd,front,99.4,...,136,mpfi,3.19,3.4,8.0,115,5500,18,22,17450.0


In [26]:
## Defining the input and target variables
X = car_price[['wheelbase', 'enginesize', 'compressionratio', 'horsepower', 'peakrpm', 'citympg']]
Y = car_price['price']

## Split the data into train & testing
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.2)

In [27]:
## Transforming input data to 0-1
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.fit_transform(X_test)

In [28]:
md1 = tf.keras.models.Sequential([
      tf.keras.layers.Dense(10, input_dim = 6, activation = 'relu'),
      tf.keras.layers.Dense(1)
])

md1.compile(optimizer = 'adam', loss = 'mse')
md1.fit(X_train, Y_train, epochs = 100, batch_size = 100, verbose = 0)

<keras.callbacks.History at 0x7fde80938160>

In [29]:
md1.evaluate(X_test, Y_test)



215570240.0

In [30]:
md1.layers[0].weights

[<tf.Variable 'dense_9/kernel:0' shape=(6, 10) dtype=float32, numpy=
 array([[-0.33000803,  0.20535848, -0.05036539,  0.375091  , -0.05274838,
         -0.18644874, -0.09245523,  0.00187641,  0.57820326, -0.27856022],
        [-0.21948123,  0.33636928, -0.27840415,  0.10213741, -0.5599851 ,
         -0.09512071,  0.3497115 , -0.46188736, -0.04773526, -0.12613994],
        [-0.15149412,  0.3456038 ,  0.28822094, -0.32340264, -0.21875879,
          0.5130969 ,  0.44383192,  0.23660088,  0.11594445,  0.795868  ],
        [-0.37595266,  0.2942815 ,  0.08387202, -0.7270189 ,  0.24063677,
          0.558434  , -0.41600576, -0.33237442,  0.4058625 , -0.16610464],
        [-0.27829567,  0.11691704, -0.18663335,  0.09741498, -0.49448106,
          0.00115369, -0.47100103, -0.33674437, -0.09184795,  0.18555234],
        [ 0.16545236,  0.8102423 , -0.5204469 ,  0.3687705 , -0.37518337,
          0.7570356 , -0.542767  , -0.4685967 ,  0.37447163,  0.7349155 ]],
       dtype=float32)>,
 <tf.Variabl

In [31]:
md2 = tf.keras.models.Sequential([
      tf.keras.layers.Dense(10, input_dim = 6, activation = 'relu'),
      tf.keras.layers.Dense(8, activation = 'relu'),
      tf.keras.layers.Dense(1)
])
md2.compile(optimizer = 'adam', loss = 'mse')
md2.fit(X_train, Y_train, epochs = 100, batch_size = 100, verbose = 0)

<keras.callbacks.History at 0x7fde807d1860>

In [32]:
md2.evaluate(X_test,Y_test)



215403200.0

In [33]:
md2.layers[0].weights

[<tf.Variable 'dense_11/kernel:0' shape=(6, 10) dtype=float32, numpy=
 array([[ 0.53149825,  0.15320355,  0.03004826,  0.6833294 ,  0.43425572,
          0.09300532,  0.12256465,  0.05317635,  0.3094386 , -0.08285594],
        [-0.0987654 ,  0.30386466,  0.8117745 ,  0.5541578 ,  0.4225659 ,
         -0.50492734,  0.4997188 , -0.27029303,  0.05529964,  0.3532479 ],
        [-0.25459382, -0.68143845,  0.5342126 , -0.21862586,  0.21304582,
          0.18493116, -0.26409745,  0.69367486,  0.10890777,  0.5237922 ],
        [ 0.05550654, -0.02687232,  0.42604214, -0.17542435,  0.34047806,
          0.04987343,  0.538965  ,  0.04246259,  0.6205754 ,  0.35282475],
        [ 0.26188052,  0.08373655, -0.01089149, -0.0202066 , -0.44689557,
         -0.26818362,  0.35242075,  0.654024  , -0.26285666,  0.8361786 ],
        [ 0.491068  , -0.5087387 ,  0.6641598 ,  0.707031  ,  0.39520645,
         -0.17907761, -0.09061486,  0.42494518,  0.844978  ,  0.06996691]],
       dtype=float32)>,
 <tf.Variab

In [34]:
md2.layers[1].weights

[<tf.Variable 'dense_12/kernel:0' shape=(10, 8) dtype=float32, numpy=
 array([[ 0.16642712, -0.12863725,  0.4812981 , -0.2927036 ,  0.23451336,
         -0.5783682 , -0.13214038,  0.61309576],
        [ 0.75653964, -0.1646041 , -0.1295206 ,  0.26822558,  0.26684   ,
         -0.076267  , -0.03139281, -0.3297195 ],
        [ 0.27998647,  0.8638649 , -0.07333727, -0.4821614 , -0.0634032 ,
         -0.07993656,  0.11397889,  0.7152214 ],
        [ 0.33251488,  0.8837515 ,  0.01627851,  0.40497962,  0.03151224,
          0.41391745,  0.3711178 ,  0.7636784 ],
        [-0.27128133,  0.32764587,  0.44671938, -0.15459622,  0.68971217,
          0.45862252, -0.3394096 , -0.07244425],
        [-0.43576637,  0.3288964 , -0.45950297,  0.30614   ,  0.32915792,
         -0.43720186,  0.14514309, -0.240438  ],
        [ 0.00998419, -0.22394137,  0.33907354, -0.15157624,  0.49424025,
         -0.29701144,  0.13162604,  0.31026077],
        [ 0.8125669 ,  0.33181235,  0.8176651 ,  0.12791352,  0.55896

In [None]:
## I would use the second model because its MSE is lower