In [1]:
# Prediction of fuel efficiency & deploying it on the android app

# importing pandas
import pandas as pd

# importing tensorflow
import tensorflow as tf

# importing keras
from tensorflow import keras
# importing layers from tensorflow
from tensorflow.keras import layers


# importing train test split
from sklearn.model_selection import train_test_split


In [2]:
# getting the data
dataset_path = keras.utils.get_file("auto-mpg.data", "http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data")
dataset_path

Downloading data from http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data


'/root/.keras/datasets/auto-mpg.data'

In [3]:
# df = pd.DataFrame(dataset_path)

In [4]:
# Creating a list of column names
column_names = ['MPG','Cylinders','Displacement','Horsepower','Weight',
                'Acceleration', 'Model Year', 'Origin']

In [5]:
# reading the csv file by specifying the colmumns 
raw_dataset = pd.read_csv(dataset_path, names=column_names,
                      na_values = "?", comment='\t',
                      sep=" ", skipinitialspace=True)

In [41]:

dataset = raw_dataset.copy()
# dataset has a copy of raw_dataset

# printing a random values of dataset

print(dataset.sample(5))


      MPG  Cylinders  Displacement  ...  Acceleration  Model Year  Origin
368  27.0          4         112.0  ...          18.6          82       1
301  34.2          4         105.0  ...          13.2          79       1
199  20.0          6         225.0  ...          17.7          76       1
54   35.0          4          72.0  ...          18.0          71       3
117  29.0          4          68.0  ...          19.5          73       2

[5 rows x 8 columns]


In [7]:
# Dropping the null or missing values
dataset.dropna()

Unnamed: 0,MPG,Cylinders,Displacement,Horsepower,Weight,Acceleration,Model Year,Origin
0,18.0,8,307.0,130.0,3504.0,12.0,70,1
1,15.0,8,350.0,165.0,3693.0,11.5,70,1
2,18.0,8,318.0,150.0,3436.0,11.0,70,1
3,16.0,8,304.0,150.0,3433.0,12.0,70,1
4,17.0,8,302.0,140.0,3449.0,10.5,70,1
...,...,...,...,...,...,...,...,...
393,27.0,4,140.0,86.0,2790.0,15.6,82,1
394,44.0,4,97.0,52.0,2130.0,24.6,82,2
395,32.0,4,135.0,84.0,2295.0,11.6,82,1
396,28.0,4,120.0,79.0,2625.0,18.6,82,1


In [8]:
# Checking the null values
dataset.isnull().sum()
# dataset = dataset.dropna()


MPG             0
Cylinders       0
Displacement    0
Horsepower      6
Weight          0
Acceleration    0
Model Year      0
Origin          0
dtype: int64

In [9]:
dataset = dataset.dropna()

In [10]:

origin = dataset.pop('Origin')
dataset['USA'] = (origin == 1)*1.0
dataset['Europe'] = (origin == 2)*1.0
dataset['Japan'] = (origin == 3)*1.0
dataset.tail()

Unnamed: 0,MPG,Cylinders,Displacement,Horsepower,Weight,Acceleration,Model Year,USA,Europe,Japan
393,27.0,4,140.0,86.0,2790.0,15.6,82,1.0,0.0,0.0
394,44.0,4,97.0,52.0,2130.0,24.6,82,0.0,1.0,0.0
395,32.0,4,135.0,84.0,2295.0,11.6,82,1.0,0.0,0.0
396,28.0,4,120.0,79.0,2625.0,18.6,82,1.0,0.0,0.0
397,31.0,4,119.0,82.0,2720.0,19.4,82,1.0,0.0,0.0


In [12]:
# X has all the column of the dataset except 'MPG' as we want to predict that
X = dataset.drop('MPG',axis='columns')

In [13]:
# y has 'MPG' column
y = dataset['MPG']

In [14]:
# Splitting the the data into X_train,X_test,y_train,y_test in 25:75 ratio

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

In [15]:
# getting the stats of train data
train_stats = X_train.describe()
train_stats

Unnamed: 0,Cylinders,Displacement,Horsepower,Weight,Acceleration,Model Year,USA,Europe,Japan
count,294.0,294.0,294.0,294.0,294.0,294.0,294.0,294.0,294.0
mean,5.469388,195.482993,104.472789,2971.829932,15.507143,76.136054,0.639456,0.14966,0.210884
std,1.712456,104.656392,38.600221,841.771254,2.856998,3.651433,0.480977,0.357346,0.408632
min,3.0,70.0,46.0,1613.0,8.0,70.0,0.0,0.0,0.0
25%,4.0,105.0,75.0,2227.0,13.5,73.0,0.0,0.0,0.0
50%,4.0,151.0,92.5,2781.5,15.4,76.0,1.0,0.0,0.0
75%,8.0,302.0,129.75,3625.75,17.3,79.0,1.0,0.0,0.0
max,8.0,455.0,230.0,5140.0,24.8,82.0,1.0,1.0,1.0


In [None]:
# train_stats = train_stats.transpose()
# train_stats


In [28]:
# creating a function in order to normalise the train & test data
def norm(x):
  return (x - train_stats.mean()) / train_stats.std()
normed_train_data = norm(X_train)
normed_test_data = norm(y_test)


In [29]:
def build_model():
  model = keras.Sequential([
                            #input layer
    layers.Dense(64, activation='relu', input_shape=[len(X_train.keys())]),
    # -----------------------hidden layers-------------------------
    layers.Dense(64, activation='relu'),
    layers.Dense(64, activation='relu'),
    layers.Dense(64, activation='relu'),
    layers.Dense(64, activation='relu'),
    # -------------------------------------------
    # output layer
    layers.Dense(1)
  ])
  # specifying the optimizers
  optimizer = tf.keras.optimizers.RMSprop(0.001)


# compiling the model & then returing it
  model.compile(loss='mse',
                optimizer=optimizer,
                metrics=['mae', 'mse'])
  return model

model = build_model()

# getting the summary of the model
model.summary()



Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_12 (Dense)             (None, 64)                640       
_________________________________________________________________
dense_13 (Dense)             (None, 64)                4160      
_________________________________________________________________
dense_14 (Dense)             (None, 64)                4160      
_________________________________________________________________
dense_15 (Dense)             (None, 64)                4160      
_________________________________________________________________
dense_16 (Dense)             (None, 64)                4160      
_________________________________________________________________
dense_17 (Dense)             (None, 1)                 65        
Total params: 17,345
Trainable params: 17,345
Non-trainable params: 0
__________________________________________________

In [30]:
# Specing the epochs as to how many times the model training will go through
EPOCHS = 500


In [31]:
# fitting the model using model.fit
# & specifying the epochs 
history = model.fit(
  X_train,y_train ,
  epochs=EPOCHS)


Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

In [36]:

loss, mae, mse = model.evaluate(X_test,y_test, verbose=0)


In [37]:
loss

12.421639442443848

In [38]:
mae

2.870661735534668

In [39]:
mse

12.421639442443848

In [40]:

kearas_file = "automobile.h5"
tf.keras.models.save_model(model,kearas_file)
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tfmodel = converter.convert()
open("automobile.tflite","wb").write(tfmodel)

INFO:tensorflow:Assets written to: /tmp/tmp6_9e5ylg/assets


INFO:tensorflow:Assets written to: /tmp/tmp6_9e5ylg/assets


72244