In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf

# Notebook-wide pandas display settings: show at most 10 rows of any frame
# and render floats with a single decimal place.
pd.set_option("display.max_rows", 10)
pd.set_option("display.float_format", "{:.1f}".format)

# Silence every warning for the rest of the session.
# NOTE(review): this also hides pandas/TensorFlow deprecation warnings.
import warnings
warnings.filterwarnings(action='ignore')

# Echo the TensorFlow version in use as the cell output.
tf.__version__

In [None]:
# Short aliases for the verbose column headers used in the CSV file.
column_aliases = {
    'User Votes': 'Votes',
    'Imdb Rating': 'Rating',
    'Gross(in Million Dollars)': 'Earnings',
    'Runtime(Minutes)': 'Runtime',
}

# Read the tab-separated file and apply the aliases in a single chain.
df = (
    pd.read_csv('./../data/imdbtop1000/imdb_data.csv', sep='\t')
    .rename(columns=column_aliases)
)
df.head()

In [None]:
# Keep only the two input features and the regression target, and rescale the
# inputs in the same step.
#
# It is important to normalise the input features into a small numeric range:
# it avoids very large intermediate values during training.
#
# `.assign` returns a new frame instead of writing into a selection of `df`,
# so no chained-assignment SettingWithCopyWarning is triggered and `df` itself
# is left untouched.
dataframe = df[['Votes', 'Earnings', 'Rating']].assign(
    Votes=lambda frame: frame['Votes'] / 1000000,      # votes -> millions of votes
    Earnings=lambda frame: frame['Earnings'] / 100,    # earnings divided by 100
)
dataframe.describe()

In [None]:
# Original note: 73 rows (1000 - 927) contain NaN/nan values.
# Drop those rows. Rebinding the name to the result of `dropna()` (rather than
# using `inplace=True`) avoids mutating a frame that was derived from a slice
# of `df`, and keeps the cell safe to re-run.
dataframe = dataframe.dropna()

In [None]:
# Sanity check: select the rows whose Earnings value is still missing.
# An empty result confirms the absence of NaN in that column.
missing_earnings = dataframe['Earnings'].isna()
dataframe.loc[missing_earnings]

In [None]:
from tensorflow.keras import layers, Sequential, optimizers

In [None]:
def build_model(lr, n_features=2):
    """Build and compile a single-layer linear regression model.

    Args:
        lr: Learning rate handed to the SGD optimizer.
        n_features: Number of input features. Defaults to 2, which is the
            input width the notebook originally hard-coded.

    Returns:
        A compiled ``Sequential`` model made of one ``Dense`` layer with a
        single unit, trained with SGD on mean squared error.
    """
    # Initialise model :: Sequential model
    model = Sequential()

    # A single Dense unit with no activation argument.
    model.add(layers.Dense(units=1, input_shape=(n_features,)))

    # Configure training to minimise the model's mean squared error.
    # `learning_rate` is the supported keyword; the legacy `lr` alias is
    # deprecated and is not honoured by newer Keras releases.
    model.compile(optimizer=optimizers.SGD(learning_rate=lr),
                  loss="mean_squared_error")

    return model

In [None]:
def train(model, dataset, features, label, epochs, batch_size):
    """Fit ``model`` on selected columns of ``dataset``.

    Args:
        model: Object exposing a ``fit`` method (here, the compiled Keras model).
        dataset: Container indexed by column name; ``dataset[features]`` is
            used as the inputs and ``dataset[label]`` as the targets.
        features: Key (or list of keys) selecting the input columns.
        label: Key selecting the target column.
        epochs: Number of epochs passed to ``fit``.
        batch_size: Batch size passed to ``fit``.

    Returns:
        A ``(model, history)`` tuple, where ``history`` is whatever
        ``model.fit`` returned.
    """
    # Collect the fit arguments first so the call itself stays short.
    fit_args = {
        "x": dataset[features],
        "y": dataset[label],
        "batch_size": batch_size,
        "epochs": epochs,
    }
    training_log = model.fit(**fit_args)

    return model, training_log

In [None]:
# Hyperparameters for the run.
learning_rate = 0.1
epochs = 50
# Full-batch training: one batch holds every training row. The size is taken
# from the cleaned frame that is actually passed to `train`, not from the raw
# `df`, which still contains the rows dropped for missing values.
batch_size = len(dataframe)

feature = ["Votes", "Earnings"]
label = "Rating"

# Fix TensorFlow's global seed so the random weight initialisation (and hence
# the loss curve and learned weights) is the same on every run.
seed = 42
tf.random.set_seed(seed)

model = build_model(learning_rate)

trained_model, hist = train(model, dataset=dataframe, features=feature, label=label, epochs=epochs, batch_size=batch_size)


In [None]:
# Display the parameters learned by the trained model (the weights of the
# single Dense layer added in build_model) as the cell output.
trained_model.get_weights()

In [None]:
# Per-epoch training loss, pulled out of the history returned by the fit call.
LOSS = pd.DataFrame(hist.history)['loss']

# Draw the loss curve through the explicit figure/axes interface.
fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(LOSS, label='LOSS')
ax.set_xlabel('epochs')
ax.set_ylabel('loss')
ax.legend()
plt.show()