In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf

# Keep rendered DataFrames short: at most 10 rows are displayed.
pd.options.display.max_rows = 10
# Render every float with a single decimal place in DataFrame output.
pd.options.display.float_format = "{:.1f}".format

import warnings
# NOTE(review): this silences *all* warnings for the rest of the session,
# including deprecation notices from pandas/TensorFlow — confirm that is intended.
warnings.filterwarnings('ignore')

# Echo the installed TensorFlow version as the cell output.
tf.__version__

### Loading dataset using Pandas
For a detailed explanation, click [here](https://github.com/akshayparakh25/Python-for-Data_Science/blob/master/jupyter-notebooks/pandas.ipynb). <br>
The dataset is available [here](https://github.com/akshayparakh25/imdb_1000_scraper/blob/master/imdb_data.csv).

In [None]:
# Read the tab-separated IMDB export and shorten the verbose column headers
# in a single chained expression.
df = (
    pd.read_csv('./../data/imdbtop1000/imdb_data.csv', sep='\t')
    .rename(
        columns={
            'User Votes': 'Votes',
            'Imdb Rating': 'Rating',
            'Gross(in Million Dollars)': 'Earnings',
            'Runtime(Minutes)': 'Runtime',
        }
    )
)

# Rescale the input feature to a small range before training: raw vote
# counts are converted to millions of votes, which avoids very large
# intermediate values during gradient descent.
df.Votes = df.Votes / 1_000_000

df.head()

#### Statistical analysis of the data to find the best input feature for the target *Rating*

In [None]:
# Summary statistics for the columns of df, shown as the cell output.
df.describe()

In [None]:
# Correlation between columns, used to identify the best feature for training a model.
# The numeric columns are selected explicitly: pandas >= 2.0 no longer drops
# non-numeric columns silently inside corr() and raises if any are present
# (presumably this dataset has text columns such as titles — verify against the CSV).
df.select_dtypes(include='number').corr()

In [None]:
# Raw data: vote counts (in millions) against IMDB rating.
fig, ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(x=df.Votes, y=df.Rating, ax=ax)
ax.set_title("Analysis of data points Votes Vs Rating")
# Labels are set after plotting so they replace the column names seaborn fills in.
ax.set_xlabel('User Votes')
ax.set_ylabel('IMDB Rating')
plt.show()

## Linear Regression with one variable

#### Defining and building model

In [None]:
def build_model(lr):
    """Create and compile a one-feature linear regression model.

    The network is a single Dense unit fed by one scalar input, so it
    learns a line of the form ``y = w * x + b``.

    Args:
        lr: Learning rate passed to the SGD optimizer.

    Returns:
        A compiled ``tf.keras.Sequential`` model whose training objective
        is mean squared error.
    """
    # Sequential model with exactly one layer: one unit, one input feature.
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Dense(units=1, input_shape=(1,)))

    # `learning_rate` is the supported keyword; the legacy `lr` alias is
    # deprecated and rejected by newer Keras releases.
    model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=lr),
                  loss="mean_squared_error")

    return model

In [None]:
# Build a throwaway model purely for inspection; it is never trained in this
# notebook, so the weights shown are the freshly initialised ones.
dummy_model = build_model(0.1)
dummy_model.get_weights()

In [None]:
# Show the layers that make up the model.
dummy_model.layers

In [None]:
# Show the model's input tensor(s).
dummy_model.inputs

In [None]:
# Show the model's output tensor.
dummy_model.output

In [None]:
# Print a textual summary of the model architecture.
dummy_model.summary()

In [None]:
# Show the model's configuration as returned by Keras.
dummy_model.get_config()

In [None]:
def train(model, dataset, features, label, epochs, batch_size):
    """Fit ``model`` on columns of ``dataset`` and return the learned parameters.

    Args:
        model: Compiled model exposing ``fit`` and ``get_weights``.
        dataset: Table indexable by column name (e.g. a pandas DataFrame).
        features: Column name (or names) used as the model input.
        label: Column name of the training target.
        epochs: Number of epochs forwarded to ``model.fit``.
        batch_size: Batch size forwarded to ``model.fit``.

    Returns:
        A tuple ``(weight, bias, history)`` where ``weight`` and ``bias`` are
        the first and second entries of ``model.get_weights()`` after
        training, and ``history`` is the object returned by ``model.fit``.
    """
    # Feed the training data to the model.
    history = model.fit(x=dataset[features],
                        y=dataset[label],
                        batch_size=batch_size,
                        epochs=epochs)

    # Read the parameters once instead of calling get_weights() per value.
    trained_weights = model.get_weights()
    weight = trained_weights[0]
    bias = trained_weights[1]

    return weight, bias, history

#### Batch Gradient Descent

In [None]:
# Batch gradient descent: the batch spans the whole dataset.
feature, label = "Votes", "Rating"
learning_rate, epochs = 0.1, 15
batch_size = len(df)  # every row goes into a single batch

model = build_model(learning_rate)

w, b, hist = train(
    model,
    dataset=df,
    features=feature,
    label=label,
    epochs=epochs,
    batch_size=batch_size,
)

In [None]:
# Learned slope and intercept of the fitted line.
print(w[0][0])
print(b[0])

# Evaluate the fitted line at every observed vote count.
predictions = w[0][0] * df.Votes + b[0]

# Overlay the fitted line (red) on the raw data points.
fig, ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(x=df.Votes, y=df.Rating, ax=ax)
sns.lineplot(x=df.Votes, y=predictions, color='red', ax=ax)
ax.set_title("Analysis of trained model and data points")
ax.set_xlabel('User Votes')
ax.set_ylabel('IMDB Rating')
plt.show()

In [None]:
# Loss values recorded in the training history, one per epoch.
LOSS = pd.DataFrame(hist.history)['loss']

# Loss curve for the batch-gradient-descent run.
fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(LOSS, label='BGD')
ax.set_xlabel('epochs')
ax.set_ylabel('loss')
ax.legend()
plt.show()

#### Stochastic Gradient Descent

In [None]:
# Stochastic gradient descent: each batch holds a single sample.
feature, label = "Votes", "Rating"
learning_rate, epochs = 0.1, 15
batch_size = 1  # one row per batch

model = build_model(learning_rate)

w, b, hist = train(
    model,
    dataset=df,
    features=feature,
    label=label,
    epochs=epochs,
    batch_size=batch_size,
)

In [None]:
# Learned slope and intercept of the fitted line.
print(w[0][0])
print(b[0])

# Evaluate the fitted line at every observed vote count.
predictions = w[0][0] * df.Votes + b[0]

# Overlay the fitted line (red) on the raw data points.
fig, ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(x=df.Votes, y=df.Rating, ax=ax)
sns.lineplot(x=df.Votes, y=predictions, color='red', ax=ax)
ax.set_title("Analysis of trained model and data points")
ax.set_xlabel('User Votes')
ax.set_ylabel('IMDB Rating')
plt.show()

In [None]:
# Loss values recorded in the training history, one per epoch.
LOSS = pd.DataFrame(hist.history)['loss']

# Loss curve for the stochastic-gradient-descent run. The legend label was a
# copy-paste leftover ('BGD') from the batch section; it now names this run.
plt.figure(figsize=(8,6))
plt.plot(LOSS, label='SGD')
plt.xlabel('epochs')
plt.ylabel('loss')
plt.legend()
plt.show()

#### Mini-Batch Gradient Descent

In [None]:
# Mini-batch gradient descent: small fixed-size batches.
feature, label = "Votes", "Rating"
learning_rate, epochs = 0.1, 15
batch_size = 10  # ten rows per batch

model = build_model(learning_rate)

w, b, hist = train(
    model,
    dataset=df,
    features=feature,
    label=label,
    epochs=epochs,
    batch_size=batch_size,
)

In [None]:
# Learned slope and intercept of the fitted line.
print(w[0][0])
print(b[0])

# Evaluate the fitted line at every observed vote count.
predictions = w[0][0] * df.Votes + b[0]

# Overlay the fitted line (red) on the raw data points.
fig, ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(x=df.Votes, y=df.Rating, ax=ax)
sns.lineplot(x=df.Votes, y=predictions, color='red', ax=ax)
ax.set_title("Analysis of trained model and data points")
ax.set_xlabel('User Votes')
ax.set_ylabel('IMDB Rating')
plt.show()

In [None]:
# Loss values recorded in the training history, one per epoch.
LOSS = pd.DataFrame(hist.history)['loss']

# Loss curve for the mini-batch-gradient-descent run.
fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(LOSS, label='MBGD')
ax.set_xlabel('epochs')
ax.set_ylabel('loss')
ax.legend()
plt.show()