# Import Packages
Let's load all the packages needed for this notebook:

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

import seaborn as sns

In [None]:
import tensorflow as tf
# Display the installed TensorFlow version.
tf.__version__

# The Dataset
For this notebook we will use the Tabular Playground Series - Jan 2022 dataset.

Let's load the training data:

In [None]:
# Read the training CSV into a DataFrame.
train_csv = '../input/tabular-playground-series-jan-2022/train.csv'
data = pd.read_csv(train_csv)

In [None]:
# Summarise column names, dtypes and non-null counts of the raw frame.
data.info()

# Quick Look at the Data
Let’s take a look at the top five rows:

In [None]:
# Preview the first rows of the training frame.
data.head()

In [None]:
# 'row_id' is dropped from the training frame; rebinding keeps the cell free of in-place mutation.
data = data.drop(columns='row_id')

# Check if there are null values

In [None]:
# Count the missing entries in each column.
data.isnull().sum()

In [None]:
# List the distinct values found in the 'country' column.
country = pd.unique(data['country'])
print('Unique value of country column: ', country)

In [None]:
# Count the rows per country and show the counts as a bar chart.
# value_counts() is called on the column itself: DataFrame.value_counts expects
# column labels in `subset`, not a Series of values.
data['country'].value_counts().plot.bar()
plt.title('Data Distribution')
plt.xlabel('Country')
plt.ylabel('counts')

plt.show()

In [None]:
# List the distinct values found in the 'store' column.
store = data['store'].unique()
print('Unique value of store column: ', store)

# Count the rows per store and show the counts as a bar chart.
# value_counts() is called on the column itself: DataFrame.value_counts expects
# column labels in `subset`, not a Series of values.
data['store'].value_counts().plot.bar()
plt.title('Data Distribution')
plt.xlabel('Store')
plt.ylabel('counts')

plt.show()

In [None]:
# List the distinct values found in the 'product' column.
product = data['product'].unique()
print('Unique value of product column: ', product)

# Count the rows per product and show the counts as a bar chart.
# value_counts() is called on the column itself: DataFrame.value_counts expects
# column labels in `subset`, not a Series of values.
data['product'].value_counts().plot.bar()
plt.title('Data Distribution')
plt.xlabel('Product')
plt.ylabel('counts')

plt.show()

In [None]:
# Convert the 'date' column to pandas datetimes so the .dt accessor can be used.
data = data.assign(date=pd.to_datetime(data['date']))

data.info()

In [None]:
# Break the parsed date into separate day / month / year columns.
data = data.assign(
    day=data['date'].dt.day,
    month=data['date'].dt.month,
    year=data['date'].dt.year,
)

data.info()

In [None]:
# Preview the frame with the new day / month / year columns.
data.head()

In [None]:
# The original 'date' column is removed after its parts were extracted.
data = data.drop(columns=['date'])
data.head()

# Label Encoding

In [None]:
from sklearn.preprocessing import LabelEncoder

# Categorical columns that are replaced by integer codes.
le_cols = ['country', 'store', 'product']

def convert2num(X_new, encoders=None):
    """Label-encode the categorical columns listed in ``le_cols``.

    Parameters
    ----------
    X_new : pandas.DataFrame
        Frame containing every column named in ``le_cols``.
    encoders : dict, optional
        Mapping of column name -> fitted ``LabelEncoder``. Columns present in
        the mapping are transformed with the stored encoder; columns missing
        from it get a freshly fitted encoder, which is then stored in the
        mapping. Pass the same dict for the training and the test frame to
        guarantee both use one category -> integer mapping. When omitted,
        every column is fitted independently on ``X_new``.

    Returns
    -------
    pandas.DataFrame
        A copy of ``X_new`` whose ``le_cols`` columns hold integer codes.
        The frame passed in is left unmodified.

    Notes
    -----
    When a stored encoder is reused, ``LabelEncoder.transform`` rejects
    labels it was not fitted on (see the scikit-learn documentation).
    """
    # Work on a copy so the caller's frame is never changed behind its back.
    encoded = X_new.copy()

    for col in le_cols:
        if encoders is not None and col in encoders:
            encoded[col] = encoders[col].transform(encoded[col])
            continue

        # One encoder per column keeps each fitted mapping available for
        # reuse instead of overwriting it when the next column is fitted.
        encoder = LabelEncoder()
        encoded[col] = encoder.fit_transform(encoded[col])
        if encoders is not None:
            encoders[col] = encoder

    return encoded

In [None]:
# Replace the categorical training columns with integer codes.
data = convert2num(data)

# Show the encoded columns only.
data[le_cols].head()

In [None]:
# Preview the full frame after label encoding.
data.head()

In [None]:
# Divide the year by the constant 2022 (a simple rescaling).
# The column is overwritten, so running this cell again divides a second time.
data['year'] = data['year'].div(2022)

# Split Data

In [None]:
# Discard the day-of-month column, then separate the predictors from the target.
data = data.drop(columns=['day'])
X = data.drop(columns='num_sold').to_numpy()
y = data['num_sold'].to_numpy()

(X.shape, y.shape)

In [None]:
# Peek at the first five feature rows.
X[:5]

# Splitting the training set

In [None]:
from sklearn.model_selection import train_test_split

tf.random.set_seed(42)

# Hold out 20% of the rows; random_state makes the shuffle repeatable.
split = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split

(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

# Building and Training our model

In [None]:
# Build, compile and train a fully connected network that predicts num_sold.

# Fix TensorFlow's random seed before any layer is created.
tf.random.set_seed(42)

# 1. Create the model: four ReLU hidden layers of shrinking width feeding a
#    single linear output unit.
model_1 = tf.keras.Sequential([
    *(tf.keras.layers.Dense(width, activation='relu')
      for width in (500, 250, 100, 10)),
    tf.keras.layers.Dense(1),
])

# 2. Compile the model: mean absolute error is both the loss and the metric.
model_1.compile(
    loss=tf.keras.losses.mae,
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    metrics=['MAE'],
)

# 3. Fit the model, using the held-out split as validation data each epoch.
history = model_1.fit(
    x=X_train,
    y=y_train,
    epochs=50,
    verbose=1,
    validation_data=(X_test, y_test),
)

In [None]:
# Plot every series recorded in the training history against the epoch index.
ax = pd.DataFrame(history.history).plot()
ax.set_ylabel("loss")
ax.set_xlabel("epochs")

In [None]:
# Predict num_sold for the held-out rows.
y_p = model_1.predict(X_test)

In [None]:
# Show the first five predictions next to the first five true values.
y_p[:5], y_test[:5]

In [None]:
from sklearn.metrics import r2_score

# Coefficient of determination of the predictions on the held-out rows.
r2_score(y_true=y_test, y_pred=y_p)

In [None]:
# Print the layer-by-layer summary of the trained model.
model_1.summary()

In [None]:
# Render the network as a diagram.
from tensorflow.keras.utils import plot_model

# show_shapes adds each layer's input and output shape to the diagram.
plot_model(model=model_1, show_shapes=True)

# Test Data

In [None]:
# Read the test CSV into a DataFrame.
test_csv = '../input/tabular-playground-series-jan-2022/test.csv'
df_test = pd.read_csv(test_csv)

In [None]:
# Keep the row ids for the submission file; pop() removes the column from the
# frame in the same step.
test_passengerIds = df_test.pop('row_id').values
df_test.head()

In [None]:
# Convert the 'date' column to pandas datetimes so the .dt accessor can be used.
df_test = df_test.assign(date=pd.to_datetime(df_test['date']))

df_test.info()

In [None]:
# Break the parsed date into separate day / month / year columns.
df_test = df_test.assign(
    day=df_test['date'].dt.day,
    month=df_test['date'].dt.month,
    year=df_test['date'].dt.year,
)

df_test.info()

In [None]:
# The original 'date' column is removed after its parts were extracted.
df_test = df_test.drop(columns=['date'])
df_test.head()

In [None]:
# Encode the categorical test columns with the same helper used for training.
# NOTE(review): convert2num fits its encoder on the frame it is given, so the
# integer codes line up with the training codes only if both frames contain
# the same set of categories in each column — TODO confirm.
df_test = convert2num(df_test)

# Show the encoded columns only.
df_test[le_cols].head()

In [None]:
# Divide the year by the constant 2022, as was done for the training frame.
# The column is overwritten, so running this cell again divides a second time.
df_test['year'] = df_test['year'].div(2022)
df_test.head()

In [None]:
# Drop the day-of-month column and convert to a NumPy array.
# From here on `df_test` is an ndarray, not a DataFrame.
df_test = df_test.drop(columns=['day']).to_numpy()
df_test[:5]

In [None]:
# Predict on the test features and keep column 0 of the result, giving one
# value per row.
y_pred = model_1.predict(df_test)[:, 0]

In [None]:
# Peek at the first five test predictions.
y_pred[:5]

In [None]:
# Make sure the predictions are held in a NumPy array (np.array copies its
# input by default).
y_pred = np.array(y_pred)

In [None]:
# Shape of the prediction array; compare with the id array shape in the next cell.
y_pred.shape

In [None]:
# Shape of the saved row-id array (the name says "passenger", but it holds the
# test 'row_id' values).
test_passengerIds.shape

In [None]:
# Pair every test row id with its prediction and write the submission file.
submission_columns = {'row_id': test_passengerIds, 'num_sold': y_pred}
output = pd.DataFrame(submission_columns)
output.to_csv(path_or_buf='submission.csv', index=False)

In [None]:
# Display the submission frame that was written to disk.
output