In [161]:
import numpy as np
import pandas as pd
import tensorflow as tf

## Dividing data into train data and test data
    - train data & test data will be in the form of numpy arrays
    - train data: x_train, y_train
    - test data: x_test, y_test
    - x portion is the input var, y portion is the output var

In [171]:
# loading our clean dataset
df = pd.read_csv("data-cleaned.csv")

In [284]:
# configuring sizes of our train & test dataframes
dfSize = len(df)
trainSize = int(dfSize * 0.7)
testSize = int(dfSize * 0.3)

# grabbing our training dataframes
x_train_df = df[["revenues", "revenue_percent_change", "profits", 
                "profits_percent_change", "assets", "market_value", "employees"]].head(trainSize)
y_train_df = df["rank_change"].head(trainSize)

# converting our training dataframes to numpy arrays
x_train = np.array(x_train_df)
y_train = np.array(y_train_df)

#grabbing our testing dataframes
x_test_df = df[["revenues", "revenue_percent_change", "profits", 
                "profits_percent_change", "assets", "market_value", "employees"]].tail(testSize)
y_test_df = df["rank_change"].tail(testSize)

#converting our testing dataframes to numpy arrays
x_test = np.array(x_test_df)
y_test = np.array(y_test_df)

In [285]:
# verifying the lengths of our training & testing numpy arrays
print(len(x_train) == trainSize & len(y_train) == trainSize)
print(len(x_test) == testSize & len(y_test) == testSize)

True
True


## Building our model
### Goal: Predit if the company rank changed from previous year or not based on 6 input vars
    - input vars: "revenues", "revenue_percent_change", "profits", "profits_percent_change", "assets", "employees"

In [287]:
# normalize the dataset
x_train = tf.keras.utils.normalize(x_train, axis=1)
x_test = tf.keras.utils.normalize(x_test, axis=1)

# build the model
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(128, activation = tf.nn.relu))
model.add(tf.keras.layers.Dense(1, activation = tf.nn.sigmoid))

model.compile(optimizer = "sgd", loss = "binary_crossentropy", metrics = ["accuracy"])

# train the model with trainig data
model.fit(x_train, y_train, epochs = 5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7fb1846505e0>

## Evaluating model

In [288]:
print("--Evaluate model--")
model_loss1, model_acc1 = model.evaluate(x_train,  y_train, verbose=2)
model_loss2, model_acc2 = model.evaluate(x_test,  y_test, verbose=2)
print(f"Train / Test Accuracy: {model_acc1*100:.1f}% / {model_acc2*100:.1f}%")

--Evaluate model--
16/16 - 0s - loss: 0.6446 - accuracy: 0.6590 - 113ms/epoch - 7ms/step
7/7 - 0s - loss: 0.6561 - accuracy: 0.6408 - 21ms/epoch - 3ms/step
Train / Test Accuracy: 65.9% / 64.1%
