### Predicting Student Score based on Study Hours

In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

In [2]:
print(tf.__version__)

2.10.0


## Load the data from the CSV file

In [3]:
# Load the data into a pandas DataFrame
student_data = pd.read_csv("./student_scores.csv")
student_data

Unnamed: 0,Hours,Scores
0,2.5,21
1,5.1,47
2,3.2,27
3,8.5,75
4,3.5,30
5,1.5,20
6,9.2,88
7,5.5,60
8,8.3,81
9,2.7,25


In [4]:
## Separate the data into training and test data
from sklearn.model_selection import train_test_split
X = student_data["Hours"]
y = student_data["Scores"]

# Split the data into training and test data
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.20)

X_train.size, y_train.size

(20, 20)

In [5]:
# Set the random seed
tf.random.set_seed(42)

# 1. Create the model
study_mod_1 = tf.keras.Sequential([
    tf.keras.layers.Dense(2),
    tf.keras.layers.Dense(1)
])

# 2. Compile the model
study_mod_1.compile(loss=tf.keras.losses.mae,
                    optimizer=tf.keras.optimizers.Adam(),
                    metrics=["mae"])

# 3. Fit the model
study_mod_1.fit(X_train, y_train, epochs=50, verbose=0)

<keras.callbacks.History at 0x20af850aaa0>

In [6]:
# Evaluate the model
y_preds_1 = study_mod_1.predict(X_test)



In [7]:
# Visualize the model's predictions
plt.plot(X_test, y_test)

plt.plot(X_test, y_preds_1)

plt.xlabel("Hours")
plt.ylabel("Scores")

plt.legend(["Training Data", "Prediction Results"])

NameError: name 'plt' is not defined

In [None]:
# High loss, increase the number of layers
# 1. Set the random seed
study_mod_2 = tf.keras.Sequential([
    tf.keras.layers.Dense(4),
    tf.keras.layers.Dense(1)
])

# 2. Compile the model
study_mod_2.compile(loss=tf.keras.losses.mae,
                    optimizer=tf.keras.optimizers.Adam(),
                    metrics=["mae"])

# 3. Fit the model
study_mod_2.fit(X_train, y_train, epochs=50, verbose=0)

In [None]:
# Evaluate the model
y_preds_2 = study_mod_2.predict(X_test)

In [None]:
# Visualize the model's accuracy
plt.plot(X_test, y_test)

plt.plot(X_test, y_preds_2)

plt.xlabel("Hours")
plt.ylabel("Scores")

plt.legend(["Training Data", "Prediction Results"])

In [None]:
# Set the RELU activation function and increase the number of epochs
# Set the random seed
tf.random.set_seed(42)

# 1. Create the model
study_mod_3 = tf.keras.Sequential([
    tf.keras.layers.Dense(4, activation="relu"),
    tf.keras.layers.Dense(1)
])

# 2. Compile the model
study_mod_3.compile(loss=tf.keras.losses.mae,
                    optimizer=tf.keras.optimizers.Adam(),
                    metrics=["mae"])

# 3. Fit the model
study_mod_3.fit(X_train, y_train, epochs=100, verbose=0)

In [None]:
# Evaluate the model
y_preds_3 = study_mod_3.predict(X_test)

In [None]:
# Visualize the predictions
plt.plot(X_test, y_test)

plt.plot(X_test, y_preds_3)

plt.xlabel("Hours")
plt.ylabel("Scores")

plt.legend(["Training Data", "Prediction Results"])

In [None]:
# Increase the learning rate
# Set the random seed
tf.random.set_seed(42)

# 1. Create the model
study_mod_4 = tf.keras.Sequential([
    tf.keras.layers.Dense(4),
    tf.keras.layers.Dense(1)
])

# 2. Compile the model
study_mod_4.compile(loss=tf.keras.losses.mae,
                    optimizer=tf.keras.optimizers.Adam(learning_rate=0.1),
                    metrics=["mae"])

# 3. Fit the model
study_mod_4.fit(X_train, y_train, epochs=100, verbose=0)

In [None]:
# Evaluate the model
y_preds_4 = study_mod_4.predict(X_test)

In [None]:
# Visualize the predictions' accuracy
plt.plot(X_test, y_test)

plt.plot(X_test, y_preds_4)

plt.xlabel("Hours")

plt.ylabel("Scores")

plt.legend(["Training Data", "Prediction Results"])

In [None]:
## The model has significantly improved but we need to tweak it further
# Set the random seed
tf.random.set_seed(42)

# 1. Create the model
study_mod_5 = tf.keras.Sequential([
    tf.keras.layers.Dense(4),
    tf.keras.layers.Dense(4),
    tf.keras.layers.Dense(1)
])

# 2. Compile the model
study_mod_5.compile(loss=tf.keras.losses.mae,
                    optimizer=tf.keras.optimizers.Adam(learning_rate=0.1),
                    metrics=["mae"])

# 3. Fit the model
study_mod_5.fit(X_train, y_train, epochs=100, verbose=0)

In [None]:
y_preds_5 = study_mod_5.predict(X_test)
y_preds_5

In [None]:
# Plot the test data
plt.plot(X_test, y_test)

# Plot the prediction results
plt.plot(X_test, y_preds_5)

plt.xlabel("Hours")

plt.ylabel("Scores")

plt.legend(["Training Data", "Prediction Results"])

In [None]:
# Try with a model with more nodes
# Set the random seed
tf.random.set_seed(42)

# 1. Create the model
study_mod_6 = tf.keras.Sequential([
    tf.keras.layers.Dense(10),
    tf.keras.layers.Dense(10),
    tf.keras.layers.Dense(1)
])

# 2. Compile the model
study_mod_6.compile(loss=tf.keras.losses.mae,
                    optimizer=tf.keras.optimizers.Adam(learning_rate=0.1),
                    metrics=["mae"])

# 3. Fit the model
study_mod_6.fit(X_train, y_train, epochs=100, verbose=0)

In [None]:
y_preds_6 = study_mod_6.predict(X_test)

In [None]:
# Plot the training and the prediction data against the X values
plt.plot(X_test, y_test)
plt.plot(X_test, y_preds_6)

plt.xlabel("Hours")
plt.ylabel("Scores")

plt.legend(["Training Data", "Prediction Results"])

In [None]:
# The model has improved slighlty
# Set the random seed
tf.random.set_seed(42)

# 1. Create the model
study_mod_7 = tf.keras.Sequential([
    tf.keras.layers.Dense(100),
    tf.keras.layers.Dense(100),
    tf.keras.layers.Dense(1)
])

# 2. Compile the model
study_mod_7.compile(loss=tf.keras.losses.mae,
                    optimizer=tf.keras.optimizers.Adam(learning_rate=0.1),
                    metrics=["mae"])

# 3. Fit the model
study_mod_7.fit(X_train, y_train, epochs=100, verbose=0)

In [None]:
# Evaluate the prediction capabality
y_preds_7 = study_mod_7.predict(X_test)

In [None]:
# Plot the values on a graph
# Plot training data
plt.plot(X_test, y_test)

# Plot prediction data
plt.plot(X_test, y_preds_7)

plt.xlabel("Hours")
plt.ylabel("Scores")

plt.legend(["Training Data", "Prediction Results"])

In [None]:
# The model's prediction results are way off the test data
# Set the random seed
tf.random.set_seed(42)

# 1. Create the model
study_mod_8 = tf.keras.Sequential([
    tf.keras.layers.Dense(10, activation="relu"),
    tf.keras.layers.Dense(10, activation="relu"),
    tf.keras.layers.Dense(1, activation="relu")
])

# 2. Compile the model
study_mod_8.compile(loss=tf.keras.losses.mae,
                   optimizer=tf.keras.optimizers.SGD(),
                    metrics=["mae"])

# 3. Fit the model
study_mod_8.fit(X_train, y_train, epochs=100, verbose=0)

In [None]:
# Evaluate the accuracy of predictions
y_preds_8 = study_mod_8.predict(X_test)

In [None]:
# Plot the predictions and test data on a graph
plt.plot(X_test, y_test)

plt.plot(X_test, y_preds_8)

plt.xlabel("Hours")
plt.ylabel("Scores")

plt.legend(["Training Data", "Prediction Results"])

In [None]:
# Use a different activation function
# Set the random seed
tf.random.set_seed(42)

# 1. Create the model
study_mod_9 = tf.keras.Sequential([
    tf.keras.layers.Dense(10, activation="relu"),
    tf.keras.layers.Dense(10, activation="relu"),
    tf.keras.layers.Dense(1, activation="relu")
])

# 2. Compile the model
study_mod_9.compile(loss=tf.keras.losses.mae,
                    optimizer=tf.keras.optimizers.Adam(learning_rate=0.1),
                    metrics=["mae"])

# 3. Fit the model
study_mod_9.fit(X_train, y_train, epochs=100, verbose=0)

In [None]:
# Check the accuracy of the predictions
y_preds_9 = study_mod_9.predict(X_test)
y_preds_9

In [None]:
# Visualize the predictions
plt.plot(X_test, y_test)

plt.plot(X_test, y_preds_9)

plt.xlabel("Hours")
plt.ylabel("Scores")

plt.legend(["Training Data", "Prediction Results"])

In [None]:
# Increase the epochs
tf.random.set_seed(42)

# 1. Create the model
study_mod_10 = tf.keras.Sequential([
    tf.keras.layers.Dense(1),
    tf.keras.layers.Dense(10),
    tf.keras.layers.Dense(10),
    tf.keras.layers.Dense(1)
])

# 2. Compile the model
study_mod_10.compile(loss=tf.keras.losses.mae,
                     optimizer=tf.keras.optimizers.Adam(learning_rate=0.1),
                     metrics=["mae"])

# 3. Fit the model
study_mod_10.fit(X_train, y_train, epochs=200, verbose=0)

In [None]:
# Evaluate the accuracy of the predictions
y_preds_10 = study_mod_10.predict(X_test)

In [None]:
# Visualize the predictions
plt.plot(X_test, y_test)

plt.plot(X_test, y_preds_10)

plt.xlabel("Hours")
plt.ylabel("Scores")

plt.legend(["Training Data", "Prediction Results"])

In [None]:
predict = study_mod_10.predict(tf.constant([[9.25,]]))
predict