In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
# Matplotlib 3.6 renamed the bundled seaborn style sheets to 'seaborn-v0_8*'
# and newer releases no longer accept the bare 'seaborn' name, so use
# whichever of the two this installation actually provides.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

Importing libraries

In [2]:
# Load the TSLA price history CSV into a DataFrame.
csv_path = r'../input/tesla-inc-tsla-dataset/TSLA.csv'
df = pd.read_csv(csv_path)

# Report (rows, columns), then let the notebook render the first five rows.
n_rows, n_cols = df.shape
print((n_rows, n_cols))
df.head()

Printing dimensions/first 5 rows of dataset

In [3]:
# Convert the 'Date' column with pandas' datetime parser and store the result
# back on the frame, then preview the converted column.
parsed_dates = pd.to_datetime(df['Date'])
df['Date'] = parsed_dates
df['Date'].head()

Parsing the 'Date' column into pandas datetime values

In [4]:
# Draw one line per price column against the date, each in its own colour.
price_columns = ['Open', 'High', 'Low', 'Close']
line_colors = ['red', 'green', 'blue', 'yellow']

plt.figure(figsize=(16, 6), constrained_layout=True)
for column, color in zip(price_columns, line_colors):
    plt.plot(df['Date'], df[column], color=color, linewidth=2, label=f'{column} Price')

# Titles and axis labels so the figure can be read on its own.
plt.title('Tesla Open/High/Low/Close Stock Price vs. Date')
plt.xlabel('Date')
plt.ylabel('Stock Price')
plt.legend()
plt.show()

Data visualization with matplotlib

In [5]:
# Drop rows with missing values before extracting arrays.
df.dropna(inplace=True)

# Features: the four daily price columns. Target: 'Adj Close', reshaped to a
# single-column 2-D array because the scaler expects 2-D input.
feature_columns = ['Open', 'High', 'Low', 'Close']
X = df[feature_columns].to_numpy()
y = df['Adj Close'].to_numpy().reshape(-1, 1)

# Min-max scale features and target to [0, 1]; the fitted scalers are kept
# so the scaling can be reused or inverted later.
scaler_x = MinMaxScaler(feature_range=(0, 1))
X_scaled = scaler_x.fit_transform(X)
scaler_y = MinMaxScaler(feature_range=(0, 1))
y_scaled = scaler_y.fit_transform(y)

# Show the shape and contents of each scaled array (features first, then target).
for scaled in (X_scaled, y_scaled):
    print(scaled.shape)
    print(scaled)

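Preprocessing: dropping rows with missing values, then min-max scaling the features and target to the [0, 1] range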

In [6]:
# Split the *unscaled* arrays first, then fit the scalers on the training rows
# only. Fitting MinMaxScaler on the whole dataset before splitting lets the
# test rows' min/max shape the training inputs (data leakage) and makes the
# test metrics optimistic. The split arguments are unchanged, so the same rows
# land in each partition as before.
X_train_raw, X_test_raw, y_train_raw, y_test_raw = train_test_split(
    X, y, train_size=0.8, test_size=0.2, random_state=0
)

# Rebind the scalers to the training-only fit so that any later
# inverse_transform matches the scaling actually applied below.
scaler_x = MinMaxScaler(feature_range=(0, 1)).fit(X_train_raw)
scaler_y = MinMaxScaler(feature_range=(0, 1)).fit(y_train_raw)

# Test values may fall slightly outside [0, 1]; that is expected, because the
# scaler has not seen them.
X_train = scaler_x.transform(X_train_raw)
X_test = scaler_x.transform(X_test_raw)
y_train = scaler_y.transform(y_train_raw)
y_test = scaler_y.transform(y_test_raw)

print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

Data splitting

In [7]:
# Seed NumPy and TensorFlow before building the model so that weight
# initialisation and training are repeatable across Restart & Run All.
# NOTE(review): full determinism can also depend on the TF/Keras version and
# on the hardware in use - confirm on the target environment.
SEED = 0
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Two stacked Dense layers with linear activations (4 units -> 1 unit). With no
# non-linear activation the network is an affine function of its inputs.
# The input width is taken from the training matrix instead of being hard-coded.
model = keras.Sequential([
    keras.layers.Dense(4, input_shape=(X_train.shape[1],), activation='linear'),
    keras.layers.Dense(1, activation='linear')
])

model.compile(loss='mean_squared_error', optimizer='adam')

# `fit` returns a Keras History object (per-epoch training log), not a model;
# the original variable name is kept so later cells that use it still work.
fitted_model = model.fit(X_train, y_train, epochs=50)

Creating and fitting a sequential model

In [8]:
# Run the model on each split (test first, then train) and collapse every
# prediction array to 1-D.
test_predict, train_predict = (
    model.predict(split).flatten() for split in (X_test, X_train)
)

Flattening the predictions to 1-D arrays so that their shape matches the flattened target arrays they are compared with

In [9]:
# Pair each test target with the model's prediction for the same row.
flat_y_test = y_test.flatten()
ap = pd.DataFrame({'Actual': flat_y_test, 'Predicted': test_predict})

# Scatter of actual vs. predicted values on a square figure with linear axes.
fig, ax = plt.subplots(figsize=(16, 16))
sns.scatterplot(data=ap, x='Actual', y='Predicted', ax=ax)
ax.set_title('Actual Test Values vs. Predicted Test Values')
ax.set_xscale('linear')
ax.set_yscale('linear')
plt.show()

# Correlation matrix of the two columns, rendered as the cell's output.
ap.corr()

Data visualization for test data vs. predicted data

In [10]:
# Pair each training target with the model's prediction for the same row.
flat_y_train = y_train.flatten()
ap = pd.DataFrame({'Actual': flat_y_train, 'Predicted': train_predict})

# Scatter of actual vs. predicted values on a square figure with linear axes.
fig, ax = plt.subplots(figsize=(16, 16))
sns.scatterplot(data=ap, x='Actual', y='Predicted', ax=ax)
ax.set_title('Actual Training Values vs. Predicted Training Values')
ax.set_xscale('linear')
ax.set_yscale('linear')
plt.show()

# Correlation matrix of the two columns, rendered as the cell's output.
ap.corr()

Data visualization for training data vs. predicted data

In [14]:
# R^2 on the held-out split. scikit-learn's signature is
# r2_score(y_true, y_pred) and the metric is not symmetric, so the ground
# truth must be passed first and the predictions second.
r2_score(flat_y_test, test_predict)

In [15]:
# R^2 on the training split. scikit-learn's signature is
# r2_score(y_true, y_pred) and the metric is not symmetric, so the ground
# truth must be passed first and the predictions second.
r2_score(flat_y_train, train_predict)