In [77]:
from src.functions.normalize import normalize_data
import numpy as np
import pandas as pd
from src.functions.activation import Sigmoid, ReLU


# Get the Dataset for training
# Load the dataset
data = pd.read_csv(
    'datasets/tesla/TESLA.csv',
    usecols=['Open', 'High', 'Low', 'Close', 'Volume']
)

# Display the first 5 rows of the dataset
print(data.head())

# Normalize the dataset
normalized_data = normalize_data(data)

# Display the first 5 rows of the normalized dataset
print(normalized_data.head())

# Get the features and labels
features_data = normalized_data[['Open', 'High', 'Low', 'Volume']].values
labels_data = normalized_data['Close'].values

# Display the first 5 rows of the features
print(f"Features: \n{features_data[:5]}")
print(f"Labels: \n{labels_data[:5]}")

# Activation funciton
sigmoid = Sigmoid()
relu = ReLU()

       Open      High       Low     Close     Volume
0  1.266667  1.666667  1.169333  1.592667  281494500
1  1.719333  2.028000  1.553333  1.588667  257806500
2  1.666667  1.728000  1.351333  1.464000  123282000
3  1.533333  1.540000  1.247333  1.280000   77097000
4  1.333333  1.333333  1.055333  1.074000  103003500
       Open      High       Low     Close    Volume
0 -0.720866 -0.717089 -0.722164 -0.718012  2.342717
1 -0.716440 -0.713633 -0.718317 -0.718051  2.042015
2 -0.716955 -0.716502 -0.720341 -0.719270  0.334327
3 -0.718259 -0.718300 -0.721383 -0.721070 -0.251957
4 -0.720214 -0.720277 -0.723306 -0.723086  0.076907
Features: 
[[-0.72086574 -0.71708888 -0.72216405  2.34271659]
 [-0.71644005 -0.71363301 -0.7183168   2.04201514]
 [-0.71695496 -0.71650227 -0.72034062  0.33432719]
 [-0.71825856 -0.71830035 -0.72138258 -0.25195686]
 [-0.72021395 -0.72027695 -0.7233062   0.07690679]]
Labels: 
[-0.7180116  -0.71805073 -0.71927035 -0.72107044 -0.72308575]


In [78]:
from src.functions.train import train_model


# Train the model
weights, bias, error_sum = train_model(
    features_data, 
    labels_data, 
    relu, 
    epochs=10, 
    learning_rate=0.0001
)

print(f"Weights: {weights}")
print(f"Bias: {bias}")
print(f"Error: {error_sum[-1]}")



Weights: [0.05117557 0.62515079 0.41295911 0.12010105]
Bias: [-0.16229361]
Error: 1502.0927308810326


In [79]:
from sklearn.metrics import accuracy_score

# Compute the accuracy of the model
XW = np.dot(features_data, weights) + bias
rounded_labels = np.round(labels_data)
y_prediction = np.round(sigmoid(XW))
score = accuracy_score(rounded_labels, y_prediction)
print(f"Accuracy: {score}")

Accuracy: 0.1819468024900962


In [80]:
from src.functions.plots import plot_error

# Plot the error values against the epochs
fig, results = plot_error(error_sum)
print(results)
fig.show()


{'epochs': array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10]), 'error': [np.float64(3545.316848569023), np.float64(1938.0710149910087), np.float64(1665.6896058993564), np.float64(1584.8528738520056), np.float64(1547.7270453379888), np.float64(1527.8810605179629), np.float64(1516.6080386419553), np.float64(1509.727012226868), np.float64(1505.2231255401432), np.float64(1502.0927308810326)]}


In [81]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Load Data
data = pd.read_csv('datasets/tesla/TESLA.csv', usecols=['Date','Open','High','Low','Close','Volume'])
data.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume
0,2010-06-29,1.266667,1.666667,1.169333,1.592667,281494500
1,2010-06-30,1.719333,2.028,1.553333,1.588667,257806500
2,2010-07-01,1.666667,1.728,1.351333,1.464,123282000
3,2010-07-02,1.533333,1.54,1.247333,1.28,77097000
4,2010-07-06,1.333333,1.333333,1.055333,1.074,103003500


# Visualize the Dataset

In [82]:
# Visualize the dataset for all columns
import plotly.express as px
fig = px.line(
    data, 
    title='Tesla Stock Volume Over Time',
    x='Date', 
    y=['Volume'], 
    
    labels={'value': 'Price', 'variable': 'Price Type'},
    template='plotly_dark', 
    line_dash_sequence=['dot', 'dash', 'solid', 'dashdot']
    )

fig.show()


In [83]:
# Visualize the dataset for all columns
fig = px.line(
    data, 
    title='Tesla Stock Prices Over Time',
    x='Date', 
    y=['Open', 'High', 'Low', 'Close'], 
    labels={'value': 'Price', 'variable': 'Price Type'},
    template='plotly_dark', 
    line_dash_sequence=['dot', 'dash', 'solid', 'dashdot']
    )

fig.show()

In [84]:
# Preprocess Data
print(data.head())
to_datetime = pd.to_datetime(data['Date'])
print(to_datetime.head())
data['Date'] = to_datetime
# data.set_index('Date', inplace=True)
# data = data[['Close']]  # Use only the 'Close' column for simplicity
data.head()

         Date      Open      High       Low     Close     Volume
0  2010-06-29  1.266667  1.666667  1.169333  1.592667  281494500
1  2010-06-30  1.719333  2.028000  1.553333  1.588667  257806500
2  2010-07-01  1.666667  1.728000  1.351333  1.464000  123282000
3  2010-07-02  1.533333  1.540000  1.247333  1.280000   77097000
4  2010-07-06  1.333333  1.333333  1.055333  1.074000  103003500
0   2010-06-29
1   2010-06-30
2   2010-07-01
3   2010-07-02
4   2010-07-06
Name: Date, dtype: datetime64[ns]


Unnamed: 0,Date,Open,High,Low,Close,Volume
0,2010-06-29,1.266667,1.666667,1.169333,1.592667,281494500
1,2010-06-30,1.719333,2.028,1.553333,1.588667,257806500
2,2010-07-01,1.666667,1.728,1.351333,1.464,123282000
3,2010-07-02,1.533333,1.54,1.247333,1.28,77097000
4,2010-07-06,1.333333,1.333333,1.055333,1.074,103003500


In [85]:
# Create lag features
data['Close_Lag1'] = data['Close']
data.dropna(inplace=True)
print(data.head())
# Normalize/Scale features
scaler = MinMaxScaler()
data_scaled = scaler.fit_transform(data[['Close', 'Close_Lag1']])
# data_scaled = pd.DataFrame(data_scaled, columns=["0", "1"])
# data_scaled['Close'] = data_scaled['0']
# data_scaled['Close_Lag1'] = data_scaled['1']
# 
# # remove columns '0' and '1'
# data_scaled.drop(['0', '1'], axis=1, inplace=True)
data_scaled = pd.DataFrame(data_scaled, columns=['Close', 'Close_Lag1'])
data_scaled['Date'] = data['Date']
data_scaled.head()

        Date      Open      High       Low     Close     Volume  Close_Lag1
0 2010-06-29  1.266667  1.666667  1.169333  1.592667  281494500    1.592667
1 2010-06-30  1.719333  2.028000  1.553333  1.588667  257806500    1.588667
2 2010-07-01  1.666667  1.728000  1.351333  1.464000  123282000    1.464000
3 2010-07-02  1.533333  1.540000  1.247333  1.280000   77097000    1.280000
4 2010-07-06  1.333333  1.333333  1.055333  1.074000  103003500    1.074000


Unnamed: 0,Close,Close_Lag1,Date
0,0.001319,0.001319,2010-06-29
1,0.001309,0.001309,2010-06-30
2,0.001004,0.001004,2010-07-01
3,0.000554,0.000554,2010-07-02
4,5.1e-05,5.1e-05,2010-07-06


In [86]:
# Visualize the scaled dataset
fig = px.scatter(
    data_scaled, 
    title='Scaled Tesla Close Stock Prices Over Time',
    x='Date',
    y=['Close', 'Close_Lag1'], 
    labels={'value': 'Price', 'variable': 'Price Type'},
    template='plotly_dark', 
    
    )
fig.show()

In [111]:
# Split Data
X = data_scaled['Close_Lag1'].dropna()  # Lagged close prices
y = data_scaled['Close'].dropna()  # Actual close prices
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, train_size=0.8, shuffle=False)
# convert to numpy array
X_train = np.array(X_train, ndmin=2).reshape(-1, 1)
X_test = np.array(X_test, ndmin=2).reshape(-1, 1)
y_train = np.array(y_train, ndmin=2).reshape(-1, 1)
y_test = np.array(y_test, ndmin=2).reshape(-1, 1)
# Reshape the data
print(f'X_train shape: {X_train.shape}, \n{X_train[:5]}')
print(f'X_test shape: {X_test.shape}, \n{X_test[:5]}')
print(f"y_train shape: {y_train.shape}, \n{y_train[:5]}")
print(f"y_test shape: {y_test.shape}, \n{y_test[:5]}")


X_train shape: (2827, 1), 
[[1.31893362e-03]
 [1.30915180e-03]
 [1.00428027e-03]
 [5.54310805e-04]
 [5.05407544e-05]]
X_test shape: (707, 1), 
[[0.60013859]
 [0.61037701]
 [0.61176279]
 [0.62867741]
 [0.64251074]]
y_train shape: (2827, 1), 
[[1.31893362e-03]
 [1.30915180e-03]
 [1.00428027e-03]
 [5.54310805e-04]
 [5.05407544e-05]]
y_test shape: (707, 1), 
[[0.60013859]
 [0.61037701]
 [0.61176279]
 [0.62867741]
 [0.64251074]]


In [115]:
# Build Model
model = LinearRegression()

# Train Model
model.fit(X_train.tolist(), y_train.tolist())

# Evaluate Model
score = model.score(X_test.tolist(), y_test.tolist())
print(f"R^2 Score: {score}")

# Make Predictions
y_pred = model.predict(X_test)
print(f'Predictions: \n{y_pred[:5]}\n')
print(f'Actual: \n{y_test[:5]}\n')
# how well the model is performing
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: \n{mse}\n')

R^2 Score: 1.0
Predictions: 
[[0.60013859]
 [0.61037701]
 [0.61176279]
 [0.62867741]
 [0.64251074]]

Actual: 
[[0.60013859]
 [0.61037701]
 [0.61176279]
 [0.62867741]
 [0.64251074]]

Mean Squared Error: 
0.0



In [117]:
import os
import joblib

# Ensure the directory exists
os.makedirs('saved_models', exist_ok=True)

joblib.dump(model, 'saved_models/linear_regression_model.pkl')

['saved_models/linear_regression_model.pkl']

In [118]:

import joblib

# Load the model
model = joblib.load('saved_models/linear_regression_model.pkl')

# Make Predictions
y_pred = model.predict(X_test)
print(f'Predictions: \n{y_pred[:5]}\n')
print(f'Actual: \n{y_test[:5]}\n')
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')

Predictions: 
[[0.60013859]
 [0.61037701]
 [0.61176279]
 [0.62867741]
 [0.64251074]]

Actual: 
[[0.60013859]
 [0.61037701]
 [0.61176279]
 [0.62867741]
 [0.64251074]]

Mean Squared Error: 0.0


In [119]:
from sklearn.metrics import accuracy_score

# Accuracy Score
y_pred = np.round(y_pred)
y_true = np.round(y_test)
accuracy = accuracy_score(y_true, y_pred)
print(f'Accuracy: {accuracy}')

Accuracy: 1.0


In [120]:
    # Make Predictions
print(X_test[-1][0])
print(X_test[-1].reshape(1, -1))
future_close = model.predict(X_test[-1].reshape(1, -1))
future_close = scaler.inverse_transform([[future_close[0], X_test[-1][0]]])
print(f'Predicted future close price: {future_close}')

0.6152516782145733
[[0.61525168]]


ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 2 dimensions. The detected shape was (1, 2) + inhomogeneous part.