# Tesla Stock Monthly Forecasting - by Greg Hogg, CEO of MLNOW.ai

## Data Import and Cleaning

In [None]:
# If this cell is commented out, the plots will show in Jupyter / Colab.
# If it gets executed, the plots will be embedded into the notebook HTML itself.

import plotly.offline as pyo
pyo.init_notebook_mode(connected=True)

In [None]:
import pandas as pd
import plotly.express as px

# Load the Tesla price history from TSLA.csv (path is relative to the working directory).
data = pd.read_csv('TSLA.csv')

# Show the loaded frame as the cell output.
data

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2010-07-01,1.666667,1.728000,0.998667,1.329333,1.329333,968637000
1,2010-08-01,1.366667,1.478667,1.159333,1.298667,1.298667,225573000
2,2010-09-01,1.308000,1.544000,1.300000,1.360667,1.360667,270688500
3,2010-10-01,1.379333,1.458000,1.333333,1.456000,1.456000,98217000
4,2010-11-01,1.462667,2.400000,1.403333,2.355333,2.355333,424726500
...,...,...,...,...,...,...,...
156,2023-07-01,276.489990,299.290009,254.119995,267.429993,267.429993,2392089000
157,2023-08-01,266.260010,266.470001,212.360001,258.079987,258.079987,2501580900
158,2023-09-01,257.260010,278.980011,234.580002,250.220001,250.220001,2439306100
159,2023-10-01,244.809998,268.940002,242.619995,251.119995,251.119995,1133945600


In [None]:
# Keep only the date and the closing price.
# Selecting the wanted columns (instead of dropping the unwanted ones in place)
# makes this cell safe to re-run: an in-place drop raises KeyError on a second
# execution because the columns are already gone.
data = data[['Date', 'Close']].copy()

data

Unnamed: 0,Date,Close
0,2010-07-01,1.329333
1,2010-08-01,1.298667
2,2010-09-01,1.360667
3,2010-10-01,1.456000
4,2010-11-01,2.355333
...,...,...
156,2023-07-01,267.429993
157,2023-08-01,258.079987
158,2023-09-01,250.220001
159,2023-10-01,251.119995


In [None]:
# Line chart of the closing price against the date for the full history.
fig = px.line(data, x='Date', y='Close', title='Tesla Stock Price Over Time')

fig.show()

## Data Preparation for Training the Model (Window)

In [None]:
def create_rolling_window_df(data, window_size):
    """Generate a DataFrame with rolling windows of closing prices.

    Each output row holds ``window_size`` consecutive values followed by the
    value that comes right after them, labelled with that value's index entry.

    Parameters
    ----------
    data : pandas.Series
        Values in the order they should be windowed; the index supplies the
        'Target Date' labels.
    window_size : int
        Number of lagged values per row.

    Returns
    -------
    pandas.DataFrame
        Columns 'Target Date', 'N-{window_size}', ..., 'N-1', 'N', with one
        row per position that has a full window before it. The frame has the
        columns but no rows when ``len(data) <= window_size``.
    """
    columns = [f'N-{window_size - i}' for i in range(window_size)] + ['N']

    # Collect every row first and build the frame once: assigning df.loc[i]
    # inside a loop enlarges the frame on each insert, which is slow.
    # .iloc makes the slice explicitly positional whatever the index type is.
    rows = [
        [data.index[i + window_size], *data.iloc[i:i + window_size + 1]]
        for i in range(len(data) - window_size)
    ]

    return pd.DataFrame(rows, columns=['Target Date'] + columns)

# Number of previous closing prices used as features for each target value.
window_size = 5
# Closing prices as a Series indexed by date; the index provides the target-date labels.
data_with_dates = data.set_index('Date')['Close']
window_df = create_rolling_window_df(data_with_dates, window_size)

window_df

Unnamed: 0,Target Date,N-5,N-4,N-3,N-2,N-1,N
0,2010-12-01,1.329333,1.298667,1.360667,1.456000,2.355333,1.775333
1,2011-01-01,1.298667,1.360667,1.456000,2.355333,1.775333,1.606667
2,2011-02-01,1.360667,1.456000,2.355333,1.775333,1.606667,1.592667
3,2011-03-01,1.456000,2.355333,1.775333,1.606667,1.592667,1.850000
4,2011-04-01,2.355333,1.775333,1.606667,1.592667,1.850000,1.840000
...,...,...,...,...,...,...,...
151,2023-07-01,205.710007,207.460007,164.309998,203.929993,261.769989,267.429993
152,2023-08-01,207.460007,164.309998,203.929993,261.769989,267.429993,258.079987
153,2023-09-01,164.309998,203.929993,261.769989,267.429993,258.079987,250.220001
154,2023-10-01,203.929993,261.769989,267.429993,258.079987,250.220001,251.119995


In [None]:
# Features: every lag column, i.e. all columns except the leading
# 'Target Date' and the trailing 'N'.
X = window_df[window_df.columns[1:-1]].to_numpy()
# Target: the value that follows each window.
y = window_df['N'].to_numpy()

X.shape, y.shape

((156, 5), (156,))

## Training the Linear Model on 5-Month Window

In [None]:
# Split the data into training and test sets
from sklearn.model_selection import train_test_split

# shuffle=False keeps the rows in their original order, so the test set is the
# final 20% of the windows rather than a random sample.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

X_train.shape, X_test.shape, y_train.shape, y_test.shape

((124, 5), (32, 5), (124,), (32,))

In [None]:
# Create and train the model
from sklearn.linear_model import LinearRegression

model = LinearRegression()
model.fit(X_train, y_train)

# Show the fitted intercept and one coefficient per lag column.
model.intercept_, model.coef_

(-0.8450759517745041,
 array([-0.14037808,  0.43799997,  0.28809522, -0.48559298,  1.07284192]))

## Visualization and Evaluation

In [None]:
# Predict on training set
y_train_pred = model.predict(X_train)

# Create a DataFrame for visualization.
# The split was not shuffled, so the first len(y_train) target dates belong to
# the training rows. Slicing by len(y_train) rather than [:-len(y_test)] also
# stays correct when the test set is empty, where [:-0] would select nothing.
train_dates = window_df['Target Date'].iloc[:len(y_train)].reset_index(drop=True)
train_results = pd.DataFrame({'Date': train_dates, 'Actual': y_train, 'Predicted': y_train_pred})

# Plot the predictions vs actual values
fig = px.line(train_results, x='Date', y=['Actual', 'Predicted'], title='Actual vs Predicted Stock Prices on Training Data (Monthly)')
fig.show()

In [None]:
# Predict on test set
from sklearn.metrics import mean_absolute_error

y_pred = model.predict(X_test)

# Calculate MAE between the held-out targets and the model's predictions
mae = mean_absolute_error(y_test, y_pred)
print(f"Mean Absolute Error: {mae:.2f}")

Mean Absolute Error: 47.88


In [None]:
# Visualize the predictions vs actual values.
# Take the dates from the same frame the targets came from: the split was not
# shuffled, so the rows after the first len(y_train) are exactly the test rows.
# Slicing data['Date'] from the end only lines up because both frames happen to
# end on the same date, and it carries over a non-zero-based index.
test_dates = window_df['Target Date'].iloc[len(y_train):].reset_index(drop=True)
results = pd.DataFrame({'Date': test_dates, 'Actual': y_test, 'Predicted': y_pred})

fig = px.line(results, x='Date', y=['Actual', 'Predicted'], title='Actual vs Predicted Stock Prices')
fig.show()


## Future Prediction

In [None]:
def recursive_forecast(model, initial_data, steps, window_size=None):
    """Predict future values by feeding each prediction back in as input.

    Parameters
    ----------
    model : object
        Fitted estimator whose ``predict`` accepts a list holding one window
        of values and returns a sequence with one prediction.
    initial_data : sequence
        Most recent observed values, oldest first. Must contain at least
        ``window_size`` items.
    steps : int
        Number of future values to generate.
    window_size : int, optional
        Number of trailing values passed to the model at each step. Defaults
        to the model's ``n_features_in_`` when it has one, otherwise to
        ``len(initial_data)``. Passing it explicitly (or relying on the
        default) removes the dependency on a notebook-level global.

    Returns
    -------
    list
        ``steps`` predictions, in the order they were generated.

    Raises
    ------
    ValueError
        If ``window_size`` is smaller than 1 or ``initial_data`` is shorter
        than ``window_size``.
    """
    history = list(initial_data)

    if window_size is None:
        # Prefer the feature count the model was fitted with; fall back to
        # using everything that was supplied.
        window_size = getattr(model, 'n_features_in_', len(history))
    if window_size < 1:
        raise ValueError("window_size must be at least 1")
    if len(history) < window_size:
        raise ValueError(
            f"initial_data has {len(history)} values but window_size is {window_size}"
        )

    predictions = []
    for _ in range(steps):
        # The model sees only the most recent window, which from the second
        # step onward includes its own earlier predictions.
        prediction = model.predict([history[-window_size:]])[0]
        predictions.append(prediction)
        history.append(prediction)
    return predictions

# Predict the next 12 months
n_future_months = 12
future_predictions = recursive_forecast(model, data['Close'].values[-window_size:], n_future_months)

# Visualize the future predictions.
# The data is stamped on the first day of each month, so generate month-start
# ('MS') dates. Month-end ('M') frequency would label the forecasts with the
# last day of each month, which does not match the convention of the input data.
future_dates = pd.date_range(data['Date'].iloc[-1], periods=n_future_months + 1, freq='MS').tolist()[1:]  # drop the first entry: it is the last date already in our data
future_data = pd.DataFrame({'Date': future_dates, 'Predicted': future_predictions})

fig = px.line(future_data, x='Date', y='Predicted', title='Predicted Tesla Stock Price for the Next 12 Months')
fig.show()


In [None]:
# Export the notebook to HTML via nbconvert (expects the file to be named notebook.ipynb).
!jupyter nbconvert --to html notebook.ipynb

[NbConvertApp] Converting notebook notebook.ipynb to html
[NbConvertApp] Writing 681871 bytes to notebook.html
