In [32]:
# Gruppemedlemmer
# Hanne Austad s375093
# Jaspreet Kaur s375164
# Maryam Alam s375091
# Masooma Zahrah Azhar Khanum s374971

# TASK: Make a prediction algorithm which predicts the price of this stock on a specific date. Input will be date and output should be price of that stock (close value in the data file). You should also show the prediction percentage score.

# 1) We have chosen the Tesla "Predict stock market price for TESLA" use case
# 2 + 3) Given the different nature of regression and classification algorithms, we have chosen to use a regression algorithm in this case. 
# This is because regression algorithms are used for predicting continuous numerical values, while classification algorithms are used for predicting categorical labels. 
# Since the assignment asks us to predict the price, and not whether it increases or decreases (which would be a categorical prediction), a regression algorithm seems to be the logical choice. 

In [33]:
# Importing modules
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Machine learning modules
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, r2_score


In [34]:
# Reading CSV
Tesla_df = pd.read_csv('data2/TSLA.csv')

# Printing number of rows/columns and top 5 rows to get an overview
print(Tesla_df.shape)
print (Tesla_df.head(5))

(2845, 7)
         Date   Open   High    Low  Close  Adj Close    Volume
0  2010-06-29  3.800  5.000  3.508  4.778      4.778  93831500
1  2010-06-30  5.158  6.084  4.660  4.766      4.766  85935500
2  2010-07-01  5.000  5.184  4.054  4.392      4.392  41094000
3  2010-07-02  4.600  4.620  3.742  3.840      3.840  25699000
4  2010-07-06  4.000  4.000  3.166  3.222      3.222  34334500


In [35]:
# Checking for null values to see if the dataset needs to be cleaned first
Tesla_df.isnull().sum()

Date         0
Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64

In [36]:
# Let's split the dataset into training and testing, and set a random state for reproducibility.
TeslaTrain = Tesla_df[['Open','High','Low','Adj Close','Volume']]
TeslaTest = Tesla_df['Close']
TeslaTrain_train, TeslaTrain_test, TeslaTest_train, TeslaTest_test = train_test_split(TeslaTrain, TeslaTest, test_size=0.3, random_state=42)

In [37]:
# Initialize Linear Regression
model = LinearRegression()

# 4 + 5) Train the model and make a prediction
model.fit(TeslaTrain_train, TeslaTest_train)
predictedPrice = model.predict(TeslaTrain_test) # Predicts closing prices for all rows in the dataset

In [38]:
# Let's now predict the price for 2 random dates that exist in the test set
randomDates = Tesla_df.loc[Tesla_df.index.isin(TeslaTrain_test.index), 'Date']

dato = randomDates.iloc[0]
valgtDato = TeslaTrain_test.loc[Tesla_df['Date'] == dato, ['Open', 'High', 'Low', 'Adj Close', 'Volume']]
datoPredictedPrice = model.predict(valgtDato)

dato2 = randomDates.iloc[1]
valgtDato2 = TeslaTrain_test.loc[Tesla_df['Date'] == dato2, ['Open', 'High', 'Low', 'Adj Close', 'Volume']]
datoPredictedPrice2 = model.predict(valgtDato2)

# Print the predictions and actual values
print(f'Predicted price for {dato} is {datoPredictedPrice[0]}')
print(f'Actual price was {Tesla_df.loc[Tesla_df["Date"] == dato, "Close"].values[0]}')

print(f'Predicted price for {dato2} is {datoPredictedPrice2[0]}')
print(f'Actual price was {Tesla_df.loc[Tesla_df["Date"] == dato2, "Close"].values[0]}')


Predicted price for 2010-07-26 is 4.190000000000344
Actual price was 4.19
Predicted price for 2010-08-04 is 4.2520000000003435
Actual price was 4.252


In [39]:
# Evaluating the model - we will use Mean Absolute Error (MAE) to evaluate and R2 for percentage score
mae = mean_absolute_error(TeslaTest_test, predictedPrice)
r2 = r2_score(TeslaTest_test, predictedPrice)
print(f"MAE: {mae}")
print(f"Prediction percentage score (R^2): {r2 * 100:.2f}%")

MAE: 3.741295589751467e-13
Prediction percentage score (R^2): 100.00%
