Import the neccessary libraries

In [37]:
import yfinance as yf
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression,Lasso,Ridge,ElasticNet
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error,r2_score,mean_absolute_error
print("----- Libraries imported successfully!")

----- Libraries imported successfully!


Set style for better visualization

In [38]:
plt.style.use("seaborn-v0_8")
sns.set_palette("husl")

Data Acquistion and Data Loading

In [39]:
# Fetch Audi stock data (Audi is part of volkswagen Group, we will use VOW3.DE)
# VOW3.DE is Volkswagen AG preference shares which include Audi
ticker = "VOW3.DE"
start_date = "2015-01-01"
end_date = "2024-01-01"

# Download the stock data
print("Downloading Audi Stock Data.....")
stock_data = yf.download(ticker,start=start_date,end=end_date)

[*********************100%***********************]  1 of 1 completed

Downloading Audi Stock Data.....





In [None]:
stock_data.to_csv("data.csv",index=False)

In [40]:
# Display basic information about the data
print(f"Data Shape: {stock_data.shape}")

Data Shape: (2288, 5)


In [41]:
stock_data

Price,Close,High,Low,Open,Volume
Ticker,VOW3.DE,VOW3.DE,VOW3.DE,VOW3.DE,VOW3.DE
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2015-01-02,103.257202,104.954581,102.182199,104.388788,641902
2015-01-05,99.749290,102.634829,99.296654,102.521672,1135396
2015-01-06,100.371643,101.955865,98.617690,100.145329,1030867
2015-01-07,100.994026,102.012455,100.060471,101.163766,692227
2015-01-08,104.021011,104.501930,101.276913,102.210467,976784
...,...,...,...,...,...
2023-12-21,97.592812,97.592812,96.620873,97.367182,989675
2023-12-22,97.679588,98.339118,97.367180,97.419252,506921
2023-12-27,97.887871,97.922577,96.967998,97.506037,633438
2023-12-28,97.332474,98.044070,96.967993,97.731661,526457


In [42]:
print(stock_data.info())

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2288 entries, 2015-01-02 to 2023-12-29
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   (Close, VOW3.DE)   2288 non-null   float64
 1   (High, VOW3.DE)    2288 non-null   float64
 2   (Low, VOW3.DE)     2288 non-null   float64
 3   (Open, VOW3.DE)    2288 non-null   float64
 4   (Volume, VOW3.DE)  2288 non-null   int64  
dtypes: float64(4), int64(1)
memory usage: 107.2 KB
None


In [43]:
print("----- Basic Statistics -----")
print(stock_data.describe())

----- Basic Statistics -----
Price         Close         High          Low         Open        Volume
Ticker      VOW3.DE      VOW3.DE      VOW3.DE      VOW3.DE       VOW3.DE
count   2288.000000  2288.000000  2288.000000  2288.000000  2.288000e+03
mean      95.856341    97.155678    94.626778    95.978871  1.280801e+06
std       19.963326    20.160640    19.774907    20.012758  7.854458e+05
min       53.392242    54.918393    49.297677    53.184128  0.000000e+00
25%       83.340548    84.467194    82.357174    83.504233  8.568738e+05
50%       94.045135    95.173310    92.884975    94.090418  1.099580e+06
75%      105.378181   106.295680   104.070361   105.255573  1.501650e+06
max      158.730423   162.367913   157.668137   160.951542  1.419737e+07


Data Preprocessing

In [44]:
# Check for missing values
stock_data_missing = stock_data.isnull().sum()
print("----- Missing Values -----")
print(stock_data)

----- Missing Values -----
Price            Close        High         Low        Open   Volume
Ticker         VOW3.DE     VOW3.DE     VOW3.DE     VOW3.DE  VOW3.DE
Date                                                               
2015-01-02  103.257202  104.954581  102.182199  104.388788   641902
2015-01-05   99.749290  102.634829   99.296654  102.521672  1135396
2015-01-06  100.371643  101.955865   98.617690  100.145329  1030867
2015-01-07  100.994026  102.012455  100.060471  101.163766   692227
2015-01-08  104.021011  104.501930  101.276913  102.210467   976784
...                ...         ...         ...         ...      ...
2023-12-21   97.592812   97.592812   96.620873   97.367182   989675
2023-12-22   97.679588   98.339118   97.367180   97.419252   506921
2023-12-27   97.887871   97.922577   96.967998   97.506037   633438
2023-12-28   97.332474   98.044070   96.967993   97.731661   526457
2023-12-29   97.020065   97.401893   96.811788   97.245689   419018

[2288 rows x 5 colum

In [45]:
# Check for duplicated rows
stock_data_duplicated = stock_data.duplicated().sum()
print("----- Duplicated Rows -----")
print(stock_data_duplicated)

----- Duplicated Rows -----
1


In [None]:
stock_data.drop_duplicates()


Price,Close,High,Low,Open,Volume
Ticker,VOW3.DE,VOW3.DE,VOW3.DE,VOW3.DE,VOW3.DE
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2015-01-02,103.257202,104.954581,102.182199,104.388788,641902
2015-01-05,99.749290,102.634829,99.296654,102.521672,1135396
2015-01-06,100.371643,101.955865,98.617690,100.145329,1030867
2015-01-07,100.994026,102.012455,100.060471,101.163766,692227
2015-01-08,104.021011,104.501930,101.276913,102.210467,976784
...,...,...,...,...,...
2023-12-21,97.592812,97.592812,96.620873,97.367182,989675
2023-12-22,97.679588,98.339118,97.367180,97.419252,506921
2023-12-27,97.887871,97.922577,96.967998,97.506037,633438
2023-12-28,97.332474,98.044070,96.967993,97.731661,526457


In [81]:
stock_data_duplicated2 = stock_data.duplicated().sum()
print("----- Duplicated Rows -----")
print(stock_data_duplicated2)

----- Duplicated Rows -----
1
