### An Analysis of  Germany Dax 40 Futures 

### Data Understanding 
##### This steps involves collecting the data  and understanding the structure of the data 

In [21]:
## importing various libraries that will aid in collecting data 

## importing investpy library that will pull data  from investing.com

import investpy as inv
import pandas as pd 
import seaborn as sns

In [6]:

# Retrieve historical data for the DAX index
df = inv.get_index_historical_data(index='DAX', country='germany', from_date='01/01/2019', to_date='12/04/2024')

# Calculate percentage change
df['Change'] = round(df['Close'].pct_change() * 100,2)

print(df.head())



                Open      High       Low     Close    Volume Currency  Change
Date                                                                         
2019-01-02  10477.77  10612.72  10386.97  10580.19  82089000      EUR     NaN
2019-01-03  10467.11  10538.66  10400.11  10416.66  87354000      EUR   -1.55
2019-01-04  10533.94  10786.34  10483.90  10767.69  98288000      EUR    3.37
2019-01-07  10814.39  10814.47  10681.27  10747.81  73352000      EUR   -0.18
2019-01-08  10750.19  10910.71  10745.03  10803.98  96569000      EUR    0.52


In [7]:
## listing the  number of columns 
df.columns

Index(['Open', 'High', 'Low', 'Close', 'Volume', 'Currency', 'Change'], dtype='object')

In [8]:
### checking the top 5 rows 
df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Currency,Change
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2019-01-02,10477.77,10612.72,10386.97,10580.19,82089000,EUR,
2019-01-03,10467.11,10538.66,10400.11,10416.66,87354000,EUR,-1.55
2019-01-04,10533.94,10786.34,10483.9,10767.69,98288000,EUR,3.37
2019-01-07,10814.39,10814.47,10681.27,10747.81,73352000,EUR,-0.18
2019-01-08,10750.19,10910.71,10745.03,10803.98,96569000,EUR,0.52


In [9]:
## understanding the shape of our data
print(f"Our data contains {df.shape[0]} rows and {df.shape[1]} columns")

Our data contains 1344 rows and 7 columns


In [10]:
# checking the details/info of all our columns
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1344 entries, 2019-01-02 to 2024-04-12
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Open      1344 non-null   float64
 1   High      1344 non-null   float64
 2   Low       1344 non-null   float64
 3   Close     1344 non-null   float64
 4   Volume    1344 non-null   int64  
 5   Currency  1344 non-null   object 
 6   Change    1343 non-null   float64
dtypes: float64(5), int64(1), object(1)
memory usage: 84.0+ KB


### From the data understanding we can conclude that :

##### 1.Our data contains 7 columns and 1344 rows 
##### 2. We have 3 different data types that is integer, float64 and object


### Data Quality Issues :
##### 1.Our data has one missing value
##### 2.The date column is not  been recognized as a column
##### 3.We will also have  to add the month ,day  column  using code as our API - libary did not allow us to add them from the data source



### How to improve the data quality issue :
##### 1.Replacing the missing values
##### 2.Resetting our index so that the dates can be recognized as columns


### Is there any data restructing to be done ? Yes 
#### 1.Adding  the day and Month Column

## Data Cleaning 

In [11]:
# reseting the index so that the dates can well corrected 

df.reset_index(inplace=True)

In [12]:
# confirming if the index has be reset
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Currency,Change
0,2019-01-02,10477.77,10612.72,10386.97,10580.19,82089000,EUR,
1,2019-01-03,10467.11,10538.66,10400.11,10416.66,87354000,EUR,-1.55
2,2019-01-04,10533.94,10786.34,10483.9,10767.69,98288000,EUR,3.37
3,2019-01-07,10814.39,10814.47,10681.27,10747.81,73352000,EUR,-0.18
4,2019-01-08,10750.19,10910.71,10745.03,10803.98,96569000,EUR,0.52


In [13]:
# replacing  the missing values 
df.isna().sum()

Date        0
Open        0
High        0
Low         0
Close       0
Volume      0
Currency    0
Change      1
dtype: int64

In [14]:
#filling the missing values with the mean
df.fillna(value = df["Change"].mean())

Unnamed: 0,Date,Open,High,Low,Close,Volume,Currency,Change
0,2019-01-02,10477.77,10612.72,10386.97,10580.19,82089000,EUR,0.047677
1,2019-01-03,10467.11,10538.66,10400.11,10416.66,87354000,EUR,-1.550000
2,2019-01-04,10533.94,10786.34,10483.90,10767.69,98288000,EUR,3.370000
3,2019-01-07,10814.39,10814.47,10681.27,10747.81,73352000,EUR,-0.180000
4,2019-01-08,10750.19,10910.71,10745.03,10803.98,96569000,EUR,0.520000
...,...,...,...,...,...,...,...,...
1339,2024-04-08,18163.39,18326.37,18163.39,18318.97,61073600,EUR,0.790000
1340,2024-04-09,18318.40,18318.51,18058.22,18076.69,65276400,EUR,-1.320000
1341,2024-04-10,18078.89,18239.50,17975.84,18097.30,83348200,EUR,0.110000
1342,2024-04-11,18096.51,18101.31,17864.69,17954.48,84365104,EUR,-0.790000


In [15]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1344 entries, 0 to 1343
Data columns (total 8 columns):
 #   Column    Non-Null Count  Dtype         
---  ------    --------------  -----         
 0   Date      1344 non-null   datetime64[ns]
 1   Open      1344 non-null   float64       
 2   High      1344 non-null   float64       
 3   Low       1344 non-null   float64       
 4   Close     1344 non-null   float64       
 5   Volume    1344 non-null   int64         
 6   Currency  1344 non-null   object        
 7   Change    1343 non-null   float64       
dtypes: datetime64[ns](1), float64(5), int64(1), object(1)
memory usage: 84.1+ KB


In [16]:
# Convert the "Date" column to datetime format if it's not already in datetime format
df['Date'] = pd.to_datetime(df['Date'])

# getting the day from the  date column
df["day"] = df["Date"].dt.day_name()

# getting the month from the month column
df["month"] = df["Date"].dt.month_name()

In [17]:
# checking if the  the day and month have been added 
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Currency,Change,day,month
0,2019-01-02,10477.77,10612.72,10386.97,10580.19,82089000,EUR,,Wednesday,January
1,2019-01-03,10467.11,10538.66,10400.11,10416.66,87354000,EUR,-1.55,Thursday,January
2,2019-01-04,10533.94,10786.34,10483.9,10767.69,98288000,EUR,3.37,Friday,January
3,2019-01-07,10814.39,10814.47,10681.27,10747.81,73352000,EUR,-0.18,Monday,January
4,2019-01-08,10750.19,10910.71,10745.03,10803.98,96569000,EUR,0.52,Tuesday,January


## Exploratory Data Analysis

### Here we will explore/analyze data to gain insights and better understanding 

#### Statistical Analysis ,


In [20]:
# writing a function that get min ,max ,mean() and total trading volume
max_price = df["High"].max()
min_price = df["Low"].min()
avg_price = df["Close"].mean()
total_volume = df["Volume"].sum()

print("Dax Analysis:")
print(f"Max price:{max_price:.2f}")
print(f"Min Price:{min_price:.2f}")
print(f'avg_price:{avg_price:.2f}')
print(f'total_volume:{total_volume:.2f}')

Dax Analysis:
Max price:18567.16
Min Price:8255.65
avg_price:14036.31
total_volume:107329552768.00


# Univariate Analysis