### Import the Packages

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import seaborn as sns
# Apply seaborn's default theme to all subsequent matplotlib figures.
sns.set()

### Importing the Data

In [2]:
# Load the raw CSV into a DataFrame; the path is relative to the notebook's working directory.
data = pd.read_csv(filepath_or_buffer='Index2018.csv')

In [3]:
# Work on an independent (deep) copy so the frame loaded from disk stays untouched.
data_comp = data.copy(deep=True)

In [4]:
# Preview the first five rows of the working copy.
data_comp.head(n=5)

Unnamed: 0,date,spx,dax,ftse,nikkei
0,07/01/1994,469.9,2224.95,3445.98,18124.01
1,10/01/1994,475.27,2225.0,3440.58,18443.44
2,11/01/1994,474.13,2228.1,3413.77,18485.25
3,12/01/1994,474.17,2182.06,3372.02,18793.88
4,13/01/1994,472.47,2142.37,3360.01,18577.26


### Examining the Data

In [5]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
data_comp.describe(percentiles=[0.25, 0.5, 0.75])

Unnamed: 0,spx,dax,ftse,nikkei
count,6269.0,6269.0,6269.0,6269.0
mean,1288.127542,6080.063363,5422.713545,14597.0557
std,487.586473,2754.361032,1145.572428,4043.122953
min,438.92,1911.7,2876.6,7054.98
25%,990.671905,4069.35,4486.1,10709.29
50%,1233.42,5773.34,5662.43,15028.17
75%,1459.987747,7443.07,6304.25,17860.47
max,2872.867839,13559.6,7778.637689,24124.15


In [6]:
# Summarise the date column while it still holds plain strings (dtype object).
data_comp["date"].describe()

count           6269
unique          6269
top       07/01/1994
freq               1
Name: date, dtype: object

In [7]:
# Parse the day-first date strings (e.g. 07/01/1994 is 7 January 1994) into datetime values.
data_comp["date"] = pd.to_datetime(data_comp["date"], dayfirst=True)

In [8]:
# Summarise the date column again now that it has been converted to datetimes.
data_comp["date"].describe()

  data_comp.date.describe()


count                    6269
unique                   6269
top       1994-01-07 00:00:00
freq                        1
first     1994-01-07 00:00:00
last      2018-01-29 00:00:00
Name: date, dtype: object

In [9]:
# Use the parsed dates as the index so the frame can be handled as a time series.
# Reassigning the result (instead of inplace=True) keeps the data lineage explicit
# and avoids silently mutating a frame that earlier cells have already displayed.
data_comp = data_comp.set_index("date")

In [10]:
# Preview the first five rows; the dates now appear as the index.
data_comp.iloc[:5]

Unnamed: 0_level_0,spx,dax,ftse,nikkei
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1994-01-07,469.9,2224.95,3445.98,18124.01
1994-01-10,475.27,2225.0,3440.58,18443.44
1994-01-11,474.13,2228.1,3413.77,18485.25
1994-01-12,474.17,2182.06,3372.02,18793.88
1994-01-13,472.47,2142.37,3360.01,18577.26


### Setting the Frequency

In [11]:
# Re-index to calendar-daily frequency; dates that are absent from the data
# show up as all-NaN rows.
# "D" is the canonical pandas alias for daily frequency; the lowercase spelling
# is deprecated in recent pandas releases.
data_comp = data_comp.asfreq("D")

In [12]:
# Preview the first five rows after switching to daily frequency.
data_comp.head(n=5)

Unnamed: 0_level_0,spx,dax,ftse,nikkei
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1994-01-07,469.9,2224.95,3445.98,18124.01
1994-01-08,,,,
1994-01-09,,,,
1994-01-10,475.27,2225.0,3440.58,18443.44
1994-01-11,474.13,2228.1,3413.77,18485.25


##### The two rows of NaN values indicate that setting a daily frequency generated new periods that have no values associated with them.
##### Note also that closing prices are not recorded on weekends or holidays: 8 and 9 January 1994 fall on a Saturday and a Sunday respectively, so those dates should not be counted.

### Restricting the data to business days (dropping the unwanted weekend rows)

In [13]:
# Re-index to business-day frequency, which drops the Saturday/Sunday rows
# introduced by the daily frequency.
# "B" is the canonical pandas alias for business days; the lowercase spelling
# is deprecated in recent pandas releases.
data_comp = data_comp.asfreq("B")

In [14]:
# Preview the first five rows after restricting the index to business days.
data_comp.iloc[:5]

Unnamed: 0_level_0,spx,dax,ftse,nikkei
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1994-01-07,469.9,2224.95,3445.98,18124.01
1994-01-10,475.27,2225.0,3440.58,18443.44
1994-01-11,474.13,2228.1,3413.77,18485.25
1994-01-12,474.17,2182.06,3372.02,18793.88
1994-01-13,472.47,2142.37,3360.01,18577.26


### Missing Values

In [15]:
# Boolean mask with the same shape as the frame: True wherever a value is missing.
pd.isna(data_comp)

Unnamed: 0_level_0,spx,dax,ftse,nikkei
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1994-01-07,False,False,False,False
1994-01-10,False,False,False,False
1994-01-11,False,False,False,False
1994-01-12,False,False,False,False
1994-01-13,False,False,False,False
...,...,...,...,...
2018-01-23,False,False,False,False
2018-01-24,False,False,False,False
2018-01-25,False,False,False,False
2018-01-26,False,False,False,False


##### True: the value is missing for that period
##### False: the value is present for that period

In [17]:
# Count the missing entries in each column (sum of the True flags down the rows).
data_comp.isna().sum(axis=0)

spx       8
dax       8
ftse      8
nikkei    8
dtype: int64

##### Setting the frequency to business days must have generated 8 dates for which we have no data available.

In [18]:
# Forward-fill: each missing spx value takes the last observed value before it.
# Series.ffill() replaces the deprecated fillna(method="ffill") spelling and
# produces the same result.
data_comp["spx"] = data_comp["spx"].ffill()

In [19]:
# Re-count missing values per column to confirm the forward-fill cleared spx.
data_comp.isna().sum(axis=0)

spx       0
dax       8
ftse      8
nikkei    8
dtype: int64

In [26]:
# Back-fill: each missing ftse / nikkei value takes the next observed value after it.
# Series.bfill() replaces the deprecated fillna(method="bfill") spelling and
# produces the same result.
data_comp["ftse"] = data_comp["ftse"].bfill()
data_comp["nikkei"] = data_comp["nikkei"].bfill()

In [27]:
# Re-count missing values per column after back-filling ftse and nikkei.
# NOTE(review): the recorded output below shows dax already at 0, although dax
# is only filled in a later cell and the execution counts jump from 19 to 26.
# The output looks stale from out-of-order execution; restart the kernel and
# run all cells to confirm (dax would be expected to still report 8 here).
data_comp.isna().sum()

spx       0
dax       0
ftse      0
nikkei    0
dtype: int64

In [28]:
# Replace every remaining dax gap with one constant: the mean of the dax column.
# NOTE(review): a constant mean ignores the time ordering of the series; confirm
# this is intended as a demonstration rather than the preferred fill method.
data_comp["dax"] = data_comp["dax"].fillna(data_comp["dax"].mean())

In [29]:
# Final per-column count of missing values; every column should now report 0.
data_comp.isna().sum(axis=0)

spx       0
dax       0
ftse      0
nikkei    0
dtype: int64

### Simplifying the dataset

In [None]:
# The less data we load, the faster we can manipulate the data

### Plotting the Data

### The QQ Plot