# IHSG Pre-processing Data

### Audhi Aprilliant

## 1 Import Libraries

In [222]:
import pandas as pd                   # Dataframe manipulation
import numpy as np                    # Mathematics operation

## Load the Data

In [223]:
ihsg_data = pd.read_csv('Datasets/4 IHSG 2019.csv')

In [224]:
print('Dimension of financial news:\n{}'.format(ihsg_data.shape[0]),
      'rows and {}'.format(ihsg_data.shape[1]),'columns')
ihsg_data.head(6)

Dimension of financial news:
371 rows and 8 columns


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Used
0,2018-12-28,6200.296875,6212.125,6176.630859,6194.498047,6194.498047,69954900.0,1
1,2018-12-29,,,,,,,0
2,2018-12-30,,,,,,,0
3,2018-12-31,,,,,,,0
4,2019-01-01,,,,,,,0
5,2019-01-02,6197.871094,6205.89502,6164.833984,6181.174805,6181.174805,52797800.0,1


In [225]:
# Check missing value
ihsg_data.isna().sum()

Date           0
Open         124
High         124
Low          124
Close        124
Adj Close    124
Volume       124
Used           0
dtype: int64

## 2 Function to Calculate the Return of IHSG

In [226]:
def return_ihsg(df):
    df_no_na = df.dropna().reset_index(drop=True)
    return_data = [0]
    index_data = list(df_no_na.index)
    for i in range(1,len(index_data)):
        return_i = (df_no_na['Adj Close'][index_data[i]] - 
                    df_no_na['Adj Close'][index_data[i-1]])/df_no_na['Adj Close'][index_data[i]]
        return_data.append(return_i)
    df_no_na = pd.concat([df_no_na['Date'],pd.DataFrame(return_data)],axis=1)
    df_full = pd.merge(df,df_no_na,on='Date',how='left')
    df_full = df_full.rename(columns = {0:'Return'})
    return df_full

## 3 Function to Apply Curve Function

In [227]:
def curve_function(df):
    for i in df.columns:
        while df[i].isna().sum() > 0:
            for j in range(df.shape[0]):
                if pd.isnull(df.loc[j,i]):
                    seq_k = [j]
                    k = j
                    while pd.isnull(df.loc[k,i]):
                        k = k + 1
                        seq_k.append(k)
                    if len(seq_k) % 2 == 0:
                        df.loc[seq_k[int((len(seq_k) - 1)/2)],i] = (df.loc[j - 1,i] + 
                                                                    df.loc[seq_k[len(seq_k) - 1],i])/2
                    else:
                        df.loc[seq_k[int((len(seq_k) - 1)/2)],i] = (df.loc[j - 1,i] + 
                                                                    df.loc[seq_k[len(seq_k) - 1],i])/2
                else:
                    df.loc[j,i] = df.loc[j,i]
    return(df)

## 4 Apply Functions Above

In [228]:
ihsg_data_clean = return_ihsg(ihsg_data)
print('Dimension of financial news:\n{}'.format(ihsg_data_clean.shape[0]),
      'rows and {}'.format(ihsg_data_clean.shape[1]),'columns')
ihsg_data_clean.head()

Dimension of financial news:
371 rows and 9 columns


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Used,Return
0,2018-12-28,6200.296875,6212.125,6176.630859,6194.498047,6194.498047,69954900.0,1,0.0
1,2018-12-29,,,,,,,0,
2,2018-12-30,,,,,,,0,
3,2018-12-31,,,,,,,0,
4,2019-01-01,,,,,,,0,


In [229]:
ihsg_data_curve = curve_function(ihsg_data_clean)
print('Dimension of financial news:\n{}'.format(ihsg_data_curve.shape[0]),
      'rows and {}'.format(ihsg_data_curve.shape[1]),'columns')
ihsg_data_curve.head()

Dimension of financial news:
371 rows and 9 columns


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Used,Return
0,2018-12-28,6200.296875,6212.125,6176.630859,6194.498047,6194.498047,69954900.0,1,0.0
1,2018-12-29,6199.993652,6211.346252,6175.15625,6192.832642,6192.832642,67810262.5,0,-0.000269
2,2018-12-30,6199.69043,6210.567505,6173.68164,6191.167236,6191.167236,65665625.0,0,-0.000539
3,2018-12-31,6199.083985,6209.01001,6170.732422,6187.836426,6187.836426,61376350.0,0,-0.001078
4,2019-01-01,6198.477539,6207.452515,6167.783203,6184.505616,6184.505616,57087075.0,0,-0.001617


## Save Data

In [230]:
ihsg_data_curve.to_csv("Datasets/interim/Clean Data of JCI 2019.csv",index=False)