In [1]:
import pandas as pd
# Raw VHM price history, read as-is: no date parsing or numeric cleaning is applied here.
df = pd.read_csv(filepath_or_buffer='./DATASET/VHM.csv')

In [6]:
# Multipliers for the magnitude suffixes that may end a Volume cell.
_VOLUME_SUFFIXES = {'K': 1_000, 'M': 1_000_000, 'B': 1_000_000_000}


def _parse_volume(value):
    """Convert one Volume cell (e.g. '1.5K', '2.3M', '1,234', 1500) to an int; missing -> NaN."""
    if pd.isna(value):
        return float('nan')
    if not isinstance(value, str):
        # pandas already parsed the cell as a number.
        return int(value)
    text = value.strip().replace(',', '')
    if not text:
        return float('nan')
    multiplier = _VOLUME_SUFFIXES.get(text[-1].upper())
    if multiplier is not None:
        # round() rather than int(): int() truncates, so float error such as
        # 1.001 * 1000 == 1000.9999999999999 would silently drop a unit.
        return round(float(text[:-1]) * multiplier)
    try:
        return int(text)
    except ValueError:
        # Plain value written with a decimal point, e.g. '1500.0'.
        return int(float(text))


def _parse_percent(value):
    """Convert one 'Change %' cell (e.g. '-1.25%') to a fraction (-0.0125); missing -> NaN."""
    if pd.isna(value):
        return float('nan')
    if isinstance(value, str):
        return float(value.strip().strip('%')) / 100
    # NOTE(review): a numeric cell is assumed to still be in percent units — confirm.
    return float(value) / 100


def _parse_price(value):
    """Convert one price cell (e.g. '17,450.0' or 950.5) to a float; missing -> NaN."""
    if pd.isna(value):
        return float('nan')
    if isinstance(value, str):
        return float(value.replace(',', ''))
    # A column without thousands separators is read by pandas as numeric already.
    return float(value)


def preprocess_stock_data(csv_file_path):
    """Load a stock-price CSV and return a cleaned, calendar-daily DataFrame.

    The CSV must contain the columns 'Date' (formatted day/month/year),
    'Close', 'Open', 'High', 'Low', 'Volume' and 'Change %'.

    Steps:
      1. Parse 'Date' and reindex onto every calendar day between the first
         and last date in the file; days absent from the file are
         forward-filled from the previous available row.
      2. Convert 'Volume' (optional K/M/B suffix) to whole numbers.
      3. Convert 'Change %' from a percent string to a fraction.
      4. Convert 'Close', 'Open', 'High', 'Low' to floats, dropping
         thousands separators.

    Parameters
    ----------
    csv_file_path : str or path-like
        Location of the CSV file, passed straight to ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        One row per calendar day, sorted by 'Date' ascending, with 'Date' as
        a regular column followed by the CSV's other columns.

    Raises
    ------
    ValueError
        If a date does not match day/month/year, a cell cannot be parsed as a
        number, or the file contains duplicate dates (reindexing rejects
        duplicate labels).
    """
    raw = pd.read_csv(csv_file_path)
    raw['Date'] = pd.to_datetime(raw['Date'], format='%d/%m/%Y')
    by_date = raw.set_index('Date')

    # Build the full daily calendar and carry the last known row forward
    # over the days that have no row of their own.
    calendar = pd.date_range(start=by_date.index.min(), end=by_date.index.max(), freq='D')
    data = by_date.reindex(calendar).ffill().rename_axis('Date').reset_index()

    # Conversion happens after the forward-fill so that filled rows receive
    # exactly the same parsed values as the rows they were copied from.
    data['Volume'] = data['Volume'].apply(_parse_volume)
    data['Change %'] = data['Change %'].apply(_parse_percent)
    for column in ('Close', 'Open', 'High', 'Low'):
        data[column] = data[column].apply(_parse_price)

    return data.sort_values('Date', ascending=True)

# Run the same cleaning pipeline over each ticker's CSV, in DXG, QCG, VHM order.
dxg, qcg, vhm = map(
    preprocess_stock_data,
    ('./DATASET/DXG.csv', './DATASET/QCG.csv', './DATASET/VHM.csv'),
)

In [5]:
# Summary statistics of the DXG closing price, one value per output line:
# mean, median, std, min, max, Q1, Q2, Q3, skewness, kurtosis, row count.
print(
    dxg['Close'].mean(),
    dxg['Close'].median(),
    dxg['Close'].std(),
    dxg['Close'].min(),
    dxg['Close'].max(),
    dxg['Close'].quantile(0.25),
    dxg['Close'].quantile(0.5),
    dxg['Close'].quantile(0.75),
    dxg['Close'].skew(),
    dxg['Close'].kurtosis(),
    len(dxg),
    sep='\n',
)



17471.919906201147
15624.0
7659.188432697695
6739.1
46750.0
12260.9
15624.0
20262.5
1.4941014135187065
2.376422687782688
1919


In [7]:
# Summary statistics of the QCG closing price, one value per output line:
# mean, median, std, min, max, Q1, Q2, Q3, skewness, kurtosis, row count.
print(
    qcg['Close'].mean(),
    qcg['Close'].median(),
    qcg['Close'].std(),
    qcg['Close'].min(),
    qcg['Close'].max(),
    qcg['Close'].quantile(0.25),
    qcg['Close'].quantile(0.5),
    qcg['Close'].quantile(0.75),
    qcg['Close'].skew(),
    qcg['Close'].kurtosis(),
    len(qcg),
    sep='\n',
)

7870.88587806149
7330.0
3343.564101983173
3320.0
23200.0
5050.0
7330.0
9500.0
1.0254708021351782
0.8711659627250925
1919


In [8]:
# Summary statistics of the VHM closing price, one value per output line:
# mean, median, std, min, max, Q1, Q2, Q3, skewness, kurtosis, row count.
print(
    vhm['Close'].mean(),
    vhm['Close'].median(),
    vhm['Close'].std(),
    vhm['Close'].min(),
    vhm['Close'].max(),
    vhm['Close'].quantile(0.25),
    vhm['Close'].quantile(0.5),
    vhm['Close'].quantile(0.75),
    vhm['Close'].skew(),
    vhm['Close'].kurtosis(),
    len(vhm),
    sep='\n',
)

61135.31578947369
61179.0
12375.843671994879
38450.0
88722.0
51500.0
61179.0
70756.0
-0.026475716636182645
-0.966473155129048
1919
