# 내일주가 전처리

## Load

In [1]:
import FinanceDataReader as fdr
# Load the price table for symbol '028300' through FinanceDataReader.
df = fdr.DataReader(symbol='028300')
# Preview the last three rows of the loaded frame.
df.tail(3)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Change
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-01-31,30650,31100,30150,30400,520871,-0.014587
2023-02-01,31000,33050,30900,31800,1411118,0.046053
2023-02-02,32250,32550,31600,31900,357041,0.003145


## Moving Average

In [2]:
def moving_average(
    df,
    windows=(5, 20, 60, 120),
    change=False
):
    """
    Add one moving-average column of ``Close`` per window to ``df``.

    Every column is named ``'MA' + str(window)`` and is computed with
    ``rolling(window=window, min_periods=1)``, so the leading rows average
    over however many rows are available instead of being NaN.

    Args:
      df: DataFrame with a ``Close`` column. It is modified in place.
      windows: iterable of int window sizes (in rows).
      change: bool. If True, store the relative gap (Close - MA) / Close
        instead of the moving average itself.

    Returns:
      The same ``df`` object, with the MA columns added or overwritten.
    """
    close = df['Close']

    for window in windows:
        average = close.rolling(window=window, min_periods=1).mean()
        if change:
            # Replace the raw average with its relative distance from Close.
            average = (close - average) / close
        df['MA' + str(window)] = average

    return df

In [3]:
# Add the default MA5/MA20/MA60/MA120 columns; change=True stores each one
# as (Close - MA) / Close rather than the raw average.
df = moving_average(df, change=True)
# Preview the last three rows with the new columns.
df.tail(3)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Change,MA5,MA20,MA60,MA120
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2023-01-31,30650,31100,30150,30400,520871,-0.014587,-0.020395,-0.007895,-0.05188,-0.202434
2023-02-01,31000,33050,30900,31800,1411118,0.046053,0.02044,0.030346,-0.003339,-0.1461
2023-02-02,32250,32550,31600,31900,357041,0.003145,0.020376,0.028213,0.00188,-0.139183


## Value(원) → Change(%)

In [4]:
def changing_price(
    df
):
    """
    Re-express Open, High and Low relative to the same row's Close.

    Each of the three columns is overwritten with
    (column - Close) / Close. No other column is assigned here.

    Note: ``df`` is modified in place and also returned, so calling this
    twice on the same frame rescales values that are already relative.
    """
    close = df['Close']

    for column in ('Open', 'High', 'Low'):
        df[column] = (df[column] - close) / close

    return df

In [5]:
# Convert Open/High/Low from prices to ratios relative to Close.
# NOTE: changing_price overwrites the columns in place, so re-running this
# cell on the same df applies the conversion a second time.
df = changing_price(df)
# Preview the last three rows after the conversion.
df.tail(3)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Change,MA5,MA20,MA60,MA120
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2023-01-31,0.008224,0.023026,-0.008224,30400,520871,-0.014587,-0.020395,-0.007895,-0.05188,-0.202434
2023-02-01,-0.025157,0.039308,-0.028302,31800,1411118,0.046053,0.02044,0.030346,-0.003339,-0.1461
2023-02-02,0.010972,0.020376,-0.009404,31900,357041,0.003145,0.020376,0.028213,0.00188,-0.139183


## Create y label

In [6]:
def create_y_label(df):
    """
    Add a binary ``y`` column derived from ``Change``.

    ``y`` is 1 where ``Change`` is strictly greater than 0 and 0 otherwise
    (rows whose ``Change`` is NaN compare as False and therefore get 0).
    ``df`` is modified in place and also returned.
    """
    is_positive = df['Change'].gt(0)
    df['y'] = is_positive.astype(int)

    return df

In [7]:
# Add the binary y column (1 when Change > 0, else 0).
df = create_y_label(df)
# Preview the last three rows including y.
df.tail(3)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Change,MA5,MA20,MA60,MA120,y
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2023-01-31,0.008224,0.023026,-0.008224,30400,520871,-0.014587,-0.020395,-0.007895,-0.05188,-0.202434,0
2023-02-01,-0.025157,0.039308,-0.028302,31800,1411118,0.046053,0.02044,0.030346,-0.003339,-0.1461,1
2023-02-02,0.010972,0.020376,-0.009404,31900,357041,0.003145,0.020376,0.028213,0.00188,-0.139183,1


## Slice year

In [8]:
def slice_year(df,year):
    """
    Return the rows of ``df`` from ``year`` onward.

    ``year`` is converted to a string and used as the start of a label
    slice on the index; the slice has no upper bound.
    Assumes the index supports string-based slicing (e.g. a date index).
    """
    start_label = str(year)
    return df.loc[start_label:]

In [9]:
# Keep only the rows from 2015 onward.
df = slice_year(df,'2015')
# Preview the last five rows of the sliced frame.
df.tail()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Change,MA5,MA20,MA60,MA120,y
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2023-01-27,-0.003195,0.017572,-0.009585,31300,331294,-0.003185,0.002556,0.026438,-0.028036,-0.174274,0
2023-01-30,0.006483,0.025932,-0.001621,30850,376888,-0.014377,-0.009724,0.010049,-0.039673,-0.188521,0
2023-01-31,0.008224,0.023026,-0.008224,30400,520871,-0.014587,-0.020395,-0.007895,-0.05188,-0.202434,0
2023-02-01,-0.025157,0.039308,-0.028302,31800,1411118,0.046053,0.02044,0.030346,-0.003339,-0.1461,1
2023-02-02,0.010972,0.020376,-0.009404,31900,357041,0.003145,0.020376,0.028213,0.00188,-0.139183,1


## Split x and y

In [10]:
def split_x_y(df):
    """
    Build next-row (X, y) pairs from a preprocessed frame.

    X holds the feature columns of every row except the last; y holds the
    ``y`` column of every row except the first. Position i of X therefore
    lines up with the label taken from the following row of ``df``.
    Both keep their original index labels, so the two indexes are offset
    by one row even though the lengths match.

    Returns:
      Tuple ``(X, y)``: a DataFrame of features and a Series of labels.
    """
    feature_columns = [
        'MA5', 'MA20', 'MA60', 'MA120',
        'Volume', 'Change', 'Open', 'High', 'Low',
    ]

    # Drop the last feature row and the first label row to shift by one.
    features = df[feature_columns].iloc[:-1]
    labels = df['y'].iloc[1:]

    return features, labels

In [17]:
# Split into features X and next-row labels y.
X, y = split_x_y(df)
# Print both lengths to confirm they match after the one-row shift.
print(len(X),len(y))

1990 1990


## 최종코드

In [18]:
def moving_average(
    df,
    windows=(5, 20, 60, 120),
    change=False
):
    """
    Add one moving-average column of ``Close`` per window to ``df``.

    Every column is named ``'MA' + str(window)`` and is computed with
    ``rolling(window=window, min_periods=1)``, so the leading rows average
    over however many rows are available instead of being NaN.

    Args:
      df: DataFrame with a ``Close`` column. It is modified in place.
      windows: iterable of int window sizes (in rows).
      change: bool. If True, store the relative gap (Close - MA) / Close
        instead of the moving average itself.

    Returns:
      The same ``df`` object, with the MA columns added or overwritten.
    """
    close = df['Close']

    for window in windows:
        average = close.rolling(window=window, min_periods=1).mean()
        if change:
            # Replace the raw average with its relative distance from Close.
            average = (close - average) / close
        df['MA' + str(window)] = average

    return df

def changing_price(
    df
):
    """
    Re-express Open, High and Low relative to the same row's Close.

    Each of the three columns is overwritten with
    (column - Close) / Close. No other column is assigned here.

    Note: ``df`` is modified in place and also returned, so calling this
    twice on the same frame rescales values that are already relative.
    """
    close = df['Close']

    for column in ('Open', 'High', 'Low'):
        df[column] = (df[column] - close) / close

    return df

def create_y_label(df):
    """
    Add a binary ``y`` column derived from ``Change``.

    ``y`` is 1 where ``Change`` is strictly greater than 0 and 0 otherwise
    (rows whose ``Change`` is NaN compare as False and therefore get 0).
    ``df`` is modified in place and also returned.
    """
    is_positive = df['Change'].gt(0)
    df['y'] = is_positive.astype(int)

    return df

def slice_year(df,year):
    """
    Return the rows of ``df`` from ``year`` onward.

    ``year`` is converted to a string and used as the start of a label
    slice on the index; the slice has no upper bound.
    Assumes the index supports string-based slicing (e.g. a date index).
    """
    start_label = str(year)
    return df.loc[start_label:]

def split_x_y(df):
    """
    Build next-row (X, y) pairs from a preprocessed frame.

    X holds the feature columns of every row except the last; y holds the
    ``y`` column of every row except the first. Position i of X therefore
    lines up with the label taken from the following row of ``df``.
    Both keep their original index labels, so the two indexes are offset
    by one row even though the lengths match.

    Returns:
      Tuple ``(X, y)``: a DataFrame of features and a Series of labels.
    """
    feature_columns = [
        'MA5', 'MA20', 'MA60', 'MA120',
        'Volume', 'Change', 'Open', 'High', 'Low',
    ]

    # Drop the last feature row and the first label row to shift by one.
    features = df[feature_columns].iloc[:-1]
    labels = df['y'].iloc[1:]

    return features, labels

## Example

In [None]:
import FinanceDataReader as fdr
# Reload the price table for symbol '028300' so the pipeline starts from
# untouched prices (the helpers below overwrite columns in place).
df = fdr.DataReader(symbol='028300')

In [20]:
# Full preprocessing pipeline, in order:
# 1. Moving-average gaps, computed on the whole frame before any slicing
#    so the rolling windows can use rows earlier than 2015.
df = moving_average(df, change=True)
# 2. Open/High/Low expressed relative to Close.
df = changing_price(df)
# 3. Binary label y (1 when Change > 0).
df = create_y_label(df)
# 4. Keep rows from 2015 onward.
df = slice_year(df,'2015')
# 5. Features from each row paired with the label of the following row.
X, y = split_x_y(df)

In [23]:
# Show the last five feature rows; the final row of df is not in X.
X.tail()

Unnamed: 0_level_0,MA5,MA20,MA60,MA120,Volume,Change,Open,High,Low
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2023-01-26,0.009236,0.03121,-0.027849,-0.17291,385469,0.008026,-0.014331,0.0,-0.025478
2023-01-27,0.002556,0.026438,-0.028036,-0.174274,331294,-0.003185,-0.003195,0.017572,-0.009585
2023-01-30,-0.009724,0.010049,-0.039673,-0.188521,376888,-0.014377,0.006483,0.025932,-0.001621
2023-01-31,-0.020395,-0.007895,-0.05188,-0.202434,520871,-0.014587,0.008224,0.023026,-0.008224
2023-02-01,0.02044,0.030346,-0.003339,-0.1461,1411118,0.046053,-0.025157,0.039308,-0.028302


In [24]:
# Show the last five labels; each keeps the index of the row it came from,
# i.e. one row after the X row it is paired with.
y.tail()

Date
2023-01-27    0
2023-01-30    0
2023-01-31    0
2023-02-01    1
2023-02-02    0
Name: y, dtype: int32