In [1]:
%matplotlib inline
import numpy as np 
import matplotlib.pyplot as plt 
import pandas as pd

In [2]:
from statsmodels.compat import lzip
import statsmodels.api as sm
from statsmodels.formula.api import ols

In [3]:
import tensorflow as tf

In [4]:
data = pd.read_csv('MACRO_202006.csv')
data['QUARTER'] = ((data['YYM'] % 100)/3).astype(int) # % 나머지 
data['RollingMean']= data.REALGDP.rolling(12).mean()
data['TARGET1'] = (data.REALGDP > data.RollingMean).astype(int).shift(-1)
pct_cols = ['M2', 'INFL']
data.loc[:, pct_cols] = data.loc[:, pct_cols].pct_change(1)
df = pd.get_dummies(data, columns=['QUARTER'], drop_first=True).dropna()

In [5]:
df.tail()

Unnamed: 0,YYM,REALGDP,REALCONS,INV,M2,INFL,UNEMP,EMPLOY,CD_3M,RollingMean,TARGET1,QUARTER_2,QUARTER_3,QUARTER_4
69,201903,-0.3,0.1,-4.428652,0.01748,0.001342,3.8,0.374808,1.9,0.675,1.0,0,0,0
70,201906,1.0,0.7,7.329217,0.015799,0.003732,4.0,0.148919,1.8,0.666667,0.0,1,0,0
71,201909,0.4,0.4,-3.462833,0.022418,0.003051,3.4,0.413171,1.54,0.666667,1.0,0,1,0
72,201912,1.3,0.7,0.497433,0.019514,-0.00076,3.7,0.614444,1.53,0.708333,0.0,0,0,1
73,202003,-1.3,-6.5,-0.563949,0.025006,0.003995,3.8,-0.0897,1.23,0.533333,0.0,0,0,0


RealGDP, 전분기 대비 증가율<br>
REALCONS : 전분기 대비 소비증가율<br>
INV : 전분기대비 투자증가율<br>
M2 : 전분기대비 M2증가율<br>
INFL : 전분기대비 소비물가증가율<br>
UNEMP : 현재 실업율<br>
EMPLOY :  전분기대비 취업자수증가율<br>
RollingMean : 20분기 평균 전분기 대비 REALGDP증가율'<br>
TARGET : 호황지표<br>
TARGET1 : 1분기 후 호황지표( 현재의 데이터로 1분기 후의 경제상황을 예측하고자 함)<br>

In [7]:
df.TARGET1.value_counts()

0.0    32
1.0    31
Name: TARGET1, dtype: int64

In [8]:
df1 = df.copy()

In [9]:
df1.columns

Index(['YYM', 'REALGDP', 'REALCONS', 'INV', 'M2', 'INFL', 'UNEMP', 'EMPLOY',
       'CD_3M', 'RollingMean', 'TARGET1', 'QUARTER_2', 'QUARTER_3',
       'QUARTER_4'],
      dtype='object')

In [10]:
x_data =df1[['REALGDP', 'REALCONS', 'INV', 'M2', 'INFL', 'UNEMP', 'EMPLOY', 'CD_3M']].to_numpy()

In [11]:
x_data

array([[ 4.00000000e-01,  1.00000000e-01, -8.25037944e-01,
         1.94805195e-02,  1.48766853e-02,  3.60000000e+00,
         4.86417886e-02,  3.52000000e+00],
       [ 8.00000000e-01,  1.10000000e+00,  3.40984285e+00,
         1.25265393e-02, -4.30297061e-03,  3.80000000e+00,
         7.08940876e-01,  3.39000000e+00],
       [ 9.00000000e-01,  1.00000000e+00, -1.88109113e+00,
         1.66701615e-02,  2.00588358e-02,  3.70000000e+00,
         2.67711767e-02,  3.55000000e+00],
       [ 1.90000000e+00,  2.40000000e+00,  4.33214934e+00,
         2.61936681e-02, -3.01154852e-03,  3.80000000e+00,
         4.56304460e-02,  3.51000000e+00],
       [ 1.50000000e+00,  1.20000000e+00,  2.43754686e+00,
         1.79881419e-02,  1.20569827e-02,  3.90000000e+00,
        -1.40337340e-02,  3.70000000e+00],
       [ 1.00000000e+00,  1.10000000e+00, -3.94623676e-01,
         4.24481737e-03, -2.97201250e-03,  3.60000000e+00,
         5.80288609e-01,  4.04000000e+00],
       [ 1.60000000e+00,  1.000000

In [12]:
y_data = df1.TARGET1

In [13]:
y_data.value_counts()

0.0    32
1.0    31
Name: TARGET1, dtype: int64

<h2> 정규화를 위한 함수 (최대 최소값이 1과 0이되도록 Scaling한다) </h2>

In [14]:
def normalization(data):
    numerator = data - np.min(data, 0)
    denominator = np.max(data, 0) - np.min(data, 0)
    return numerator / denominator

In [15]:
x_data = normalization(x_data)

In [16]:
x_data

array([[0.58730159, 0.65346535, 0.52062391, 0.44481714, 0.77479334,
        0.6       , 0.3288079 , 0.50219298],
       [0.65079365, 0.75247525, 0.63269454, 0.28198723, 0.22808736,
        0.8       , 0.70580099, 0.47368421],
       [0.66666667, 0.74257426, 0.49267683, 0.37901161, 0.92250781,
        0.7       , 0.31632103, 0.50877193],
       [0.82539683, 0.88118812, 0.65710218, 0.60200789, 0.26489866,
        0.8       , 0.32708859, 0.5       ],
       [0.76190476, 0.76237624, 0.60696398, 0.40987259, 0.6944192 ,
        0.9       , 0.29302375, 0.54166667],
       [0.68253968, 0.75247525, 0.53201427, 0.08806777, 0.26602562,
        0.6       , 0.63234789, 0.61622807],
       [0.77777778, 0.74257426, 0.67752606, 0.46743075, 0.74774151,
        0.6       , 0.67873849, 0.66666667],
       [0.63492063, 0.77227723, 0.44176044, 0.82550042, 0.37891267,
        0.5       , 0.45710994, 0.71052632],
       [0.77777778, 0.75247525, 0.73784493, 0.80315885, 0.713758  ,
        0.5       , 0.426590

In [17]:
#convert into numpy and float format
X = np.asarray(x_data, dtype=np.float32)
y = np.asarray(y_data, dtype=np.float32)

In [18]:
print(X.shape)
print(y.shape)

(63, 8)
(63,)


In [19]:
k = x_data.shape[1]

In [20]:
k

8

In [21]:
learning_rate = tf.Variable(0.003)

W = tf.Variable(tf.random.normal(([k, 1])), name='weight')
b = tf.Variable(tf.random.normal(([1])), name='bias')

for i in range(10000+1):
    with tf.GradientTape() as tape:
        
        hypothesis  = tf.sigmoid(tf.matmul(X, W) + b)
        
        cost = -tf.reduce_mean(y * tf.math.log(hypothesis) + (1 - y) * tf.math.log(1 - hypothesis))

        W_grad, b_grad = tape.gradient(cost, [W, b])
        
        W.assign_sub(learning_rate * W_grad)
        b.assign_sub(learning_rate * b_grad)
        predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)    
        
    if i % 2000 == 0:
        print("{:5} | {:10.6f}".format(i, cost.numpy()))

    0 |   1.049613
 2000 |   0.703357
 4000 |   0.702705
 6000 |   0.702131
 8000 |   0.701603
10000 |   0.701114


In [22]:
y_Predicted = predicted.numpy().flatten()

In [23]:
len(y_Predicted)

63

In [24]:
y.shape

(63,)

In [25]:
y_Actual = y.flatten()

In [26]:
data = {'y_Actual': y_Actual,
        'y_Predicted': y_Predicted}

In [27]:
#data

In [28]:
df = pd.DataFrame(data, columns = ['y_Actual', 'y_Predicted'])

In [29]:
cross = pd.crosstab(df['y_Actual'], df['y_Predicted'], rownames = ['Actual'], colnames=['Predicted'])

In [30]:
cross

Predicted,0.0,1.0
Actual,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,21,11
1.0,13,18


In [31]:
cross.index

Float64Index([0.0, 1.0], dtype='float64', name='Actual')

In [32]:
confusion_matrix = np.zeros([2,2])

In [33]:
try : 
    confusion_matrix[1,1] = cross.loc[1,1]
    confusion_matrix[0,1] = cross.loc[0,1]
    confusion_matrix[1,0] = cross.loc[1,0]
    confusion_matrix[0,0] = cross.loc[0,0]

except Exception as e:
    print(e)

TP  = confusion_matrix[1,1]
FP  = confusion_matrix[0,1]
FN  = confusion_matrix[1,0]
TN  = confusion_matrix[0,0]

In [34]:
confusion_matrix

array([[21., 11.],
       [13., 18.]])

In [35]:
TOT  = TP + FP + TN + FN

In [36]:
accuracy = (TP + TN)/TOT

In [37]:
accuracy

0.6190476190476191