# 로지스틱 회귀 활용 

In [2]:
import numpy as np
import pandas as pd 
import warnings
warnings.filterwarnings('ignore')
pd.set_option('display.max_columns', None)

# Load the train/test feature tables, the training labels and the sample
# submission file; all paths are relative to the working directory.
train = pd.read_csv('train_features.csv')
test = pd.read_csv('test_features.csv')
train_label = pd.read_csv('train_labels.csv')
submission = pd.read_csv('sample_submission.csv')

In [4]:
# Group the readings by 'id' after removing the 'time' column, so that the
# aggregations computed later only cover the remaining feature columns.
gb_train = train.drop(columns='time').groupby('id')
gb_test = test.drop(columns='time').groupby('id')

In [5]:
def mad(x):
    """Return the median absolute deviation of ``x`` along axis 0.

    The median of ``x`` is subtracted from every element, and the median of
    the absolute differences is returned.
    """
    center = np.median(x, axis=0)
    deviations = np.absolute(x - center)
    return np.median(deviations, axis=0)

def energy(x):
    """Return the signal energy of ``x``: sum of squares divided by ``len(x)``."""
    samples = np.array(x)
    return np.square(samples).sum() / len(samples)

def iqr(x):
    """Return the interquartile range of ``x`` (75th minus 25th percentile)."""
    upper, lower = np.percentile(x, [75, 25])
    return upper - lower

def entropy(x):
    """Return the base-2 entropy of ``x`` after normalising it by its sum.

    Each value is divided by the (NaN-skipping) total to give a weight ``p``
    and ``-p * log2(p)`` is summed over all values.

    Terms whose logarithm is undefined (``p == 0``, negative ``p`` or NaN)
    are left out of the sum. This is what the previous implementation already
    did for pandas Series input, because ``Series.sum`` skips NaN; it is now
    applied uniformly, so NumPy arrays and plain sequences no longer yield
    NaN or raise.

    Args:
        x: One-dimensional numeric data (Series, ndarray or sequence).

    Returns:
        The entropy in bits as a float.
    """
    values = np.asarray(x, dtype=float)
    # 0 * log2(0) and log2(negative) produce NaN by design here; silence the
    # floating-point warnings and drop those terms with nansum below.
    with np.errstate(divide='ignore', invalid='ignore'):
        p = values / np.nansum(values)
        terms = -p * np.log2(p)
    return np.nansum(terms)

def skewness(x):
    """Return the sample skewness of ``x`` using ``scipy.stats.skew`` defaults."""
    # Imported locally so scipy is only required when this statistic is used.
    import scipy.stats

    return scipy.stats.skew(x)

def kurtosis(x):
    """Return the kurtosis of ``x`` using ``scipy.stats.kurtosis`` defaults."""
    # Use the qualified name so the scipy function is not confused with this
    # wrapper, which shares its name.
    import scipy.stats

    return scipy.stats.kurtosis(x)

def angleBet(u, v):
    """Return the angle in radians between vectors ``u`` and ``v``.

    The cosine is computed as ``dot(u, v) / |u| / |v|`` and clipped to
    ``[-1, 1]`` before ``arccos`` so that rounding error cannot push it
    outside the valid domain.
    """
    cosine = np.dot(u, v) / np.linalg.norm(u) / np.linalg.norm(v)
    return np.arccos(np.clip(cosine, -1, 1))

In [6]:
# Apply the same list of summary statistics to every feature column of each
# id group, for the train and the test set alike. Passing a list of functions
# to agg gives one result column per (feature, statistic) pair.
gb_train_agg, gb_test_agg = (
    grouped.agg([np.min, np.max, np.std, np.mean,
                 mad, energy, iqr, entropy, skewness, kurtosis])
    for grouped in (gb_train, gb_test)
)

In [7]:
def addAngle(dfs):
    """Build pairwise axis-angle features for a sequence of data frames.

    For every frame in ``dfs`` the angle between each pair of the x, y and z
    axis columns is computed with ``angleBet``, separately for the ``acc_*``
    and the ``gy_*`` columns.

    Args:
        dfs: Sequence of objects exposing ``acc_x``, ``acc_y``, ``acc_z``,
            ``gy_x``, ``gy_y`` and ``gy_z`` (e.g. DataFrames with those
            columns).

    Returns:
        A DataFrame with one row per element of ``dfs``, in the same order,
        and the columns ``angle_acc_xy``, ``angle_acc_yz``, ``angle_acc_xz``,
        ``angle_gy_xy``, ``angle_gy_yz`` and ``angle_gy_xz``.
    """
    # (output column, first axis column, second axis column); the order here
    # fixes the column order of the returned frame.
    axis_pairs = [
        ('angle_acc_xy', 'acc_x', 'acc_y'),
        ('angle_acc_yz', 'acc_y', 'acc_z'),
        ('angle_acc_xz', 'acc_x', 'acc_z'),
        ('angle_gy_xy', 'gy_x', 'gy_y'),
        ('angle_gy_yz', 'gy_y', 'gy_z'),
        ('angle_gy_xz', 'gy_x', 'gy_z'),
    ]

    # Collect into plain lists: growing an array with np.append inside a loop
    # copies the whole array on every iteration.
    angles = {
        name: [angleBet(getattr(df, first), getattr(df, second)) for df in dfs]
        for name, first, second in axis_pairs
    }

    return pd.DataFrame(angles)

In [9]:
# Number of distinct ids in each set.
train_ids = train.id.nunique()
test_ids = test.id.nunique()

# Cut each table into consecutive 600-row pieces, one per id.
# NOTE(review): this assumes every id owns exactly 600 contiguous rows and
# that the rows are ordered by id -- confirm against the data.
train_df = [train.iloc[600 * k : 600 * k + 600] for k in range(train_ids)]
test_df = [test.iloc[600 * k : 600 * k + 600] for k in range(test_ids)]

In [11]:
# Pairwise axis-angle features, one row per 600-row chunk.
train_ang = addAngle(train_df)
test_ang = addAngle(test_df)

# Flatten the two-level column labels produced by agg into single strings
# joined with an underscore.
gb_train_agg.columns = ['_'.join(pair) for pair in gb_train_agg.columns]
gb_test_agg.columns = ['_'.join(pair) for pair in gb_test_agg.columns]

# Train: join on the index directly.
# NOTE(review): this lines up with train_ang's positional index only if the
# train ids are 0..n-1 -- confirm.
gb_train_df = gb_train_agg.merge(train_ang, left_index=True, right_index=True)

# Test: move 'id' out of the index first so both sides are joined by row
# position, then restore 'id' as the index.
gb_test_df = (
    gb_test_agg.reset_index()
    .merge(test_ang, left_index=True, right_index=True)
    .set_index('id')
)

In [12]:
# Target vector: the 'label' column of the training-labels table.
y = train_label['label']

In [15]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import log_loss, accuracy_score
from sklearn.model_selection import train_test_split

In [16]:
# Hold out 20% of the labelled rows for validation; the fixed seed makes the
# split reproducible. X_test / y_test are this hold-out split, not the
# separate test feature table loaded earlier.
X_train, X_test, y_train, y_test = train_test_split(
    gb_train_df,
    y,
    test_size=0.2,
    random_state=42,
)

In [18]:
# Fit a logistic regression with scikit-learn's default settings.
# NOTE(review): the features are not scaled before fitting, and warnings are
# silenced at the top of the notebook, so a convergence warning would not be
# shown here -- consider scaling and checking convergence.
lr_clf = LogisticRegression().fit(X_train, y_train)

In [19]:
# Log loss of the logistic regression on the hold-out split. The classifier's
# own class list is passed explicitly so the probability columns stay aligned
# with the labels even if some class does not occur in y_test.
log_loss(y_test, lr_clf.predict_proba(X_test), labels=lr_clf.classes_)

2.4208208107993165

In [20]:
# Accuracy of the logistic regression on the hold-out split.
accuracy_score(y_test, lr_clf.predict(X_test))

0.5392

In [22]:
from sklearn.svm import SVC

In [23]:
# Two support-vector classifiers trained on the same split for comparison:
# an RBF kernel (gamma=0.5, C=0.1) and a degree-3 polynomial kernel (C=1).
rbf = SVC(kernel='rbf', gamma=0.5, C=0.1).fit(X_train, y_train)
poly = SVC(kernel='poly', degree=3, C=1).fit(X_train, y_train)

In [24]:
# Predicted labels of both SVMs on the hold-out split.
poly_pred = poly.predict(X_test)
rbf_pred = rbf.predict(X_test)

In [25]:
# Hold-out accuracy of the polynomial-kernel SVM.
accuracy_score(y_test, poly_pred)

0.5248

In [26]:
# Hold-out accuracy of the RBF-kernel SVM.
accuracy_score(y_test, rbf_pred)

0.5024