# 임의좌표 생성

In [None]:
import numpy as np
def generate_xy(num=100):
  """Return ``num`` random 2-D points with both coordinates in [0, 10).

  Args:
    num: Number of points to generate.

  Returns:
    NumPy array of shape ``(num, 2)``, one row per point.
  """
  scale = 10
  # np.random.rand samples uniformly from [0, 1); stretch to [0, scale).
  points = np.random.rand(num, 2) * scale
  return points

In [None]:
import matplotlib.pyplot as plt
# Generate 100 random points and scatter-plot column 0 against column 1.
X = generate_xy(100)
plt.scatter(X[:,0], X[:,1])

## 좌표별로 labeling 하기

```
0: if y>f(x)
1: if y<=f(x)
```

In [None]:
def labeling(X, func):
  """Label each point by its position relative to the curve ``y = func(x)``.

  Args:
    X: Iterable of points; each point is indexed as ``point[0]`` (x) and
      ``point[1]`` (y).
    func: Callable taking one x value and returning the boundary y value.

  Returns:
    Integer NumPy array with one entry per point: 0 where ``y > func(x)``,
    1 where ``y <= func(x)``.
  """
  # func is invoked once per point with a single value, so callables that
  # only accept scalars (e.g. ones built on math.pow) keep working.
  labels = [0 if point[1] > func(point[0]) else 1 for point in X]
  # An explicit integer dtype keeps the result integer-typed for empty input.
  return np.array(labels, dtype=int)

In [None]:
# Label the points against the line y = x (the lambda returns its input).
labels = labeling(X, lambda x: x)
# Bare trailing expression, left so the notebook cell echoes the array.
labels

## pandas 활용하여 데이터 처리 간소화

In [None]:
import pandas as pd
# Combine coordinates and labels column-wise into one frame: x, y, label.
df_X = pd.DataFrame(X, columns=["x", "y"])
df_label = pd.DataFrame(labels, columns=["label"])
df = pd.concat([df_X, df_label], axis=1)
# Bare trailing expression, left so the notebook cell echoes the frame.
df

In [None]:
import seaborn as sb
# Scatter plot of the points, colored by their label.
sb.scatterplot(data=df, x="x", y="y", hue="label")

## 데이터 생성 및 labeling 코드 취합

In [None]:
import numpy as np
import pandas as pd

def generate(func, num=100, max=10):
  """Generate random 2-D points and label them against ``y = func(x)``.

  Args:
    func: Callable taking one x value and returning the boundary y value.
    num: Number of points to generate.
    max: Exclusive upper bound of both coordinates (the lower bound is 0).
      The name shadows the builtin but is kept so keyword callers still work.

  Returns:
    DataFrame with columns ``x``, ``y`` and ``label``; ``label`` is 0 where
    ``y > func(x)`` and 1 otherwise.
  """
  # np.random.rand samples uniformly from [0, 1); stretch to [0, max).
  X = np.random.rand(num, 2) * max

  # func is called per point with a single value, so scalar-only callables
  # (e.g. ones built on math.pow) keep working.
  labels = [0 if y > func(x) else 1 for x, y in X]

  return pd.DataFrame({
      "x": X[:, 0],
      "y": X[:, 1],
      # Explicit integer dtype so the column stays integer even when num == 0.
      "label": np.array(labels, dtype=int),
  })

# 단계1. 데이터 생성

In [None]:
import math

def func1(x):
  """Cubic boundary curve: ``0.03 * (x - 4)**3 + 3``."""
  offset = x - 4
  cubed = math.pow(offset, 3)
  return 0.03 * cubed + 3

def func2(x):
  """Quadratic boundary curve: ``0.7 * (x - 4)**2 + 1``."""
  offset = x - 4
  squared = math.pow(offset, 2)
  return 0.7 * squared + 1

# Generate 100 points labeled against the cubic boundary func1.
df = generate(func1, 100, 10)
# Alternative dataset: label against the quadratic boundary func2 instead.
#df = generate(func2, 100, 10)
sb.scatterplot(data=df, x="x", y="y", hue="label")

SVC 주요 하이퍼파라미터 (출처: https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html)

### C: float, default=1.0
> Regularization parameter. The strength of the regularization is inversely proportional to C. Must be strictly positive. The penalty is a squared l2 penalty.

### kernel: {‘linear’, ‘poly’, ‘rbf’, ‘sigmoid’, ‘precomputed’} or callable, default=’rbf’
> Specifies the kernel type to be used in the algorithm. If none is given, ‘rbf’ will be used. If a callable is given it is used to pre-compute the kernel matrix from data matrices; that matrix should be an array of shape (n_samples, n_samples).

### degree: int, default=3
> Degree of the polynomial kernel function (‘poly’). Ignored by all other kernels.


# 단계2. SVM으로 classifier 학습

In [None]:
from sklearn.svm import SVC
import numpy as np

def svm(data, kernel="linear", degree=3, C=1):
  """Fit a scikit-learn ``SVC`` on the ``x``/``y`` columns of ``data``.

  Args:
    data: DataFrame with feature columns ``x`` and ``y`` and a target
      column ``label``.
    kernel: Passed through to ``SVC(kernel=...)``.
    degree: Passed through to ``SVC(degree=...)``.
    C: Passed through to ``SVC(C=...)``.

  Returns:
    The fitted ``SVC`` instance.
  """
  features = data[["x", "y"]]
  target = data["label"]

  model = SVC(kernel=kernel, degree=degree, C=C)
  model.fit(features, target)
  return model

In [None]:
from matplotlib.colors import ListedColormap
import matplotlib.pyplot as plot
import numpy as np

def draw_graph(classifier, data):
  """Plot a fitted classifier's decision regions with the labeled points on top.

  The figure title shows the classifier's repr and its accuracy on ``data``.

  Args:
    classifier: Fitted estimator whose ``predict`` accepts two-feature input.
    data: DataFrame with columns ``x``, ``y`` and ``label``.
  """
  # Pad the plotted area by one unit on each side of the data.
  x_min, x_max = data["x"].min() - 1, data["x"].max() + 1
  y_min, y_max = data["y"].min() - 1, data["y"].max() + 1
  h = 0.04  # step of the evaluation grid
  xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
  # Predict a label for every grid node, then restore the grid shape.
  Z = classifier.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)
  plot.figure(figsize=(8, 6))

  colormap_light = ListedColormap(["yellow", "orange"])
  # contourf takes its colormap through ``cmap``; the previous ``colormap``
  # keyword is not a contourf parameter, so the custom colors were not applied.
  plot.contourf(xx, yy, Z, cmap=colormap_light)

  colormap_bold = ["pink", "c"]
  sb.scatterplot(
      data=data,
      x="x", y="y", hue="label",
      palette=colormap_bold,
      alpha=1.0,
      edgecolor="brown",
  )
  plot.xlim(xx.min(), xx.max())
  plot.ylim(yy.min(), yy.max())

  # Accuracy is measured on the frame passed in, never on a module-level
  # variable, so the count always matches the points being plotted.
  y_predict = classifier.predict(data[["x", "y"]])
  hit_count = int((y_predict == data["label"].to_numpy()).sum())

  plot.title(
      f"{str(classifier)} classification "
      f"\nacc={hit_count/len(data)*100:.1f} % ({hit_count}/{len(data)})"
  )

# 단계3. 다양한 하이퍼파라미터(kernel, degree, C 등) 적용

In [None]:
# Linear-kernel SVM (svm() defaults). generate() is called twice, so the model
# is trained on one random sample and plotted/scored on a second, independent one.
classifier = svm(data=generate(func1, 100, 10))
draw_graph(classifier=classifier, data=generate(func1, 100, 10))

In [None]:
# Degree-3 polynomial kernel; trained on one random sample and plotted/scored
# on a second, independently generated sample.
classifier = svm(data=generate(func1, 100, 10), kernel="poly", degree=3)
draw_graph(classifier=classifier, data=generate(func1, 100, 10))

In [None]:
# Degree-3 polynomial kernel with C=10; trained on one random sample and
# plotted/scored on a second, independently generated sample.
classifier = svm(data=generate(func1, 100, 10), kernel="poly", degree=3, C=10)
draw_graph(classifier=classifier, data=generate(func1, 100, 10))

In [None]:
# Degree-5 polynomial kernel with C=10; trained and plotted on the same frame df.
classifier = svm(data=df, kernel="poly", degree=5, C=10)
draw_graph(classifier=classifier, data=df)

In [None]:
# RBF kernel; trained on one random sample and plotted/scored on a second,
# independently generated sample.
classifier = svm(data=generate(func1, 100, 10), kernel="rbf")
draw_graph(classifier=classifier, data=generate(func1, 100, 10))

In [None]:
# RBF kernel with C=10; trained and plotted on the same frame df.
classifier = svm(data=df, kernel="rbf", C=10)
draw_graph(classifier=classifier, data=df)

# 부록. KNN 으로 돌려보기

In [None]:
from sklearn import neighbors

# Same visualisation with a 5-nearest-neighbours classifier, trained and
# plotted on the same frame df.
classifier = neighbors.KNeighborsClassifier(n_neighbors=5)
classifier.fit(df[["x", "y"]], df["label"])
draw_graph(classifier=classifier, data=df)

# 부록. 2차원 리스트 예쁘게 출력하기

In [None]:
import numpy as num
import matplotlib.pyplot as plot
import seaborn as sb
from matplotlib.colors import ListedColormap
from sklearn import neighbors, datasets

In [None]:
# Load the iris dataset through sklearn.datasets.
iris = datasets.load_iris()
#iris

In [None]:
# Keep only the first two feature columns; y holds the targets.
X = iris.data[:, :2]
y = iris.target
# Preview the first 10 rows of each.
print(X[:10],y[:10])

## 부록1. 2차원 리스트 예쁘게 출력하기 - 1

In [None]:
# Print the shape, then each row on its own line with space-separated values.
print("X.Shape: ", X.shape)
for row in X:
  for value in row:
    print(value, end=" ")
  print() # end the current row

## 부록2. 2차원 리스트 예쁘게 출력하기 - 2 (pandas 활용)

In [None]:
import pandas as pd
# Wrap the 2-D array in a DataFrame for tabular display. This rebinds the
# name df, which earlier cells used for the generated x/y/label dataset.
df = pd.DataFrame(X)
# Bare trailing expression, left so the notebook cell echoes the frame.
df

In [None]:
# Summary statistics for each column.
df.describe()

In [None]:
 df.head()

In [None]:
# Last rows of the frame.
df.tail()

In [None]:
# Rows at positions 5 through 9 (end-exclusive slice), all columns.
df.iloc[5:10, :]

In [None]:
# Write the frame to data.csv (path is relative to the working directory).
df.to_csv("data.csv")