In [1]:
from sklearn.datasets import load_breast_cancer

# Load the breast-cancer dataset as pandas objects: `X` receives the
# feature table and `y` the matching target labels.
X, y = load_breast_cancer(as_frame=True, return_X_y=True)

# Print both objects for a quick look at the raw data.
print(X, y)

     mean radius  mean texture  mean perimeter  mean area  mean smoothness  \
0          17.99         10.38          122.80     1001.0          0.11840   
1          20.57         17.77          132.90     1326.0          0.08474   
2          19.69         21.25          130.00     1203.0          0.10960   
3          11.42         20.38           77.58      386.1          0.14250   
4          20.29         14.34          135.10     1297.0          0.10030   
..           ...           ...             ...        ...              ...   
564        21.56         22.39          142.00     1479.0          0.11100   
565        20.13         28.25          131.20     1261.0          0.09780   
566        16.60         28.08          108.30      858.1          0.08455   
567        20.60         29.33          140.10     1265.0          0.11780   
568         7.76         24.54           47.92      181.0          0.05263   

     mean compactness  mean concavity  mean concave points  mea

In [2]:
from sklearn.model_selection import train_test_split

# Keep 80% of the rows for training and the rest for validation; the fixed
# random_state makes the split reproducible across runs.
X_train, X_valid, y_train, y_valid = train_test_split(
    X,
    y,
    train_size=0.8,
    random_state=2023,
)

In [3]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training split only, then apply that same fitted
# transform to both splits so validation rows never influence the statistics.
scaler = StandardScaler()
scaler.fit(X_train)
scaled_X_train = scaler.transform(X_train)
scaled_X_valid = scaler.transform(X_valid)

# Show the first two training rows before and after scaling.
for sample_rows in (X_train.values, scaled_X_train):
    print(sample_rows[0:2])

[[2.175e+01 2.099e+01 1.473e+02 1.491e+03 9.401e-02 1.961e-01 2.195e-01
  1.088e-01 1.721e-01 6.194e-02 1.167e+00 1.352e+00 8.867e+00 1.568e+02
  5.687e-03 4.960e-02 6.329e-02 1.561e-02 1.924e-02 4.614e-03 2.819e+01
  2.818e+01 1.959e+02 2.384e+03 1.272e-01 4.725e-01 5.807e-01 1.841e-01
  2.833e-01 8.858e-02]
 [1.234e+01 1.495e+01 7.829e+01 4.691e+02 8.682e-02 4.571e-02 2.109e-02
  2.054e-02 1.571e-01 5.708e-02 3.833e-01 9.078e-01 2.602e+00 3.015e+01
  7.702e-03 8.491e-03 1.307e-02 1.030e-02 2.970e-02 1.432e-03 1.318e+01
  1.685e+01 8.411e+01 5.331e+02 1.048e-01 6.744e-02 4.921e-02 4.793e-02
  2.298e-01 5.974e-02]]
[[ 2.08107858  0.41696552  2.19118309  2.25631108 -0.16976244  1.70987722
   1.57263893  1.47929251 -0.32425059 -0.11782623  2.58958933  0.21561916
   2.78411691  2.36576467 -0.44551207  1.38621647  1.07342207  0.58857721
  -0.15257254  0.30594801  2.39389404  0.43463953  2.55468443  2.53243357
  -0.21386067  1.39682258  1.46333875  1.02856198 -0.08936938  0.27299817]
 [-0.5

In [4]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Train a support-vector classifier (library defaults) on the scaled
# training features; `fit` returns the estimator itself.
classifier = SVC().fit(scaled_X_train, y_train)

# Score the training split.
train_pred = classifier.predict(scaled_X_train)
train_acc = accuracy_score(y_train, train_pred)

# Score the held-out validation split.
valid_pred = classifier.predict(scaled_X_valid)
valid_acc = accuracy_score(y_valid, valid_pred)

# Report both accuracies as percentages.
print("Train Accuracy [%]:", train_acc * 100)
print("Valid Accuracy [%]:", valid_acc * 100)

Train Accuracy [%]: 98.24175824175823
Valid Accuracy [%]: 97.36842105263158


In [5]:
import joblib

# Persist the fitted scaler and classifier as two separate artifacts; both
# are needed later because the classifier was trained on scaled features.
scaler_path = "Model/scaler.joblib"
classifier_path = "Model/classifier.joblib"

joblib.dump(scaler, scaler_path)
# Kept as the final expression so the cell still displays the written path.
joblib.dump(classifier, classifier_path)

['Model/classifier.joblib']

In [1]:
import os

import joblib
import pandas as pd
import psycopg2
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Train and persist a scaler + SVC pipeline on rows pulled from PostgreSQL.
#
# Connection settings mirror the psql invocation used for manual checks:
#   psql -h localhost -p 1234 -U zerohertz -d Breast_Cancer
# The password is read from the PGPASSWORD environment variable so the secret
# is never stored in the notebook; export it before running this cell
# (a missing variable raises KeyError).
db_connect = psycopg2.connect(
    host="localhost",
    database="Breast_Cancer",
    user="zerohertz",
    password=os.environ["PGPASSWORD"],
    port="1234",
)
try:
    # Fetch the 100 rows with the highest ids into a DataFrame.
    df = pd.read_sql("SELECT * FROM breast_cancer_data ORDER BY id DESC LIMIT 100", db_connect)
finally:
    # Always release the connection, even when the query fails.
    db_connect.close()

# Everything except the bookkeeping columns and the label is a feature.
X, y = df.drop(["id", "timestamp", "target"], axis="columns"), df["target"]
X_train, X_valid, y_train, y_valid = train_test_split(X, y, train_size=0.8, random_state=2023)

# Bundling the scaler with the classifier lets a single artifact accept raw
# (unscaled) features at prediction time.
model_pipeline = Pipeline([("scaler", StandardScaler()), ("svc", SVC())])
model_pipeline.fit(X_train, y_train)

train_pred = model_pipeline.predict(X_train)
valid_pred = model_pipeline.predict(X_valid)

train_acc = accuracy_score(y_true=y_train, y_pred=train_pred)
valid_acc = accuracy_score(y_true=y_valid, y_pred=valid_pred)

print("Train Accuracy :", train_acc)
print("Valid Accuracy :", valid_acc)

# Create the output directory up front so a fresh checkout does not fail
# after the model has already been trained.
os.makedirs("Etc.", exist_ok=True)

joblib.dump(model_pipeline, "Etc./db_pipeline.joblib")

# Snapshot the exact rows used for training so the run can be reproduced
# without database access.
df.to_csv("Etc./DB.csv", index=False)

Train Accuracy : 0.9875
Valid Accuracy : 0.95


  df = pd.read_sql("SELECT * FROM breast_cancer_data ORDER BY id DESC LIMIT 100", db_connect)


In [1]:
import joblib
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Rebuild the split from the saved CSV snapshot using the same dropped
# columns, train_size and seed as the cell that trained the pipeline.
df = pd.read_csv("Etc./DB.csv")
y = df["target"]
X = df.drop(columns=["id", "timestamp", "target"])
X_train, X_valid, y_train, y_valid = train_test_split(
    X,
    y,
    train_size=0.8,
    random_state=2023,
)

# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load artifacts from a trusted source.
pipeline_load = joblib.load("Etc./db_pipeline.joblib")

# Score the reloaded pipeline on the training split.
load_train_pred = pipeline_load.predict(X_train)
load_train_acc = accuracy_score(y_train, load_train_pred)

# Score the reloaded pipeline on the validation split.
load_valid_pred = pipeline_load.predict(X_valid)
load_valid_acc = accuracy_score(y_valid, load_valid_pred)

print("Load Model Train Accuracy :", load_train_acc)
print("Load Model Valid Accuracy :", load_valid_acc)

Load Model Train Accuracy : 0.9875
Load Model Valid Accuracy : 0.95
