# Detecting Whether a Tumor is Benign or Malignant With Machine Learning
# (IN TURKISH :makina öğrenmesi ile tümörun iyi huylu mu kötü huylu mu olduğunu tespit etme)

In [1]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt

data=pd.read_csv("../input/data-csv/data.csv")
data.drop(["Unnamed: 32","id"],axis=1,inplace =True)
data.diagnosis =[1 if each == "M"else 0 for each in data.diagnosis]
print(data.info())

y=data.diagnosis.values
x_data=data.drop(["diagnosis"],axis=1)


x=(x_data -np.min(x_data))/(np.max(x_data)-np.min(x_data)).values

from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test =train_test_split(x,y,test_size = 0.2 ,random_state=42)


x_train=x_train.T
x_test=x_test.T
y_train=y_train.T
y_test=y_test.T


print("x_train :",x_train.shape)
print("x_test :",x_test.shape)
print("y_train :",y_train.shape)
print("y_test :",y_test.shape)

#parameter initialize and sigmoid function
#dimension =30


def initialize_weights_and_bias(dimension):
    
    w=np.full((dimension),0.01)
    b=0.0
    return w,b

#w,b =initialize_weights_and_bias(30)
def sigmoid(z):
    
    y_head=1/(1+np.exp(-z))
    return y_head
#print(sigmoid(0))

def forward_backward_propagation(w,b,x_train,y_train):
    #forward propagation
    z=np.dot(w.T,x_train)+b # z = b + px1w1 + px2w2 + .... + px4096*w4096
    y_head=sigmoid(z)
    loss=-y_train*np.log(y_head)-(1-y_train)*np.log(1-y_head) 
    # yukardaki loss fonksiyonu ==> -(1-y)log(1-y_head)+y*log(y_head)
    cost=(np.sum(loss))/x_train.shape[1]
    
    
    #backward propagation
    derivate_weight=(np.dot(x_train,((y_head-y_train).T)))/x_train.shape[1]#türev
    derivate_bias=np.sum(y_head-y_train)/x_train.shape[1]#türev
    gradients={"derivate_weight": derivate_weight,"derivate_bias": derivate_bias}
    
    return cost,gradients
    
# Updating(learning) parameters
def update(w, b, x_train, y_train, learning_rate,number_of_iterarion):#learning_rate = ögreneme katsayisi ,number_of_iterarion = kac iterayon oldugu
    cost_list = [] #tüm costlari depolamak icin
    cost_list2 = [] # 10 adimda bir depoamak icin
    index = []
    # updating(learning) parameters is number_of_iterarion times
    for i in range(number_of_iterarion):
        # make forward and backward propagation and find cost and gradients
        cost,gradients = forward_backward_propagation(w,b,x_train,y_train)#gradients=türev demektir.
        cost_list.append(cost)
        # lets update
        w = w - learning_rate * gradients["derivative_weight"]
        b = b - learning_rate * gradients["derivative_bias"]
        if i % 10 == 0:# 10 degisebilir istege bagli
            cost_list2.append(cost)
            index.append(i)
            print ("Cost after iteration %i: %f" %(i, cost))
    # we update(learn) parameters weights and bias
    parameters = {"weight": w,"bias": b}
    plt.plot(index,cost_list2)
    plt.xticks(index,rotation='vertical')
    plt.xlabel("Number of Iterarion")
    plt.ylabel("Cost")
    plt.show()
    return parameters, gradients, cost_list
#parameters, gradients, cost_list = update(w, b, x_train, y_train, learning_rate = 0.009,number_of_iterarion = 200)


 # prediction(TEST DATASINI KULLANMALIYIZ PREDİCT EDERKEN)
def predict(w,b,x_test):
    # x_test is a input for forward propagation
    z = sigmoid(np.dot(w.T,x_test)+b)
    Y_prediction = np.zeros((1,x_test.shape[1]))
    # if z is bigger than 0.5, our prediction is sign one (y_head=1),
    # if z is smaller than 0.5, our prediction is sign zero (y_head=0),
    for i in range(z.shape[1]):
        if z[0,i]<= 0.5:
            Y_prediction[0,i] = 0
        else:
            Y_prediction[0,i] = 1

    return Y_prediction
# predict(parameters["weight"],parameters["bias"],x_test)
#dimension'i kac tane fature varsa ona eşitlemeliyiz
def logistic_regression(x_train, y_train, x_test, y_test, learning_rate ,  num_iterations):
    # initialize
    dimension =  x_train.shape[0]  # that is 4096
    w,b = initialize_weights_and_bias(dimension)
    # do not change learning rate
    parameters, gradients, cost_list = update(w, b, x_train, y_train, learning_rate,num_iterations)
    
    y_prediction_test = predict(parameters["weight"],parameters["bias"],x_test)
    y_prediction_train = predict(parameters["weight"],parameters["bias"],x_train)

    # Print train/test Errors
    print("train accuracy: {} %".format(100 - np.mean(np.abs(y_prediction_train - y_train)) * 100))
    print("test accuracy: {} %".format(100 - np.mean(np.abs(y_prediction_test - y_test)) * 100))
    
logistic_regression(x_train, y_train, x_test, y_test,learning_rate = 1, num_iterations = 300)

#sklearn with LR
from sklearn.linear.model import LogisticRegression
lr=LogisticRegression()
lr.fit(x_train.T,y_train.T)
print("test accurary {}".format(lr.score(x_test.T,y_test.T)))
    
    
    
    
    
    



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 569 entries, 0 to 568
Data columns (total 31 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   diagnosis                569 non-null    int64  
 1   radius_mean              569 non-null    float64
 2   texture_mean             569 non-null    float64
 3   perimeter_mean           569 non-null    float64
 4   area_mean                569 non-null    float64
 5   smoothness_mean          569 non-null    float64
 6   compactness_mean         569 non-null    float64
 7   concavity_mean           569 non-null    float64
 8   concave points_mean      569 non-null    float64
 9   symmetry_mean            569 non-null    float64
 10  fractal_dimension_mean   569 non-null    float64
 11  radius_se                569 non-null    float64
 12  texture_se               569 non-null    float64
 13  perimeter_se             569 non-null    float64
 14  area_se                  5

KeyError: 'derivative_weight'