In [1]:
import numpy as np

class LogisticRegression:

    def __init__(self, learning_rate=0.001, n_iters=1000):
        self.lr = learning_rate
        self.n_iters = n_iters
        self.weights = None
        self.bias = None

    def fit(self, X, y):
        n_samples, n_features = X.shape

        # init parameters
        self.weights = np.zeros(n_features)
        self.bias = 0

        # gradient descent
        for _ in range(self.n_iters):
            # approximate y with linear combination of weights and x, plus bias
            linear_model = np.dot(X, self.weights) + self.bias
            # apply sigmoid function
            y_predicted = self._sigmoid(linear_model)

            # compute gradients
            dw = (1 / n_samples) * np.dot(X.T, (y_predicted - y))
            db = (1 / n_samples) * np.sum(y_predicted - y)
            # update parameters
            self.weights -= self.lr * dw
            self.bias -= self.lr * db

    def predict(self, X):
        linear_model = np.dot(X, self.weights) + self.bias
        y_predicted = self._sigmoid(linear_model)
        y_predicted_cls = [1 if i > 0.5 else 0 for i in y_predicted]
        return np.array(y_predicted_cls)

    def _sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

In [2]:
from sklearn import datasets
from sklearn.model_selection import train_test_split

In [3]:
def accuracy(y_true, y_pred):
    accuracy = np.sum(y_true == y_pred) / len(y_true)
    return accuracy

In [5]:
bc = datasets.load_breast_cancer()
X, y = bc.data, bc.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=1234)

In [6]:
regressor = LogisticRegression(learning_rate=0.0001, n_iters=1000)
regressor.fit(X_train, y_train)
predictions = regressor.predict(X_train)

In [7]:
accuracy(y_train, predictions)

0.9186813186813186

In [8]:
predictions = regressor.predict(X_test)
accuracy(y_test, predictions)

0.9298245614035088

In [9]:
iris = datasets.load_iris()
X = iris.data
y = iris.target

In [10]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=1234)

In [11]:
regressor = LogisticRegression(learning_rate=0.0001, n_iters=1000)
regressor.fit(X_train, y_train)
predictions = regressor.predict(X_train)

In [12]:
accuracy(y_train, predictions)

0.30833333333333335

In [14]:
predictions = regressor.predict(X_test)
accuracy(y_test, predictions)

0.43333333333333335

In [15]:
from google.colab import files
files.upload()

Saving kaggle.json to kaggle.json


{'kaggle.json': b'{"username":"tanvipenumudy","key":"487bfae60e4b65e36e1a308cf7412e23"}'}

In [16]:
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [17]:
!kaggle datasets download -d henriqueyamahata/bank-marketing

Downloading bank-marketing.zip to /content
  0% 0.00/393k [00:00<?, ?B/s]
100% 393k/393k [00:00<00:00, 52.7MB/s]


In [18]:
!ls

bank-marketing.zip  kaggle.json  sample_data


In [19]:
!unzip "bank-marketing.zip" -d /tmp

Archive:  bank-marketing.zip
  inflating: /tmp/bank-additional-full.csv  
  inflating: /tmp/bank-additional-names.txt  


In [20]:
import pandas as pd

In [23]:
data = pd.read_csv('/tmp/bank-additional-full.csv', delimiter=';')

In [24]:
data.head()

Unnamed: 0,age,job,marital,education,default,housing,loan,contact,month,day_of_week,duration,campaign,pdays,previous,poutcome,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed,y
0,56,housemaid,married,basic.4y,no,no,no,telephone,may,mon,261,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
1,57,services,married,high.school,unknown,no,no,telephone,may,mon,149,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
2,37,services,married,high.school,no,yes,no,telephone,may,mon,226,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
3,40,admin.,married,basic.6y,no,no,no,telephone,may,mon,151,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
4,56,services,married,high.school,no,no,yes,telephone,may,mon,307,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no


In [25]:
data.describe()

Unnamed: 0,age,duration,campaign,pdays,previous,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed
count,41188.0,41188.0,41188.0,41188.0,41188.0,41188.0,41188.0,41188.0,41188.0,41188.0
mean,40.02406,258.28501,2.567593,962.475454,0.172963,0.081886,93.575664,-40.5026,3.621291,5167.035911
std,10.42125,259.279249,2.770014,186.910907,0.494901,1.57096,0.57884,4.628198,1.734447,72.251528
min,17.0,0.0,1.0,0.0,0.0,-3.4,92.201,-50.8,0.634,4963.6
25%,32.0,102.0,1.0,999.0,0.0,-1.8,93.075,-42.7,1.344,5099.1
50%,38.0,180.0,2.0,999.0,0.0,1.1,93.749,-41.8,4.857,5191.0
75%,47.0,319.0,3.0,999.0,0.0,1.4,93.994,-36.4,4.961,5228.1
max,98.0,4918.0,56.0,999.0,7.0,1.4,94.767,-26.9,5.045,5228.1


In [26]:
data = data.sample(frac=1).reset_index(drop=True)

In [27]:
data

Unnamed: 0,age,job,marital,education,default,housing,loan,contact,month,day_of_week,duration,campaign,pdays,previous,poutcome,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed,y
0,40,technician,married,professional.course,no,yes,no,cellular,may,thu,580,1,999,1,failure,-1.8,92.893,-46.2,1.266,5099.1,no
1,37,blue-collar,married,professional.course,no,yes,no,telephone,may,tue,160,6,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
2,34,housemaid,single,university.degree,no,no,no,telephone,may,fri,62,2,999,0,nonexistent,1.1,93.994,-36.4,4.864,5191.0,no
3,33,blue-collar,married,basic.9y,unknown,unknown,unknown,cellular,may,thu,551,2,999,0,nonexistent,-1.8,92.893,-46.2,1.266,5099.1,no
4,49,self-employed,married,professional.course,unknown,no,no,cellular,aug,tue,432,9,999,0,nonexistent,1.4,93.444,-36.1,4.963,5228.1,no
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41183,39,services,single,high.school,no,no,no,telephone,may,fri,346,4,999,0,nonexistent,1.1,93.994,-36.4,4.855,5191.0,no
41184,38,management,divorced,university.degree,no,yes,no,cellular,apr,mon,182,2,6,1,success,-1.8,93.749,-34.6,0.645,5008.7,yes
41185,54,blue-collar,married,basic.4y,no,yes,no,cellular,aug,thu,109,2,999,0,nonexistent,1.4,93.444,-36.1,4.964,5228.1,no
41186,27,services,single,professional.course,no,no,no,telephone,jun,fri,106,5,999,0,nonexistent,1.4,94.465,-41.8,4.967,5228.1,no


In [28]:
data.isna().sum()

age               0
job               0
marital           0
education         0
default           0
housing           0
loan              0
contact           0
month             0
day_of_week       0
duration          0
campaign          0
pdays             0
previous          0
poutcome          0
emp.var.rate      0
cons.price.idx    0
cons.conf.idx     0
euribor3m         0
nr.employed       0
y                 0
dtype: int64

In [29]:
from sklearn.preprocessing import LabelEncoder, MinMaxScaler

In [30]:
LE = LabelEncoder()
CateList = data.select_dtypes(include="object").columns
print(CateList)

Index(['job', 'marital', 'education', 'default', 'housing', 'loan', 'contact',
       'month', 'day_of_week', 'poutcome', 'y'],
      dtype='object')


In [31]:
data.head()

Unnamed: 0,age,job,marital,education,default,housing,loan,contact,month,day_of_week,duration,campaign,pdays,previous,poutcome,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed,y
0,40,technician,married,professional.course,no,yes,no,cellular,may,thu,580,1,999,1,failure,-1.8,92.893,-46.2,1.266,5099.1,no
1,37,blue-collar,married,professional.course,no,yes,no,telephone,may,tue,160,6,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
2,34,housemaid,single,university.degree,no,no,no,telephone,may,fri,62,2,999,0,nonexistent,1.1,93.994,-36.4,4.864,5191.0,no
3,33,blue-collar,married,basic.9y,unknown,unknown,unknown,cellular,may,thu,551,2,999,0,nonexistent,-1.8,92.893,-46.2,1.266,5099.1,no
4,49,self-employed,married,professional.course,unknown,no,no,cellular,aug,tue,432,9,999,0,nonexistent,1.4,93.444,-36.1,4.963,5228.1,no


In [32]:
for i in CateList:
    data[i] = LE.fit_transform(data[i])
data.head()

Unnamed: 0,age,job,marital,education,default,housing,loan,contact,month,day_of_week,duration,campaign,pdays,previous,poutcome,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed,y
0,40,9,1,5,0,2,0,0,6,2,580,1,999,1,0,-1.8,92.893,-46.2,1.266,5099.1,0
1,37,1,1,5,0,2,0,1,6,3,160,6,999,0,1,1.1,93.994,-36.4,4.857,5191.0,0
2,34,3,2,6,0,0,0,1,6,0,62,2,999,0,1,1.1,93.994,-36.4,4.864,5191.0,0
3,33,1,1,2,1,1,1,0,6,2,551,2,999,0,1,-1.8,92.893,-46.2,1.266,5099.1,0
4,49,6,1,5,1,0,0,0,1,3,432,9,999,0,1,1.4,93.444,-36.1,4.963,5228.1,0


In [33]:
df = data.iloc[:,:-1]
mm = MinMaxScaler()
df[:]= mm.fit_transform(df[:])

In [34]:
df.head()

Unnamed: 0,age,job,marital,education,default,housing,loan,contact,month,day_of_week,duration,campaign,pdays,previous,poutcome,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed
0,0.283951,0.818182,0.333333,0.714286,0.0,1.0,0.0,0.0,0.666667,0.5,0.117934,0.0,1.0,0.142857,0.0,0.333333,0.26968,0.192469,0.143278,0.512287
1,0.246914,0.090909,0.333333,0.714286,0.0,1.0,0.0,1.0,0.666667,0.75,0.032534,0.090909,1.0,0.0,0.5,0.9375,0.698753,0.60251,0.957379,0.859735
2,0.209877,0.272727,0.666667,0.857143,0.0,0.0,0.0,1.0,0.666667,0.0,0.012607,0.018182,1.0,0.0,0.5,0.9375,0.698753,0.60251,0.958966,0.859735
3,0.197531,0.090909,0.333333,0.285714,0.5,0.5,0.5,0.0,0.666667,0.5,0.112037,0.018182,1.0,0.0,0.5,0.333333,0.26968,0.192469,0.143278,0.512287
4,0.395062,0.545455,0.333333,0.714286,0.5,0.0,0.0,0.0,0.111111,0.75,0.087841,0.145455,1.0,0.0,0.5,1.0,0.484412,0.615063,0.98141,1.0


In [35]:
X = df.values
y = data['y'].values

In [36]:
X_shape = X.shape
X_type  = type(X)
y_shape = y.shape
y_type  = type(y)

In [37]:
print(f'X: Type-{X_type}, Shape-{X_shape}')
print(f'y: Type-{y_type}, Shape-{y_shape}')

X: Type-<class 'numpy.ndarray'>, Shape-(41188, 20)
y: Type-<class 'numpy.ndarray'>, Shape-(41188,)


In [38]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=123)

In [39]:
print(X_train.shape, X_test.shape)
print(y_train.shape, y_test.shape)

(30891, 20) (10297, 20)
(30891,) (10297,)


In [40]:
regressor = LogisticRegression(learning_rate=0.0001, n_iters=1000)
regressor.fit(X_train, y_train)
predictions = regressor.predict(X_train)

In [41]:
accuracy(y_train, predictions)

0.888187497976757

In [42]:
predictions = regressor.predict(X_test)
accuracy(y_test, predictions)

0.8848208215985238