# Installing the prerequisites

#### For installing scikit-learn library, use the command below:

$ pip3 install -U scikit-learn

#### For installing Scipy, use the command below:

$ python -m pip install scipy

#### For installing Pandas:

$ python -m pip install pandas

#### Also, we may need Seaborn:

$ pip install seaborn

#### I assume everyone already has Matplotlib and NumPy installed on their system.

# Importing Dataset

In [None]:
import sklearn

In [None]:
from sklearn.datasets import load_iris

In [None]:
load_iris()

In [None]:
load_iris(return_X_y = True)

In [None]:
X, y = load_iris(return_X_y = True)

# Data Cleaning and Pre-processing

In [None]:
import pandas as pd

In [None]:
# to load dataset, we need this functionality

from sklearn.datasets  import fetch_openml

In [None]:
# Import dataset using fetch_openml and store it inside the dataframe

df = fetch_openml('titanic', version = 1, as_frame = True)['data']

In [None]:
df.info()

In [None]:
df.isnull()

In [None]:
df.isnull().sum()

In [None]:
import seaborn as sns

In [None]:
# Apply seaborn's default theme before drawing the chart.
sns.set()
# Share of missing entries per column, expressed as a percentage of all rows.
null_counts = df.isnull().sum()
miss_val_per = pd.DataFrame(null_counts / len(df) * 100)
miss_val_per.plot(
    kind='bar',
    title='Missing value in percentage',
    ylabel='percentage',
)

In [None]:
print(f'size of dataset: {df.shape}')

In [None]:
# Remove the 'body' feature from the frame in place, then report the new shape.
df.drop(columns=['body'], inplace=True)
print(f'Size of the dataset after dropping a feature: {df.shape}')

In [None]:
from sklearn.impute import SimpleImputer

In [None]:
print(f'Number of Null values before imputing: {df.age.isnull().sum()}')

In [None]:
# Replace missing ages with the column mean.
imp = SimpleImputer(strategy='mean')
age_frame = df[['age']]  # single-column DataFrame (2-D selection)
df['age'] = imp.fit_transform(age_frame)
remaining_nulls = df.age.isnull().sum()
print(f'Number of null values in age column after imputation: {remaining_nulls}')

In [None]:
def get_parameters(df):
    """Build per-column imputation settings for every column that has nulls.

    Args:
        df: DataFrame to inspect.

    Returns:
        dict mapping each column name that contains at least one null to
        ``{'missing_values': <first null found in that column>,
        'strategy': 'mean' or 'most_frequent'}``. ``'mean'`` is chosen for
        float64/int64/int32 columns and ``'most_frequent'`` for every other
        dtype. Columns without nulls are omitted.
    """
    numeric_dtypes = ('float64', 'int64', 'int32')
    settings = {}
    for name in df.columns[df.isnull().any()]:
        column = df[name]
        is_numeric = any(column.dtype == kind for kind in numeric_dtypes)
        # Reuse the column's own null marker (the first one encountered).
        placeholder = column[column.isnull()].values[0]
        settings[name] = {
            'missing_values': placeholder,
            'strategy': 'mean' if is_numeric else 'most_frequent',
        }
    return settings

get_parameters(df)

In [None]:
parameters = get_parameters(df)

In [None]:
# Impute every column that still contains nulls, using the per-column
# settings collected in `parameters`.
for col, param in parameters.items():
    missing_values = param['missing_values']
    strategy = param['strategy']
    imp = SimpleImputer(missing_values = missing_values, strategy = strategy)
    # fit_transform has to be *called* on a single-column (2-D) selection;
    # assigning the bound method itself would replace the column's values
    # with a function object. ravel() flattens the one-column result back
    # to one value per row before it is stored.
    df[col] = imp.fit_transform(df[[col]]).ravel()

In [None]:
df.isnull().sum()

In [None]:
 df.head()

In [None]:
# Family size on board = siblings/spouses + parents/children.
df['family'] = df['sibsp'] + df['parch']
with_family = df['family'] > 0
without_family = df['family'] == 0
# Flag passengers: 0 when accompanied by family, 1 when travelling alone.
df.loc[with_family, 'travelled_alone'] = 0
df.loc[without_family, 'travelled_alone'] = 1
alone_counts = df['travelled_alone'].value_counts()
alone_counts.plot(title = 'Passanger travelled alone?', kind = 'bar')

In [None]:
from sklearn.preprocessing import OneHotEncoder
# Expand 'sex' into two indicator columns, then show them next to the original.
sex_indicators = OneHotEncoder().fit_transform(df[['sex']]).toarray()
df[['female', 'male']] = sex_indicators
df[['sex', 'female', 'male']]

In [None]:
df['sex'] = OneHotEncoder().fit_transform(df[['sex']]).toarray()[:,1]

In [None]:
df.head()

In [None]:
 from sklearn.preprocessing import StandardScaler

In [None]:
num_cols = df.select_dtypes(include=['int64', 'float64', 'int32']).columns
print(num_cols)

In [None]:
ss = StandardScaler()

In [None]:
# Fit the StandardScaler instance `ss` on the numeric columns and overwrite
# them with the transformed values.
df[num_cols] = ss.fit_transform(df[num_cols])
# Summary statistics of the transformed columns (notebook cell output).
df[num_cols].describe()

In [None]:
from sklearn.preprocessing import MinMaxScaler
minmax = MinMaxScaler()
# Fit the scaler on the numeric columns and overwrite them with the scaled
# values. NOTE(review): the input is whatever df[num_cols] holds when this
# cell runs, i.e. the result of any earlier in-place scaling.
df[num_cols] = minmax.fit_transform(df[num_cols])
# Display the rescaled columns (notebook cell output).
df[num_cols]

In [None]:
import numpy as np
x = np.array([[0, 0], [0, 1], [1,0], [1, 1]])
y = np.array([0, 1, 1, 1])
x

In [None]:
y

In [None]:
w = np.array([1, 1])

In [None]:
w

In [None]:
b = -0.5

In [None]:
pred = []

In [None]:
def activation(z):
    """Step activation: return 1 when ``z`` is non-negative, otherwise 0."""
    return 1 if z >= 0 else 0

In [None]:
# Score each input row with the fixed weights and bias, then threshold the
# result and record the prediction.
for sample in x:
    weighted_sum = np.dot(sample, w) + b
    pred.append(activation(weighted_sum))

In [None]:
pred

In [None]:
import math
import numpy as np

In [None]:
epochs = 100
alpha = 0.2

In [None]:
# Draw one random starting value per weight.
w0 = np.random.random()
w1 = np.random.random()
w2 = np.random.random()
# Report the starting point so it can be compared with the trained weights.
print("initial wights: ")
print("w0 = ", w0, "w1 = ", w1, "w2 = ", w2)

In [None]:
del_w0 = 1
del_w1 = 1
del_w2 = 1

In [None]:
# All eight combinations of three binary inputs, one row per sample.
train_data_temp = [[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 1, 1], [1, 0, 0], [1, 0, 1], [1, 1, 0], [1, 1, 1]]
train_data = np.asarray(train_data_temp)
# Expected output per row: 0 only for [0, 0, 0], 1 for every other row.
op = np.array([0, 1, 1, 1, 1, 1, 1, 1])

In [None]:
train_data

In [None]:
op

In [None]:
# Train a three-input perceptron on (train_data, op): for each sample,
# threshold the weighted sum, compare with the expected output, and nudge the
# weights and the bias by alpha * input * error.
bias = 0
for _ in range(epochs):
    for x, target in zip(train_data, op):
        # Step activation on the weighted sum of the inputs.
        y_hat = w0*x[0] + w1*x[1] + w2*x[2] + bias
        act = 1 if y_hat >= 0 else 0
        err = target - act

        del_w0 = alpha*x[0]*err
        del_w1 = alpha*x[1]*err
        del_w2 = alpha*x[2]*err

        w0 = w0 + del_w0
        w1 = w1 + del_w1
        w2 = w2 + del_w2

        # The bias has to learn too (its input is the constant 1). With a
        # fixed bias of 0 the all-zero sample always gives y_hat == 0, is
        # classified as 1, and its error can never be corrected because every
        # weight update is multiplied by a zero input.
        bias = bias + alpha*err

print('\nFinal Weights = ')
print('w0 = ', w0, 'w1 = ', w1, 'w2 = ', w2)
print('bias = ', bias)

# Pre-processing