In [1]:
import pandas as pd
import numpy as np
import random
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder

In [2]:
data = {
    'age':[random.randint(20,60) for _ in range (100)],
    'gender':[random.choice(['Male', 'Female'])for _ in range(100)],
    'income':[random.randint(20000,100000) for _ in range(100)]
}
df = pd.DataFrame(data)
df.to_csv('data.csv', index=False)

In [3]:
data=pd.read_csv('data.csv')
data.head(20) #head gives 1st 5 reading by default, here it will give 20

Unnamed: 0,age,gender,income
0,35,Male,20373
1,29,Female,60927
2,50,Male,95729
3,39,Male,56291
4,51,Female,84349
5,31,Male,94476
6,41,Female,56340
7,53,Male,92259
8,42,Male,28827
9,48,Male,23901


In [4]:
data.isnull().sum()
#tells number of null values in each column

#data.dropna(how='any',inplace = True)
#above line removes rows with 1 or more null values

age       0
gender    0
income    0
dtype: int64

In [5]:
data.shape
#used to check shape of data

(100, 3)

In [6]:
#Label Encoding of gender
le=LabelEncoder()
data[['gender']]=data[['gender']].apply(le.fit_transform)
data.head(20)

Unnamed: 0,age,gender,income
0,35,1,20373
1,29,0,60927
2,50,1,95729
3,39,1,56291
4,51,0,84349
5,31,1,94476
6,41,0,56340
7,53,1,92259
8,42,1,28827
9,48,1,23901


In [7]:
#separating label from data
y=data["gender"]
x=data.drop(["gender"], axis=1)
print(x.shape)
print(y.shape)

(100, 2)
(100,)


In [8]:
#split the data into training and testing data
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test=train_test_split(x,y,test_size=0.3, random_state=2)
#0.3 test_size means 30% data will go to test and remaining 70% will be train
#2 random_state means every 2nd data will be picked, this means we can recreate this random splitting next time as well by using random_state=2

In [9]:
print('Shape of x_train is:', x_train.shape)
print('Shape of x_test is:', x_test.shape)
print('Shape of y_train is:', y_train.shape)
print('Shape of y_test is:', y_test.shape)

Shape of x_train is: (70, 2)
Shape of x_test is: (30, 2)
Shape of y_train is: (70,)
Shape of y_test is: (30,)


In [10]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)

In [11]:
print(x_train)

[[ 0.31975782 -0.74136053]
 [-1.10138805  0.07044128]
 [ 1.47443884 -1.13870903]
 [ 0.40857944 -0.2966774 ]
 [-1.45667451 -0.43161077]
 [-1.72313936 -0.77950768]
 [-0.03552865 -0.12376624]
 [-0.12435026  0.12361866]
 [ 0.67504429 -1.39693262]
 [-0.74610158 -0.38897572]
 [ 0.58622267 -0.07012313]
 [ 0.31975782  0.74332989]
 [ 1.2967956   1.21756542]
 [-0.65727996 -0.98938051]
 [ 0.14211459 -0.12486704]
 [-1.72313936  1.67821023]
 [ 1.20797399  0.17997144]
 [ 0.94150914 -0.56557034]
 [-0.74610158  0.53570101]
 [-0.03552865 -0.60261669]
 [ 0.58622267 -1.49719061]
 [ 0.58622267  0.04664694]
 [ 0.85268752  0.35910638]
 [-0.47963673 -1.64901711]
 [-1.54549613 -1.18413843]
 [ 0.67504429  1.40775076]
 [-1.54549613 -1.16491667]
 [ 1.65208207  0.31951972]
 [ 0.31975782  1.57206717]
 [ 0.05329297 -1.28863028]
 [ 0.49740105  0.46330187]
 [-1.10138805  0.19318111]
 [ 0.94150914  0.14368719]
 [-1.27903128 -0.41039909]
 [ 1.38561722  0.38124951]
 [ 0.94150914 -1.13765056]
 [-1.3678529  -0.98806801]
 

In [12]:
print(x_test)

[[-1.81196098 -0.17376822]
 [ 1.65208207  1.68828684]
 [-1.10138805 -0.93112249]
 [-0.21317188  0.0903828 ]
 [ 1.03033075 -0.26221371]
 [-1.9007826  -0.30252014]
 [ 0.7638659   1.54391195]
 [-0.47963673  1.2039747 ]
 [-0.12435026  1.4737991 ]
 [ 1.38561722  0.93258378]
 [-0.65727996  1.20456744]
 [ 0.49740105  1.59679296]
 [ 1.65208207 -0.32136085]
 [ 1.47443884  0.61445095]
 [-0.56845835 -1.64656147]
 [ 1.11915237 -0.3262298 ]
 [-0.21317188 -0.12584083]
 [ 1.65208207  1.59649659]
 [ 0.85268752  1.60801271]
 [-1.10138805 -0.80004194]
 [ 0.31975782 -0.10848198]
 [-0.3019935   1.24182548]
 [-0.12435026  1.68803281]
 [ 1.03033075  1.37527699]
 [ 0.67504429  0.96730149]
 [-0.92374481  1.49086158]
 [-1.54549613 -0.69690492]
 [ 1.56326045  0.55492277]
 [ 0.2309362   0.09161062]
 [ 1.11915237 -1.52098495]]


In [13]:
# Sequential model to initialize our ann and dense module to build the layers
from keras.models import Sequential
from keras.layers import Dense

ModuleNotFoundError: No module named 'keras'

In [None]:
classifier = Sequential()
# Adding the input layer and the first hidden layer
classifier.add(Dense(units = 8, kernel_initializer = 'uniform', activation = 'relu', input_dim = 2))

# Adding the Second hidden layer
classifier.add(Dense(units = 16, kernel_initializer = 'uniform', activation = 'relu'))

# Adding the Third hidden layer
classifier.add(Dense(units = 32, kernel_initializer = 'uniform', activation = 'relu'))

# Adding the Output layer
classifier.add(Dense(units = 1, kernel_initializer = 'uniform', activation = 'softmax'))

In [None]:
classifier.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])

# Fitting the ann to the training set
classifier.fit(x_train, y_train, batch_size = 16, epochs = 50, verbose = 1)