In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,LabelEncoder
import pickle

In [6]:
# Data as a dictionary
data = {
    "name": ["John Doe", "Jane Smith", "Bob Johnson", "Alice Brown", "Charlie Davis", 
             "Emily White", "David Wilson", "Lily Adams", "Michael Harris", "Susan Clark"],
    "age": [35, 28, 42, 30, 60, 25, 38, 33, 55, 47],
    "gender": ["Male", "Female", "Male", "Female", "Male", "Female", "Male", "Female", "Male", "Female"],
    "Geography": ["New York", "California", "Texas", "Florida", "Washington", 
                  "Georgia", "Illinois", "California", "Ohio", "New York"],
    "Exited": [1, 0, 1, 0, 1, 0, 1, 0, 1, 0]
}

# Creating DataFrame
df = pd.DataFrame(data)

# Saving to CSV
csv_file_path = '/home/enterpi/Desktop/ANN Project/sample.csv'
df.to_csv(csv_file_path)


In [7]:
data=pd.read_csv('sample.csv')
data

Unnamed: 0.1,Unnamed: 0,name,age,gender,Geography,Exited
0,0,John Doe,35,Male,New York,1
1,1,Jane Smith,28,Female,California,0
2,2,Bob Johnson,42,Male,Texas,1
3,3,Alice Brown,30,Female,Florida,0
4,4,Charlie Davis,60,Male,Washington,1
5,5,Emily White,25,Female,Georgia,0
6,6,David Wilson,38,Male,Illinois,1
7,7,Lily Adams,33,Female,California,0
8,8,Michael Harris,55,Male,Ohio,1
9,9,Susan Clark,47,Female,New York,0


In [8]:
data=pd.DataFrame(data)
data=data.drop(['name'],axis=1)
data=data.drop_duplicates()
data

Unnamed: 0.1,Unnamed: 0,age,gender,Geography,Exited
0,0,35,Male,New York,1
1,1,28,Female,California,0
2,2,42,Male,Texas,1
3,3,30,Female,Florida,0
4,4,60,Male,Washington,1
5,5,25,Female,Georgia,0
6,6,38,Male,Illinois,1
7,7,33,Female,California,0
8,8,55,Male,Ohio,1
9,9,47,Female,New York,0


In [9]:
# convert the gender column data into vector
label_encoder=LabelEncoder()
data['gender']=label_encoder.fit_transform(data['gender'])
data

Unnamed: 0.1,Unnamed: 0,age,gender,Geography,Exited
0,0,35,1,New York,1
1,1,28,0,California,0
2,2,42,1,Texas,1
3,3,30,0,Florida,0
4,4,60,1,Washington,1
5,5,25,0,Georgia,0
6,6,38,1,Illinois,1
7,7,33,0,California,0
8,8,55,1,Ohio,1
9,9,47,0,New York,0


In [10]:
data.columns
data=data.drop('Unnamed: 0',axis=1)
data

Unnamed: 0,age,gender,Geography,Exited
0,35,1,New York,1
1,28,0,California,0
2,42,1,Texas,1
3,30,0,Florida,0
4,60,1,Washington,1
5,25,0,Georgia,0
6,38,1,Illinois,1
7,33,0,California,0
8,55,1,Ohio,1
9,47,0,New York,0


In [11]:
# data having geography column it is having different country names then we need to use one hot encoder technique to convert words into vectors
from sklearn.preprocessing import OneHotEncoder
one_hot_encoder=OneHotEncoder()
geo_encoder=one_hot_encoder.fit_transform(data[['Geography']])
one_hot_encoder.get_feature_names_out(['Geography'])


array(['Geography_California', 'Geography_Florida', 'Geography_Georgia',
       'Geography_Illinois', 'Geography_New York', 'Geography_Ohio',
       'Geography_Texas', 'Geography_Washington'], dtype=object)

In [12]:
# making above dataset as a seperate table
geo_encoder_df=pd.DataFrame(geo_encoder.toarray(),columns=one_hot_encoder.get_feature_names_out(['Geography']))

In [13]:
print(data.columns)
geo_encoder_df

Index(['age', 'gender', 'Geography', 'Exited'], dtype='object')


Unnamed: 0,Geography_California,Geography_Florida,Geography_Georgia,Geography_Illinois,Geography_New York,Geography_Ohio,Geography_Texas,Geography_Washington
0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
1,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
3,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
5,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
6,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
7,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
9,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0


In [14]:
# drop the Geography column and concat both data and geo_encoder_df tables 
data=pd.concat([data.drop('Geography',axis=1),geo_encoder_df],axis=1)

In [15]:
data.columns

Index(['age', 'gender', 'Exited', 'Geography_California', 'Geography_Florida',
       'Geography_Georgia', 'Geography_Illinois', 'Geography_New York',
       'Geography_Ohio', 'Geography_Texas', 'Geography_Washington'],
      dtype='object')

In [17]:
import pickle
scaler=StandardScaler()
# save the encoders and scallers
with open('label_encoder.pkl','wb') as file:
    pickle.dump(label_encoder,file)
with open('one_hot_encoder.pkl','wb') as file:
    pickle.dump(one_hot_encoder,file)
with open('scaler.pkl','wb')as file:
    pickle.dump(scaler,file)


In [18]:

# divide data set into independent and dependent features

x=data.drop('Exited',axis=1)
y=data['Exited']



In [19]:
# split the data in training and testing features

x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.3,random_state=42)

In [20]:

x_test


Unnamed: 0,age,gender,Geography_California,Geography_Florida,Geography_Georgia,Geography_Illinois,Geography_New York,Geography_Ohio,Geography_Texas,Geography_Washington
8,55,1,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
1,28,0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,25,0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0


In [21]:
# Build our model
import tensorflow
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
model=Sequential([
    Dense(64,activation='relu',input_shape=(x_train.shape[1],)), # hidden layer 1
    Dense(32,activation='relu'),  # hidden layer 2
    Dense(1,activation='sigmoid')  # output layer
])

2025-02-21 18:34:07.446106: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-02-21 18:34:07.470454: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-02-21 18:34:07.512525: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1740143047.588140   21903 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1740143047.608361   21903 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
2025-02-21 18:34:25.276638: E external/local_xla/xla/stream_executor/

In [22]:
# opt=tensorflow.keras.Adam(learning_rate=0.01)
# loss=tensorflow.keras.losses.binary_crossentropy()
model.compile(optimizer='adam',loss='binary_crossentropy',metrics=['accuracy'])

In [23]:
model.summary()

In [24]:
import datetime
# setup the tensorflow board
log_dir='log/fit/'+datetime.datetime.now().strftime('%y%m%d-%H%M%S')

In [25]:
from tensorflow.keras.callbacks import EarlyStopping,TensorBoard
tensorflow_callback=TensorBoard(log_dir=log_dir,histogram_freq=1)



In [26]:
# setup Early stopping
earlyStopping_callback=EarlyStopping(monitor='val_loss',patience=10,restore_best_weights=True)

In [27]:
# train the model
# history=model.fit(x_train,y_train,validation_data=(x_test,y_test),epochs=100,callbacks=[tensorflow_callback,earlyStopping_callback])
x_train.shape
y_train.shape
# model.input_shape
type(y_train)

pandas.core.series.Series

In [28]:
# save the model
model_path='log/fit/'+ datetime.datetime.now().strftime('%y%m%d-%H%M%S')+'.keras'
model.save(model_path)


In [29]:
%load_ext tensorflow

The tensorflow module is not an IPython extension.


In [30]:
from tensorflow.keras.callbacks import TensorBoard
%load_ext tensorboard
%tensorboard --logdir log/fit/


Launching TensorBoard...

In [31]:
%tensorboard --logdir log/fit/

Reusing TensorBoard on port 6006 (pid 22081), started 0:00:00 ago. (Use '!kill 22081' to kill it.)

In [72]:
model_path

'log/fit/250221-171111.keras'