In [1]:
import tensorflow as tf
import numpy as np
import sklearn as sk
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt


# Load Data

In [2]:
df = pd.read_csv('competition_data/Luke_hair_loss_documentation.csv')
df

Unnamed: 0,date,hair_loss,stay_up_late,pressure_level,coffee_consumed,brain_working_duration,school_assesssment,stress_level,shampoo_brand,swimming,hair_washing,hair_grease,dandruff,libido
0,1/01/2021,Few,2,Low,0,1,,Low,Pantene,No,Y,3.0,,1
1,2/01/2021,Few,0,Low,0,3,,Low,Pantene,No,N,1.0,,1
2,3/01/2021,Medium,3,Low,1,0,,Low,Pantene,Yes,Y,2.0,,2
3,4/01/2021,Few,2,Low,0,1,,Low,Pantene,No,N,3.0,,3
4,5/01/2021,Few,2,Low,0,1,,Low,Pantene,No,Y,1.0,,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
395,31/01/2022,Medium,1,Low,1,2,,Low,Hair & Shoulder,No,N,1.0,,5
396,1/02/2022,Few,1,Low,0,3,,Low,Hair & Shoulder,Yes,Y,2.0,,1
397,2/02/2022,Medium,1,Low,1,1,,Low,Hair & Shoulder,No,N,2.0,,5
398,3/02/2022,Medium,0,Low,1,1,,Low,Hair & Shoulder,No,N,2.0,,5


# Proccessing Data

In [3]:
df['dandruff'].replace('None',0,inplace=True)
df['dandruff'].replace('Few',1,inplace=True)
df['dandruff'].replace('Many',2, inplace=True)

df['hair_loss'].replace("Few", 0, inplace=True)
df['hair_loss'].replace("Medium", 0, inplace=True)
df['hair_loss'].replace("Many", 1, inplace=True)
df['hair_loss'].replace("A lot", 1, inplace=True)

df['swimming'].replace('Yes', 1, inplace=True)
df['swimming'].replace('No', 0, inplace=True)

df.replace('Low', 0, inplace=True)
df.replace('Medium', 1, inplace=True)
df.replace('High', 2, inplace=True)
df.replace('Very High', 3, inplace=True)



df.head()


Unnamed: 0,date,hair_loss,stay_up_late,pressure_level,coffee_consumed,brain_working_duration,school_assesssment,stress_level,shampoo_brand,swimming,hair_washing,hair_grease,dandruff,libido
0,1/01/2021,0,2,0,0,1,,0,Pantene,0,Y,3.0,0,1
1,2/01/2021,0,0,0,0,3,,0,Pantene,0,N,1.0,0,1
2,3/01/2021,0,3,0,1,0,,0,Pantene,1,Y,2.0,0,2
3,4/01/2021,0,2,0,0,1,,0,Pantene,0,N,3.0,0,3
4,5/01/2021,0,2,0,0,1,,0,Pantene,0,Y,1.0,0,2


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 14 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   date                    400 non-null    object 
 1   hair_loss               400 non-null    int64  
 2   stay_up_late            400 non-null    int64  
 3   pressure_level          400 non-null    int64  
 4   coffee_consumed         400 non-null    int64  
 5   brain_working_duration  400 non-null    int64  
 6   school_assesssment      400 non-null    object 
 7   stress_level            400 non-null    int64  
 8   shampoo_brand           400 non-null    object 
 9   swimming                400 non-null    int64  
 10  hair_washing            400 non-null    object 
 11  hair_grease             396 non-null    float64
 12  dandruff                400 non-null    int64  
 13  libido                  400 non-null    int64  
dtypes: float64(1), int64(9), object(4)
memory 

In [5]:
plot_cols = df.corr().loc['stay_up_late':, 'hair_loss'].abs().sort_values(ascending=False)
plot_cols

dandruff                  0.808751
stress_level              0.784986
pressure_level            0.779733
hair_grease               0.682490
coffee_consumed           0.671447
brain_working_duration    0.591093
stay_up_late              0.350551
swimming                  0.240211
libido                    0.140884
Name: hair_loss, dtype: float64

In [6]:
X_df= df[['stress_level', 'pressure_level', 'dandruff' ,'coffee_consumed']]

In [7]:
y_data = df['hair_loss'].to_numpy().astype(np.float32)
print(y_data.shape)

df.drop('hair_loss', axis=1, inplace=True)

X_data = X_df.to_numpy().astype(np.float32)

print(X_data.shape)

(400,)
(400, 4)


# Train Test Split

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X_data, y_data, test_size=0.1, random_state=42)
print(X_train.shape)
print(X_test.shape)
print('_____________')
print(y_train.shape)
print(y_test.shape)


(360, 4)
(40, 4)
_____________
(360,)
(40,)


# Model Building

In [9]:
model = tf.keras.Sequential()

model.add(tf.keras.layers.Input(shape=(4)))

model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dense(32, activation='relu'))
model.add(tf.keras.layers.Dense(16, activation='relu'))
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=True), metrics=['acc'], optimizer='adam')
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 64)                320       
                                                                 
 dense_1 (Dense)             (None, 32)                2080      
                                                                 
 dense_2 (Dense)             (None, 16)                528       
                                                                 
 dense_3 (Dense)             (None, 1)                 17        
                                                                 
Total params: 2,945
Trainable params: 2,945
Non-trainable params: 0
_________________________________________________________________


# Training

In [10]:
model.fit(X_train, y_train,epochs=500,batch_size=64,validation_split=0.20, verbose=3)

Epoch 1/500


  output, from_logits, "Sigmoid", "binary_crossentropy"


Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78/500
Epoch 7

<keras.callbacks.History at 0x2a3c922ab48>

# Evaluating

In [11]:
model.evaluate(X_test, y_test)



[0.06537796556949615, 0.9750000238418579]

# Result: He was hair loss!!!

In [12]:
model.predict(np.array([[2,2,2,5]]))



array([[0.9822907]], dtype=float32)