### Import required libraries 

In [1]:
import numpy as np
import pandas as pd

### Load data 

In [2]:
df= pd.read_excel('Data_practice.xlsx')

### Insights from data 

In [3]:
df.shape

(23, 4)

In [4]:
df.head()

Unnamed: 0,Weight,Height,BMI,look
0,60.0,1.5,26.666667,Overweight
1,65.0,1.5,28.888889,Overweight
2,70.0,1.5,31.111111,Overweight
3,75.0,1.5,33.333333,Overweight
4,80.0,1.5,35.555556,Overweight


In [5]:
df.sample(4)

Unnamed: 0,Weight,Height,BMI,look
9,80.0,1.6,31.25,Overweight
16,65.0,1.8,20.061728,Fit
19,80.0,1.8,24.691358,Fit
2,70.0,1.5,31.111111,Overweight


In [6]:
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Weight,22.0,69.772727,6.982508,60.0,65.0,70.0,75.0,80.0
Height,22.0,1.672727,0.13159,1.5,1.6,1.7,1.8,1.9
BMI,23.0,25.018386,5.075479,16.620499,21.183092,24.691358,28.285275,35.555556


In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 23 entries, 0 to 22
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Weight  22 non-null     float64
 1   Height  22 non-null     float64
 2   BMI     23 non-null     float64
 3   look    23 non-null     object 
dtypes: float64(3), object(1)
memory usage: 864.0+ bytes


In [8]:
df.dtypes

Weight    float64
Height    float64
BMI       float64
look       object
dtype: object

### Null value check

In [9]:
df.isnull().sum()

Weight    1
Height    1
BMI       0
look      0
dtype: int64

In [10]:
x = df["Weight"].mean()

df["Weight"].fillna(x, inplace = True)

In [11]:
df.isnull().sum()

Weight    0
Height    1
BMI       0
look      0
dtype: int64

### Null value treatment 

In [12]:
x = df["Height"].mean()

df["Height"].fillna(x, inplace = True)

In [13]:
df.isnull().sum()

Weight    0
Height    0
BMI       0
look      0
dtype: int64

In [14]:
# Drop null value
df1= pd.read_excel('Data_practice.xlsx')
df1.isnull().sum()

Weight    1
Height    1
BMI       0
look      0
dtype: int64

In [15]:
df1=df1.dropna()

In [16]:
df1.isnull().sum()

Weight    0
Height    0
BMI       0
look      0
dtype: int64

### Encoding 

In [17]:
# Encode categorical variables
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
df = df.apply(le.fit_transform)

### Split into features and labels 

In [18]:
# Split into features and labels
df_new = df.drop(columns=['BMI'])
df_new


Unnamed: 0,Weight,Height,look
0,0,0,1
1,1,0,1
2,3,0,1
3,4,0,1
4,5,0,1
5,0,1,0
6,1,1,1
7,3,1,1
8,4,1,1
9,5,1,1


In [19]:
features = df_new.columns[:-1]
label = df_new.columns[-1]
X = df_new[features]
y = df_new[label]

In [20]:
X


Unnamed: 0,Weight,Height
0,0,0
1,1,0
2,3,0
3,4,0
4,5,0
5,0,1
6,1,1
7,3,1
8,4,1
9,5,1


In [21]:
y

0     1
1     1
2     1
3     1
4     1
5     0
6     1
7     1
8     1
9     1
10    0
11    0
12    0
13    1
14    1
15    2
16    0
17    0
18    0
19    0
20    2
21    2
22    2
Name: look, dtype: int32

### Split the data into train and test

In [22]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,random_state=42)

In [23]:
X_train.shape

(18, 2)

In [24]:
X_test.shape

(5, 2)

In [25]:
y_train.shape


(18,)

In [26]:
y_test.shape

(5,)

### Scaling 

In [27]:
# scaling the data using MinMax Scaling process
from sklearn.preprocessing import StandardScaler,MinMaxScaler
scaler = MinMaxScaler() #standard
X_train = scaler.fit_transform(X_train)
#X_train=X_train(scale)
X_train

X_test = scaler.fit_transform(X_test)
X_test

array([[0.  , 1.  ],
       [1.  , 0.25],
       [0.  , 0.  ],
       [0.8 , 0.25],
       [0.6 , 1.  ]])

### # Model building 

In [28]:
from sklearn.linear_model import LogisticRegression

### Model fit 

In [29]:
m=LogisticRegression()

In [30]:
s1= m.fit(X_train,y_train)

### Prediction from model

In [31]:
y_pred = m.predict(X_test)

In [32]:
y_pred

array([0, 1, 1, 1, 0])

In [33]:
y_test

15    2
9     1
0     1
8     1
17    0
Name: look, dtype: int32

In [34]:
df_com = pd. DataFrame({'Actual': y_test, 'Predicted': y_pred}) 

In [35]:
df_com

Unnamed: 0,Actual,Predicted
15,2,0
9,1,1
0,1,1
8,1,1
17,0,0


### Model evaluation 

In [36]:
from sklearn.metrics import confusion_matrix

print(confusion_matrix(y_test,y_pred))

#tn, fp, fn, tp = confusion_matrix(y_test,y_pred).ravel()
#print(tn, fp, fn, tp)

[[1 0 0]
 [0 3 0]
 [1 0 0]]


In [37]:
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score

print('accuracy:', accuracy_score(y_test, y_pred))
print('recall:', recall_score(y_test, y_pred,average='weighted'))
print('f1-score:', f1_score(y_test, y_pred,average='weighted'))
print('precision:', precision_score(y_test, y_pred,average='weighted'))

accuracy: 0.8
recall: 0.8
f1-score: 0.7333333333333333
precision: 0.7


  _warn_prf(average, modifier, msg_start, len(result))
