## Classification

### Import required packages

In [40]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

### Get the data from dataset

In [41]:
df = pd.read_csv('hearing_test.csv')
print(df)

       age  physical_score  test_result
0     33.0            40.7            1
1     50.0            37.2            1
2     52.0            24.7            0
3     56.0            31.0            0
4     35.0            42.9            1
...    ...             ...          ...
4995  73.0             3.9            0
4996  57.0            33.9            1
4997  49.0            34.5            1
4998  38.0            46.4            1
4999  48.0            38.5            1

[5000 rows x 3 columns]


In [42]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 3 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   age             5000 non-null   float64
 1   physical_score  5000 non-null   float64
 2   test_result     5000 non-null   int64  
dtypes: float64(2), int64(1)
memory usage: 117.3 KB


In [43]:
df.describe()

Unnamed: 0,age,physical_score,test_result
count,5000.0,5000.0,5000.0
mean,51.609,32.76026,0.6
std,11.287001,8.169802,0.489947
min,18.0,-0.0,0.0
25%,43.0,26.7,0.0
50%,51.0,35.3,1.0
75%,60.0,38.9,1.0
max,90.0,50.0,1.0


### EDA

In [44]:
df.cov()

Unnamed: 0,age,physical_score,test_result
age,127.396398,-72.123723,-3.777956
physical_score,-72.123723,66.74566,3.173059
test_result,-3.777956,3.173059,0.240048


In [45]:
df.corr()

Unnamed: 0,age,physical_score,test_result
age,1.0,-0.782146,-0.683171
physical_score,-0.782146,1.0,0.792716
test_result,-0.683171,0.792716,1.0


### Data cleansing

In [46]:
df.isna().sum()

age               0
physical_score    0
test_result       0
dtype: int64

### prepare the data

In [52]:
# decide the independent variable(s)
x = df[['age', 'physical_score']]
# decide the dependent variable
y = df['test_result']

### Split the data into train and test

In [53]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(x, y, train_size=0.8, random_state=123456)

### Create the model using Logistic RegressionCV

In [54]:
from sklearn.linear_model import LogisticRegressionCV

# create empty model
model = LogisticRegressionCV()

# train the model
model.fit(x_train,y_train)

### Evaluate the model using Logistic RegressionCV

In [56]:
# consider the y_test as y_true
y_true = y_test

# predict the values on x_test
y_pred = model.predict(x_test)

In [57]:
y_true

4158    1
2117    1
3328    1
4813    1
4176    1
       ..
2375    1
339     1
2090    1
867     1
3977    1
Name: test_result, Length: 1000, dtype: int64

In [58]:
y_pred

array([1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0,
       1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1,
       1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1,
       1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1,
       1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0,
       1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
       1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0,
       1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0,
       0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1,
       0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0,
       0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
       0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1,

In [59]:
# confusion matrix
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_true, y_pred)
cm

array([[332,  61],
       [ 31, 576]], dtype=int64)

In [60]:
accuracy = (cm[0][0] + cm[1][1]) / (cm[0][0] + cm[0][1] + cm[1][0] + cm[1][1])
accuracy

0.908

In [61]:
from sklearn.metrics import accuracy_score

accuracy = accuracy_score(y_true, y_pred)
accuracy

0.908

In [62]:
tn, fp, fn, tp = cm.ravel()
print(tn)
print(fp)
print(fn)
print(tp)

332
61
31
576
