## Importing Libraries

In [1]:
import pandas as pd
import numpy as np

## Reading Data

In [2]:
# Load the dataset; 'data.csv' is a relative path, so it is resolved against
# the notebook's current working directory.
df = pd.read_csv('data.csv')
# Preview the first rows to sanity-check the parsed columns.
df.head()

Unnamed: 0,fever,Bodypain,age,Runnynose,diffBreath,infectionProb
0,99.202806,0,21,0,0,1
1,101.078977,1,36,0,-1,1
2,99.977729,0,95,0,0,1
3,100.492112,0,56,1,1,0
4,101.44373,1,106,1,1,1


In [3]:
# (number of rows, number of columns) of the loaded frame.
df.shape

(700, 6)

In [4]:
# Per-column dtype and non-null count, to spot missing values early.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 700 entries, 0 to 699
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   fever          700 non-null    float64
 1   Bodypain       700 non-null    int64  
 2   age            700 non-null    int64  
 3   Runnynose      700 non-null    int64  
 4   diffBreath     700 non-null    int64  
 5   infectionProb  700 non-null    int64  
dtypes: float64(1), int64(5)
memory usage: 32.9 KB


In [5]:
# How many rows carry each distinct diffBreath value.
df['diffBreath'].value_counts()

 1    237
-1    235
 0    228
Name: diffBreath, dtype: int64

In [6]:
# Summary statistics (count, mean, std, min, quartiles, max) per numeric column.
df.describe()

Unnamed: 0,fever,Bodypain,age,Runnynose,diffBreath,infectionProb
count,700.0,700.0,700.0,700.0,700.0,700.0
mean,99.967642,0.504286,53.52,0.465714,0.002857,0.515714
std,1.1231,0.500339,30.693201,0.49918,0.821731,0.50011
min,98.022858,0.0,0.0,0.0,-1.0,0.0
25%,98.980814,0.0,28.0,0.0,-1.0,0.0
50%,99.975105,1.0,54.5,0.0,0.0,1.0
75%,100.861552,1.0,78.0,1.0,1.0,1.0
max,101.990911,1.0,110.0,1.0,1.0,1.0


## Train Test Splitting

In [7]:
def data_split(data, ratio, seed=42):
    """Randomly partition ``data`` into a training part and a test part.

    Parameters
    ----------
    data : pandas.DataFrame
        Any object supporting ``len()`` and positional ``.iloc`` indexing.
    ratio : float
        Fraction of rows assigned to the test set; must lie in ``[0, 1]``.
        The test size is ``int(len(data) * ratio)`` (truncated toward zero).
    seed : int or None, optional
        Seed for the shuffle. The default (42) reproduces the split this
        function has always produced; pass ``None`` for a non-deterministic
        shuffle.

    Returns
    -------
    tuple
        ``(train, test)`` -- two disjoint row subsets that together contain
        every row of ``data`` exactly once.

    Raises
    ------
    ValueError
        If ``ratio`` is outside ``[0, 1]`` (this also rejects NaN).
    """
    if not 0 <= ratio <= 1:
        raise ValueError(f"ratio must be between 0 and 1, got {ratio!r}")

    # A private legacy RandomState yields the same permutation that
    # np.random.seed(seed) + np.random.permutation(...) would, but leaves the
    # global NumPy random state untouched for the rest of the notebook.
    rng = np.random.RandomState(seed)
    shuffled = rng.permutation(len(data))

    test_set_size = int(len(data) * ratio)
    test_indices = shuffled[:test_set_size]
    train_indices = shuffled[test_set_size:]
    return data.iloc[train_indices], data.iloc[test_indices]

In [8]:
# Scratch demo: permutation(n) returns the integers 0..n-1 in random order.
# This cell does not seed the generator itself, so the displayed order may
# differ from run to run.
np.random.permutation(9)

array([3, 1, 5, 4, 2, 6, 7, 0, 8])

In [9]:
# Hold out 20% of the rows as the test set; the remainder is the training set.
train, test = data_split(df, 0.2)

In [10]:
# Inspect the training partition (row labels are the shuffled original indices).
train

Unnamed: 0,fever,Bodypain,age,Runnynose,diffBreath,infectionProb
82,100.213465,1,70,0,1,1
51,101.240472,0,4,1,1,1
220,101.795311,0,90,0,0,1
669,98.231933,0,57,1,0,0
545,100.544136,0,45,0,1,1
...,...,...,...,...,...,...
71,99.687515,1,26,0,0,1
106,98.471963,0,58,0,1,1
270,98.185957,1,51,1,-1,0
435,100.618994,1,96,1,1,1


In [11]:
# Inspect the held-out test partition.
test

Unnamed: 0,fever,Bodypain,age,Runnynose,diffBreath,infectionProb
158,101.088009,0,108,0,0,0
500,100.698519,1,75,0,1,1
396,99.798189,0,71,0,-1,0
155,100.703711,0,21,0,1,0
321,99.929317,0,36,1,-1,1
...,...,...,...,...,...,...
24,98.580210,0,34,1,-1,0
218,98.982562,0,35,0,0,0
431,99.041933,0,67,0,0,0
281,100.630155,0,103,0,-1,1


In [12]:
# Feature columns, in the order the model will receive them; declared once so
# the training and test matrices cannot drift apart.
feature_cols = ['fever', 'Bodypain', 'age', 'Runnynose', 'diffBreath']
x_train = train[feature_cols].to_numpy()
x_test = test[feature_cols].to_numpy()

In [13]:
# Select the label column as a Series so to_numpy() directly yields a 1-D
# array with one entry per row. This avoids hard-coding the partition sizes
# (560 / 140), which would raise as soon as the dataset size or the split
# ratio changes.
y_train = train['infectionProb'].to_numpy()
y_test = test['infectionProb'].to_numpy()

In [14]:
# Inspect the training labels.
y_train

array([1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0,
       1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0,
       1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1,
       1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0,
       1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0,
       0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
       0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0,
       1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0,
       1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0,
       1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0,
       0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1,
       0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0,
       0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1,

In [15]:
from sklearn.linear_model import LogisticRegression

In [16]:
# Logistic-regression classifier constructed with no arguments, i.e. with the
# library's default hyperparameters.
model = LogisticRegression()
# Fit on the training features and labels.
# NOTE(review): x_test / y_test are never used in the cells visible here, so
# the model is not evaluated on the held-out data — confirm whether an
# evaluation step is missing.
model.fit(x_train, y_train)

LogisticRegression()

In [17]:
# One hand-written sample, keyed by column name so the order handed to the
# model is explicit; it must match the column order used to build x_train.
# NOTE(review): Runnynose is -1 here, but df.describe() reports that column
# spanning only 0..1, while diffBreath is the column that takes -1/0/1.
# The last two values may have been swapped — confirm the intended input.
sample = {
    'fever': 100,
    'Bodypain': 1,
    'age': 22,
    'Runnynose': -1,
    'diffBreath': 1,
}
inputFeatures = list(sample.values())

# predict_proba returns one row per sample; take the only row, then the
# second column (index 1) of that row.
class_probabilities = model.predict_proba([inputFeatures])
infProb = class_probabilities[0][1]

In [18]:
# Show the predicted probability computed in the previous cell.
infProb

0.6013123126765708