In [1]:
import pandas as pd

## Reading Data

In [5]:
# Load the symptom dataset from a path relative to the notebook's working directory.
# NOTE(review): the recorded df.head() output lists a `runnyNose` column while later
# outputs and the feature selection use `runnynose`, and the execution counts are out
# of order -- restart the kernel and run all cells to confirm the actual CSV header.
df = pd.read_csv('static/data.csv')

In [6]:
# Preview the first rows to sanity-check the load.
df.head()

Unnamed: 0,fever,bodyPain,age,runnyNose,diffBreath,infectionProb
0,100.152606,1,62,0,0,1
1,101.756095,1,28,0,1,0
2,101.849448,1,3,0,-1,1
3,98.769742,1,88,0,1,1
4,101.205276,1,83,0,0,1


In [4]:
# Preview the last rows to confirm the file was read through to the end.
df.tail()

Unnamed: 0,fever,bodyPain,age,runnynose,diffBreath,infectionProb
5574,99.958319,0,47,0,0,0
5575,101.237908,1,16,1,0,0
5576,99.687036,1,71,1,-1,1
5577,98.008461,1,41,1,-1,1
5578,100.190299,1,7,1,1,1


In [5]:
# Report column dtypes and non-null counts (used here to check for missing values).
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5579 entries, 0 to 5578
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   fever          5579 non-null   float64
 1   bodyPain       5579 non-null   int64  
 2   age            5579 non-null   int64  
 3   runnynose      5579 non-null   int64  
 4   diffBreath     5579 non-null   int64  
 5   infectionProb  5579 non-null   int64  
dtypes: float64(1), int64(5)
memory usage: 261.6 KB


In [6]:
 df['diffBreath'].value_counts()

-1    1900
 1    1854
 0    1825
Name: diffBreath, dtype: int64

In [7]:
# Summary statistics (count, mean, std, min, quartiles, max) for every numeric column.
df.describe()

Unnamed: 0,fever,bodyPain,age,runnynose,diffBreath,infectionProb
count,5579.0,5579.0,5579.0,5579.0,5579.0,5579.0
mean,99.990646,0.506363,51.122065,0.50242,-0.008245,0.501882
std,1.141145,0.500004,28.89116,0.500039,0.820325,0.500041
min,98.000713,0.0,1.0,0.0,-1.0,0.0
25%,99.010156,0.0,26.0,0.0,-1.0,0.0
50%,99.967504,1.0,52.0,1.0,0.0,1.0
75%,100.980283,1.0,76.0,1.0,1.0,1.0
max,101.999712,1.0,100.0,1.0,1.0,1.0


## Train Test Splitting

In [8]:
import numpy as np

In [9]:
def data_split(data, ratio, seed=42):
    """Shuffle the rows of ``data`` and split them into train and test sets.

    Parameters
    ----------
    data : pandas.DataFrame
        Table to split; rows are selected positionally with ``.iloc``.
    ratio : float
        Fraction of rows, in ``[0, 1]``, assigned to the test set. The test-set
        size is ``int(len(data) * ratio)``, i.e. truncated towards zero.
    seed : int, optional
        Seed for the shuffle. The default (42) reproduces the split this
        function has always produced.

    Returns
    -------
    tuple
        ``(train, test)``: two row subsets of ``data`` in shuffled order that
        are disjoint and together contain every row exactly once.

    Raises
    ------
    ValueError
        If ``ratio`` is not within ``[0, 1]`` (this also rejects NaN).
    """
    if not 0 <= ratio <= 1:
        raise ValueError(f"ratio must be between 0 and 1, got {ratio!r}")

    # Use a private generator instead of np.random.seed(): it yields the same
    # permutation as the seeded global generator, but does not reset the
    # notebook-wide random state as a hidden side effect.
    rng = np.random.RandomState(seed)

    # Random ordering of the row positions 0 .. len(data) - 1.
    shuffled = rng.permutation(len(data))

    # Number of rows that go to the test set.
    test_set_size = int(len(data) * ratio)

    # The first chunk of the shuffled positions is the test set ...
    test_indices = shuffled[:test_set_size]

    # ... and everything after it is the training set.
    train_indices = shuffled[test_set_size:]

    return data.iloc[train_indices], data.iloc[test_indices]

  
    
    

In [10]:
# Hold out 25% of the rows for testing; the remaining rows form the training set.
train, test = data_split(df, 0.25)

In [11]:
# Display the training partition (rows appear in shuffled order).
train

Unnamed: 0,fever,bodyPain,age,runnynose,diffBreath,infectionProb
1662,101.753008,0,30,0,0,1
2191,99.521963,1,79,0,-1,0
4073,100.155302,0,98,1,-1,0
585,98.976740,1,46,0,-1,1
4892,101.776111,0,84,0,-1,0
...,...,...,...,...,...,...
3772,101.859183,0,4,0,0,1
5191,100.775276,1,96,0,0,0
5226,101.242068,1,23,0,-1,0
5390,101.775884,1,62,0,1,0


In [12]:
# Display the held-out test partition (rows appear in shuffled order).
test

Unnamed: 0,fever,bodyPain,age,runnynose,diffBreath,infectionProb
2633,99.189630,0,28,0,-1,0
1550,100.340959,0,8,0,-1,1
724,100.246063,1,97,1,-1,1
2788,100.110798,1,47,1,-1,1
468,101.592579,1,98,1,0,1
...,...,...,...,...,...,...
19,98.562342,1,6,0,-1,1
4898,101.417183,0,27,0,-1,0
668,100.513732,1,17,1,1,0
218,101.014501,0,61,0,-1,1


In [13]:
# Single source of truth for the model's input columns, so the train and test
# matrices cannot drift apart. The column order defines the feature order that
# any later prediction input must follow.
FEATURE_COLUMNS = ['fever', 'bodyPain', 'age', 'runnynose', 'diffBreath']

# Feature matrices as plain NumPy arrays, one row per sample.
X_train = train[FEATURE_COLUMNS].to_numpy()
X_test = test[FEATURE_COLUMNS].to_numpy()

In [14]:
# Label vectors. Selecting the column as a Series (single brackets) makes
# .to_numpy() return a 1-D array directly, so no reshape with hard-coded row
# counts is needed and the cell keeps working if the data or split ratio changes.
Y_train = train['infectionProb'].to_numpy()
Y_test = test['infectionProb'].to_numpy()

In [15]:
# Inspect the training label vector.
Y_train

array([1, 0, 0, ..., 0, 0, 0], dtype=int64)

In [16]:
from sklearn.linear_model import LogisticRegression

In [17]:
# Train a logistic-regression classifier with scikit-learn's default settings on
# the raw (unscaled) training features.
clf = LogisticRegression()
# fit() is the cell's last expression, so the estimator's repr is displayed below.
clf.fit(X_train, Y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [18]:
# One hand-written sample; values must follow the training column order:
# [fever, bodyPain, age, runnynose, diffBreath].
# NOTE(review): the 4th value (runnynose) is -1, but df.describe() reports that column
# ranging from 0 to 1, whereas diffBreath takes -1/0/1 -- the last two values may be
# swapped; confirm the intended input.
inputFeatures = [100, 1, 22, -1, 1]
# predict_proba returns one row per sample; [0][1] picks the second class column
# (presumably the probability of label 1 -- confirm against clf.classes_).
infProb = clf.predict_proba([inputFeatures])[0][1]
# Predicted class label for the same sample.
results = clf.predict([inputFeatures])[0]

In [19]:
# Show the predicted probability for the sample.
infProb

0.5344107630911927

In [20]:
# Show the predicted class label for the sample.
results

1

In [21]:
# Translate the predicted class label into a human-readable verdict.
print("Covid-19 infected" if results == 1 else "Covid-19 negative")

Covid-19 infected


In [22]:
import pickle