In [None]:
#imports

In [None]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split

In [None]:
# Machine-learning algorithms implemented from scratch with NumPy
class LogisticRegression:
    """Binary logistic regression trained with full-batch gradient descent.

    Parameters
    ----------
    learning_rate : float, default 0.01
        Step size applied to every gradient update.
    num_iterations : int, default 1000
        Number of gradient-descent steps performed by ``fit``.

    Attributes set by ``fit``
    -------------------------
    weights : ndarray of shape (n_features, 1)
    bias : float
    X, y : the training data as float arrays (y as a column vector)
    m, n : number of training samples / features
    """

    def __init__(self, learning_rate=0.01, num_iterations=1000):
        self.learning_rate = learning_rate
        self.num_iterations = num_iterations

    def sigmoid(self, z):
        """Return 1 / (1 + exp(-z)) element-wise without overflowing.

        The naive formula evaluates exp(-z), which overflows for large
        negative z. Here only exp(-|z|) is evaluated, which always lies in
        (0, 1], and the algebraically equivalent form is chosen per sign.
        """
        z = np.asarray(z, dtype=float)
        decay = np.exp(-np.abs(z))
        probs = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
        # hand scalars back as scalars rather than 0-d arrays
        return probs[()] if probs.ndim == 0 else probs

    def fit(self, X, y):
        """Learn ``weights`` and ``bias`` from the training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of shape (n_samples,) holding 0/1 labels

        Raises
        ------
        ValueError
            If X is not 2-dimensional, is empty, or its row count differs
            from the number of labels.
        """
        # coerce so that lists, DataFrames and Series are accepted as well
        features = np.asarray(X, dtype=float)
        targets = np.asarray(y, dtype=float).reshape(-1, 1)  # column vector
        if features.ndim != 2:
            raise ValueError("X must be 2-dimensional (n_samples, n_features)")
        if features.shape[0] == 0:
            raise ValueError("X must contain at least one sample")
        if features.shape[0] != targets.shape[0]:
            raise ValueError("X and y must contain the same number of samples")

        self.X = features
        self.y = targets
        self.m, self.n = features.shape
        self.weights = np.zeros((self.n, 1))  # start from all-zero parameters
        self.bias = 0.0

        # gradient descent on the mean log-loss
        for _ in range(self.num_iterations):
            # forward pass: predicted probabilities for every sample
            activations = self.sigmoid(np.dot(features, self.weights) + self.bias)

            # backward pass: gradient of the mean log-loss
            residual = activations - targets
            grad_w = np.dot(features.T, residual) / self.m
            grad_b = np.sum(residual) / self.m

            # parameter update
            self.weights -= self.learning_rate * grad_w
            self.bias -= self.learning_rate * grad_b

    def predict(self, X):
        """Return 0/1 labels for X as an int array of shape (n_samples, 1)."""
        scores = np.dot(np.asarray(X, dtype=float), self.weights) + self.bias
        # round the probabilities to the nearest label
        return np.round(self.sigmoid(scores)).astype(int)

    def score(self, X, y):
        """Return the accuracy (fraction of correct labels) on (X, y)."""
        expected = np.asarray(y).reshape(-1, 1)
        return np.mean(self.predict(X) == expected)
      

class NaiveBayes:
    """Gaussian Naive Bayes classifier.

    For every class, each feature is modelled as an independent normal
    distribution whose mean and variance are estimated from the training
    rows of that class.

    Parameters
    ----------
    var_smoothing : float, default 1e-9
        Fraction of the largest overall feature variance that is added to
        every per-class variance. This keeps the variance strictly positive
        when a feature is constant within a class, which would otherwise
        cause a division by zero.
    """

    def __init__(self, var_smoothing=1e-9):
        self.var_smoothing = var_smoothing

    def fit(self, X, y):
        """Estimate per-class feature means, variances and class priors.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of shape (n_samples,) with the class labels
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y).ravel()
        n_samples, n_features = X.shape
        self.classes = np.unique(y)
        n_classes = len(self.classes)

        # variance floor; fall back to the raw constant when every feature
        # is constant over the whole training set
        floor = self.var_smoothing * X.var(axis=0).max()
        if floor == 0:
            floor = self.var_smoothing

        # Calculate mean, variance, and prior probability for each class
        self.mean = np.zeros((n_classes, n_features))
        self.variance = np.zeros((n_classes, n_features))
        self.prior = np.zeros(n_classes)
        for idx, c in enumerate(self.classes):
            X_c = X[y == c]
            self.mean[idx, :] = X_c.mean(axis=0)
            self.variance[idx, :] = X_c.var(axis=0) + floor
            self.prior[idx] = X_c.shape[0] / float(n_samples)

    def predict(self, X):
        """Return the most probable class for every row of X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features); a single 1-D
            sample is also accepted.

        Returns
        -------
        ndarray of shape (n_samples,) with one label per row.
        """
        X = np.asarray(X, dtype=float)
        if X.ndim == 1:
            X = X.reshape(1, -1)

        # log prior + sum of per-feature log-likelihoods, one row per class
        log_posteriors = np.stack([
            np.log(self.prior[idx]) + self._log_density(X, idx).sum(axis=1)
            for idx in range(len(self.classes))
        ])

        # pick the best class per sample (axis 0 runs over the classes);
        # without the axis argument argmax would flatten the whole matrix
        return self.classes[np.argmax(log_posteriors, axis=0)]

    def _log_density(self, X, idx):
        """Log of the Gaussian density of every feature under class ``idx``.

        Computed directly in log space so that very unlikely values do not
        underflow to log(0).
        """
        mean = self.mean[idx]
        var = self.variance[idx]
        return -0.5 * (np.log(2 * np.pi * var) + (X - mean) ** 2 / var)

    def probability_density_function(self, X, idx):
        """Gaussian density of every feature of X under class ``idx``."""
        mean = self.mean[idx]
        var = self.variance[idx]
        numerator = np.exp(-((X - mean)**2 / (2 * var)))
        denominator = np.sqrt(2 * np.pi * var)
        return numerator / denominator


In [None]:
# Load covid_data.csv into a DataFrame and display it
df = pd.read_csv(filepath_or_buffer='covid_data.csv')
df

Unnamed: 0,Age,Sex,Fever,Cough,Fatigue,Shortness of breath,Body aches,Headache,Loss of smell or taste,COVID-19
0,35,1,0,0,1,1,0,1,0,1
1,84,1,0,0,0,1,1,0,0,1
2,44,0,0,0,0,1,1,0,0,1
3,39,1,0,0,0,1,0,1,0,1
4,33,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...
995,60,0,0,0,1,1,0,0,0,1
996,64,0,1,0,0,1,0,0,0,1
997,79,0,0,0,0,0,1,0,0,0
998,35,1,1,1,1,1,0,0,0,1


In [None]:
# Re-read the CSV with snake_case column names: the first line of the file
# (its original header row) is skipped and the names below are used instead.
header_names = [
    'age', 'sex', 'fever', 'cough', 'fatigue',
    'short_breathe', 'body_aches', 'headache', 'smell_taste_loss', 'covid',
]
df = pd.read_csv(
    'covid_data.csv',
    header=None,
    skiprows=1,
    names=header_names,
)
df

Unnamed: 0,age,sex,fever,cough,fatigue,short_breathe,body_aches,headache,smell_taste_loss,covid
0,35,1,0,0,1,1,0,1,0,1
1,84,1,0,0,0,1,1,0,0,1
2,44,0,0,0,0,1,1,0,0,1
3,39,1,0,0,0,1,0,1,0,1
4,33,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...
995,60,0,0,0,1,1,0,0,0,1
996,64,0,1,0,0,1,0,0,0,1
997,79,0,0,0,0,0,1,0,0,0
998,35,1,1,1,1,1,0,0,0,1


In [None]:
# Summarise the DataFrame: index range, column names, non-null counts, dtypes and memory usage
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 10 columns):
 #   Column            Non-Null Count  Dtype
---  ------            --------------  -----
 0   age               1000 non-null   int64
 1   sex               1000 non-null   int64
 2   fever             1000 non-null   int64
 3   cough             1000 non-null   int64
 4   fatigue           1000 non-null   int64
 5   short_breathe     1000 non-null   int64
 6   body_aches        1000 non-null   int64
 7   headache          1000 non-null   int64
 8   smell_taste_loss  1000 non-null   int64
 9   covid             1000 non-null   int64
dtypes: int64(10)
memory usage: 78.2 KB


In [None]:
# Count the missing (null) values in every column.
# `sum` has to be called: the bare attribute `df.isnull().sum` only displays
# the bound method instead of the per-column totals.
df.isnull().sum()

<bound method NDFrame._add_numeric_operations.<locals>.sum of        age    sex  fever  cough  fatigue  short_breathe  body_aches  headache  \
0    False  False  False  False    False          False       False     False   
1    False  False  False  False    False          False       False     False   
2    False  False  False  False    False          False       False     False   
3    False  False  False  False    False          False       False     False   
4    False  False  False  False    False          False       False     False   
..     ...    ...    ...    ...      ...            ...         ...       ...   
995  False  False  False  False    False          False       False     False   
996  False  False  False  False    False          False       False     False   
997  False  False  False  False    False          False       False     False   
998  False  False  False  False    False          False       False     False   
999  False  False  False  False    False       

In [None]:
# Count the rows that are exact repeats of an earlier row
df.duplicated().sum()

118

In [None]:
# Build the deduplicated frame `cv`: the first occurrence of every row is kept,
# later repeats are dropped, and `df` itself is left untouched.
cv = df.drop_duplicates(keep='first')
cv

Unnamed: 0,age,sex,fever,cough,fatigue,short_breathe,body_aches,headache,smell_taste_loss,covid
0,35,1,0,0,1,1,0,1,0,1
1,84,1,0,0,0,1,1,0,0,1
2,44,0,0,0,0,1,1,0,0,1
3,39,1,0,0,0,1,0,1,0,1
4,33,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...
994,34,1,1,0,0,1,1,0,1,1
995,60,0,0,0,1,1,0,0,0,1
996,64,0,1,0,0,1,0,0,0,1
998,35,1,1,1,1,1,0,0,0,1


In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for every column
# of the deduplicated data, to judge whether any column needs further treatment
cv.describe()

Unnamed: 0,age,sex,fever,cough,fatigue,short_breathe,body_aches,headache,smell_taste_loss,covid
count,882.0,882.0,882.0,882.0,882.0,882.0,882.0,882.0,882.0,882.0
mean,54.690476,0.493197,0.283447,0.209751,0.482993,0.400227,0.312925,0.217687,0.109977,0.651927
std,20.643026,0.500237,0.450927,0.407362,0.499994,0.490222,0.463947,0.412908,0.313039,0.476629
min,20.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,37.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,55.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
75%,72.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0
max,90.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [None]:
# Show how many distinct values each column holds
for column_name, distinct_count in cv.nunique().items():
    print(f"{column_name}: {distinct_count}")

age: 71
sex: 2
fever: 2
cough: 2
fatigue: 2
short_breathe: 2
body_aches: 2
headache: 2
smell_taste_loss: 2
covid: 2


In [None]:
# Separate the predictors (every column except 'covid') from the 'covid' label
X = cv.drop(columns='covid')
y = cv.loc[:, 'covid']
y

0      1
1      1
2      1
3      1
4      0
      ..
994    1
995    1
996    1
998    1
999    1
Name: covid, Length: 882, dtype: int64

In [None]:
# Hold out 20% of the rows for testing; the fixed random_state makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Logistic regression model (the class defined in this notebook), with its
# default hyper-parameters written out explicitly
log = LogisticRegression(learning_rate=0.01, num_iterations=1000)

In [None]:
# Train the logistic regression on the training split, passed as plain NumPy arrays
log.fit(X_train.to_numpy(), y_train.to_numpy())

In [None]:
# Accuracy of the logistic regression on the training split.
# NOTE(review): the recorded value (~0.652) is about the same as the share of
# positive labels in the deduplicated data (covid mean ~0.652), so the model
# may be predicting a single class for almost every row -- worth confirming.
log.score(X_train.to_numpy(), y_train.to_numpy())

0.6524822695035462

In [None]:
# Accuracy of the logistic regression on the held-out test split
log.score(X_test.to_numpy(), y_test.to_numpy())

0.6497175141242938

In [None]:
# Create the Naive Bayes classifier (the NaiveBayes class defined in this notebook)
log2 = NaiveBayes()

In [None]:
# Train the Naive Bayes classifier on the training split, passed as plain NumPy arrays
log2.fit(X_train.to_numpy(), y_train.to_numpy())

In [None]:
# Ask the user for their age, sex and symptoms.
# Every answer is validated and the question is repeated until it is valid,
# so a typo no longer aborts the cell with a ValueError.

def _ask_int(prompt, allowed=None, minimum=None):
    """Prompt until the user types an acceptable whole number and return it.

    prompt  -- text shown to the user
    allowed -- optional collection of accepted values, e.g. (0, 1)
    minimum -- optional inclusive lower bound
    """
    while True:
        raw = input(prompt)
        try:
            value = int(raw)
        except ValueError:
            print(f"'{raw}' is not a whole number, please try again.")
            continue
        if allowed is not None and value not in allowed:
            choices = " or ".join(str(option) for option in allowed)
            print(f"Please answer with {choices}.")
            continue
        if minimum is not None and value < minimum:
            print(f"Please enter a value of at least {minimum}.")
            continue
        return value


def _ask_flag(prompt):
    """Ask a question whose only valid answers are 0 and 1."""
    return _ask_int(prompt, allowed=(0, 1))


age = _ask_int("Type your age: ", minimum=0)

sex = _ask_flag("Type your sex (0 for Male / 1 for Female): ")

fever = _ask_flag("Type if you have Fever (0 for No / 1 for Yes): ")
cough = _ask_flag("Type if you have Cough (0 for No / 1 for Yes): ")
fatigue = _ask_flag("Type if you have Fatigue (0 for No / 1 for Yes): ")
short_breathe = _ask_flag("Type if you have Shortness of breath (0 for No / 1 for Yes): ")
body_aches = _ask_flag("Type if you have Body aches (0 for No / 1 for Yes): ")
headache = _ask_flag("Type if you have Headache (0 for No / 1 for Yes): ")
smell_taste_loss = _ask_flag("Type if you have Loss of smell or taste (0 for No / 1 for Yes): ")



Type your age: 21
Type your sex (0 for Male / 1 for Female): 1
Type if you have Fever (0 for No / 1 for Yes): 1
Type if you have Cough (0 for No / 1 for Yes): 1
Type if you have Fatigue (0 for No / 1 for Yes): 1
Type if you have Shortness of breath (0 for No / 1 for Yes): 1
Type if you have Body aches (0 for No / 1 for Yes): 1
Type if you have Headache (0 for No / 1 for Yes): 1
Type if you have Loss of smell or taste (0 for No / 1 for Yes): 1


In [None]:

# Assemble the answers into a single-row table, in the same column order as the
# training features, and classify it with the logistic regression model
data_input = [[
    age, sex, fever, cough, fatigue,
    short_breathe, body_aches, headache, smell_taste_loss,
]]
data_input_np = np.array(data_input)
data_input_framed = pd.DataFrame(data_input_np)

predictio1n = log.predict(data_input_framed)

# Row 0 of the prediction is this user's label; 1 selects the "may have COVID-19" message
lr_message = (
    "Using LogisticRegression prediction, you may have COVID-19. Please consult a healthcare professional."
    if predictio1n[0] == 1
    else "Using LogisticRegression prediction, it is unlikely that you have COVID-19, but please continue to monitor your symptoms."
)
print(lr_message)

Using LogisticRegression prediction, you may have COVID-19. Please consult a healthcare professional.


In [None]:
# Make the prediction for the same user input using the Naive Bayes classifier
predictio2n = log2.predict(data_input_framed)


In [None]:
# Report the Naive Bayes result; a label of 1 selects the "may have COVID-19" message
nb_message = (
    "Using Naive-Bayes prediction, you may have COVID-19. Please consult a healthcare professional."
    if predictio2n == 1
    else "Using Naive-Bayes prediction, it is unlikely that you have COVID-19, but please continue to monitor your symptoms."
)
print(nb_message)

Using Naive-Bayes prediction, you may have COVID-19. Please consult a healthcare professional.
