In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer 
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report

In [2]:
# Read the raw table; the path is relative, so it resolves against the
# current working directory.
df = pd.read_csv("Dataset.csv")

# Target labels: the "flu" column.
y = df['flu']

# Fuse the "covid" and "fever" columns into one string per row. The joiner is
# the empty string, so the two values are glued together with nothing between.
covid_col = df["covid"]
fever_col = df["fever"]
data = covid_col + "" + fever_col

# Model input for every row (train and test alike), cast to str for the vectorizer.
X = data.astype(str)

In [3]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [4]:
# Create both estimators up front: a bag-of-words vectorizer and a
# multinomial naive Bayes classifier.
vectorizer = CountVectorizer()
classifier = MultinomialNB()

# Learn the vocabulary from the training text only, then encode both splits
# with that same vocabulary.
X_train_counts = vectorizer.fit_transform(X_train)
X_test_counts = vectorizer.transform(X_test)

# Train the classifier on the training counts.
classifier.fit(X_train_counts, y_train)


In [5]:
# Read the rows to run predictions on.
# NOTE(review): this opens "Dataset.csv" again, i.e. the same file the model
# was trained from, not a separate dataset. Confirm that is intended.
data1 = pd.read_csv("Dataset.csv")

# Build the text feature exactly as for training ("covid" and "fever" joined
# with no separator) and encode it with the already-fitted vectorizer.
new_data = data1["covid"] + "" + data1["fever"]
new_data_text = new_data.astype(str)
new_data_counts = vectorizer.transform(new_data_text)

In [6]:
# Predict a label for each row of the vectorized input (new_data_counts)
predictions = classifier.predict(new_data_counts)

In [7]:
# Show the predicted label for every row.
# `new_data` is deliberately left untouched so it keeps referring to the input
# text; it is not rebound to the prediction array.
print(predictions)

['yes' 'no' 'no' 'no' 'no' 'yes' 'no' 'yes' 'no' 'no']


In [8]:
# Score the held-out split: predict its labels, then compute the fraction
# that match the true labels.
y_test_pred = classifier.predict(X_test_counts)
accuracy = accuracy_score(y_test, y_test_pred)
print(f"\nAccuracy: {accuracy:.2f}")



Accuracy: 1.00


In [9]:
# Per-class precision / recall / F1 for the held-out split.
print("Classification Report: ")
report_predictions = classifier.predict(X_test_counts)
test_report = classification_report(y_test, report_predictions)
print(test_report)

Classification Report: 
              precision    recall  f1-score   support

          no       1.00      1.00      1.00         2

    accuracy                           1.00         2
   macro avg       1.00      1.00      1.00         2
weighted avg       1.00      1.00      1.00         2



In [10]:
# Wrap the predicted labels in a one-column DataFrame ("flu_prediction") so
# they can be placed alongside the input table.
predictions_df = pd.DataFrame({'flu_prediction': predictions})

In [11]:
# Attach the predictions to the input table as a "flu_prediction" column.
# `assign` replaces the column when it already exists, so re-running this cell
# leaves exactly one "flu_prediction" column; appending with pd.concat would
# add a duplicate column on every re-run.
# Values are taken positionally: row i of the predictions goes to row i of data1.
data1 = data1.assign(flu_prediction=predictions_df["flu_prediction"].to_numpy())

In [12]:
# Save the table with predictions as CSV, without the index column.
# The destination is a relative path, so the file lands in the working
# directory (the same place "Dataset.csv" is read from). A user-specific
# absolute folder would make this cell fail on any other machine.
OUTPUT_CSV = "Test1.csv"
data1.to_csv(OUTPUT_CSV, index=False)