In [2]:

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import pickle


# Read the raw CSV into a DataFrame.
df = pd.read_csv('/content/Covid.csv')

# Quick inspection of what was loaded: sample rows, dimensions, raw header names.
print("First 5 rows of data:\n", df.head())
print("\nShape of dataset:", df.shape)
print("\nColumns before stripping:", df.columns.tolist())

# Remove leading/trailing whitespace from the header names.
df.columns = df.columns.str.strip()
print("\nColumns after stripping:", df.columns.tolist())

# Integer-encode every object-dtype column. A single encoder instance is refit
# for each column, so after the loop `le` only holds the classes of the last
# column it was fitted on.
le = LabelEncoder()
object_columns = [name for name in df.columns if df[name].dtype == 'object']
for name in object_columns:
    # Strip cell whitespace first so padded duplicates of a value share one code.
    df[name] = le.fit_transform(df[name].str.strip())

print("\nDataset after encoding:\n", df.head())


# The last column is the prediction target; every column before it is a feature.
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Learn the scaling statistics from the training rows only, then apply the same
# transformation to both splits so no test-set information reaches the fit.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

def _fit_and_report(model, title, label):
    """Fit ``model`` on the training split and print its test-split metrics.

    Reads ``X_train``, ``y_train``, ``X_test`` and ``y_test`` from the
    notebook's global namespace.

    Parameters
    ----------
    model : estimator
        Unfitted estimator exposing ``fit`` and ``predict``; it is fitted
        in place.
    title : str
        Text shown in the ``--- ... ---`` section banner.
    label : str
        Short model name shown in the accuracy line.

    Returns
    -------
    The predictions ``model.predict(X_test)`` produced after fitting.
    """
    print(f"\n--- {title} ---")
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)

    print(f"Accuracy ({label}):", accuracy_score(y_test, predictions))
    print("Confusion Matrix:\n", confusion_matrix(y_test, predictions))
    print("Classification Report:\n", classification_report(y_test, predictions))
    return predictions


# Both candidates go through the same fit/evaluate routine so their reports
# are directly comparable.
log_model = LogisticRegression(max_iter=1000)
y_pred_log = _fit_and_report(log_model, "Logistic Regression", "Logistic Regression")

svm_model = SVC(kernel='rbf', random_state=42)
y_pred_svm = _fit_and_report(svm_model, "Support Vector Machine", "SVM")


# Score each candidate once on the held-out split. The SVM is chosen only when
# it is strictly more accurate; a tie keeps logistic regression.
log_accuracy = accuracy_score(y_test, y_pred_log)
svm_accuracy = accuracy_score(y_test, y_pred_svm)
best_model = svm_model if svm_accuracy > log_accuracy else log_model

with open('covid_model.pkl', 'wb') as f:
    pickle.dump(best_model, f)

print("\nBest model saved successfully as 'covid_model.pkl'")

# Both models were fitted on StandardScaler-transformed features, so the saved
# model only gives meaningful predictions on inputs passed through this same
# fitted scaler. Persist it next to the model so it can be reapplied at
# inference time.
with open('covid_scaler.pkl', 'wb') as f:
    pickle.dump(scaler, f)

print("Fitted scaler saved successfully as 'covid_scaler.pkl'")


First 5 rows of data:
   Breathing Problem Fever Dry Cough Sore throat Running Nose Asthma  \
0               Yes   Yes       Yes         Yes          Yes     No   
1               Yes   Yes       Yes         Yes           No    Yes   
2               Yes   Yes       Yes         Yes          Yes    Yes   
3               Yes   Yes       Yes          No           No    Yes   
4               Yes   Yes       Yes         Yes          Yes     No   

  Chronic Lung Disease Headache Heart Disease Diabetes  ... Fatigue   \
0                   No       No            No      Yes  ...      Yes   
1                  Yes      Yes            No       No  ...      Yes   
2                  Yes      Yes            No      Yes  ...      Yes   
3                   No       No           Yes      Yes  ...       No   
4                  Yes      Yes           Yes      Yes  ...       No   

  Gastrointestinal  Abroad travel Contact with COVID Patient  \
0               Yes            No                    