## AIDS Dataset 

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math

In [2]:
data=pd.read_csv("AIDS_Classification.csv")

In [3]:
data.head()

Unnamed: 0,time,trt,age,wtkg,hemo,homo,drugs,karnof,oprior,z30,...,str2,strat,symptom,treat,offtrt,cd40,cd420,cd80,cd820,infected
0,948,2,48,89.8128,0,0,0,100,0,0,...,0,1,0,1,0,422,477,566,324,0
1,1002,3,61,49.4424,0,0,0,90,0,1,...,1,3,0,1,0,162,218,392,564,1
2,961,3,45,88.452,0,1,1,90,0,1,...,1,3,0,1,1,326,274,2063,1893,0
3,1166,3,47,85.2768,0,1,0,100,0,1,...,1,3,0,1,0,287,394,1590,966,0
4,1090,0,43,66.6792,0,1,0,100,0,1,...,1,3,0,0,0,504,353,870,782,0


In [4]:
data.isnull().sum()

time        0
trt         0
age         0
wtkg        0
hemo        0
homo        0
drugs       0
karnof      0
oprior      0
z30         0
preanti     0
race        0
gender      0
str2        0
strat       0
symptom     0
treat       0
offtrt      0
cd40        0
cd420       0
cd80        0
cd820       0
infected    0
dtype: int64

there is no null data to fill

In [5]:
X=data.iloc[:,:-1].values
y=data["infected"].values

In [6]:
from sklearn.model_selection import train_test_split

In [7]:
X_train, X_test, y_train, y_test=train_test_split(X,y,test_size=0.2,random_state=1)

In [8]:
from sklearn.preprocessing import MinMaxScaler

In [10]:
X_train=MinMaxScaler().fit_transform(X_train)
X_test=MinMaxScaler().fit_transform(X_test)

## Random Forest Classification

In [11]:
from sklearn.ensemble import RandomForestClassifier

In [12]:
rfc=RandomForestClassifier()

In [13]:
rfc.fit(X_train,y_train)

In [14]:
y_pred=rfc.predict(X_test)

## Check the accuracy and conf matrix

In [15]:
from sklearn.metrics import confusion_matrix, accuracy_score

In [16]:
cm_rfc=confusion_matrix(y_pred,y_test)
cm_rfc

array([[293,  29],
       [ 16,  90]], dtype=int64)

In [17]:
accscore_rfc=accuracy_score(y_pred,y_test)
accscore_rfc

0.8948598130841121

## Naive Bayes

In [19]:
from sklearn.naive_bayes import GaussianNB

In [20]:
GaussianNB=GaussianNB()

In [21]:
GaussianNB.fit(X_train,y_train)

In [22]:
y_predGNB=GaussianNB.predict(X_test)

In [23]:
cm_gnb=confusion_matrix(y_predGNB,y_test)
cm_gnb

array([[274,  47],
       [ 35,  72]], dtype=int64)

In [24]:
accscore_gnb=accuracy_score(y_predGNB,y_test)
accscore_gnb

0.8084112149532711

## KNN

In [25]:
from sklearn.neighbors import KNeighborsClassifier

In [36]:
knn=KNeighborsClassifier(n_neighbors=3)

In [37]:
knn.fit(X_train,y_train)

In [38]:
y_pred_knn=knn.predict(X_test)

In [39]:
cm_knn=confusion_matrix(y_pred_knn,y_test)
cm_knn

array([[291,  64],
       [ 18,  55]], dtype=int64)

In [40]:
accscore_knn=accuracy_score(y_pred_knn,y_test)
accscore_knn

0.8084112149532711

## Grid Search

In [41]:
from sklearn.model_selection import  GridSearchCV

param_grid = {
    'n_neighbors': range(1, 31),
    'weights': ['uniform', 'distance'],
    'metric': ['euclidean', 'manhattan', 'minkowski']
}

In [42]:
grid_search = GridSearchCV(knn, param_grid, cv=5, scoring='accuracy')

In [43]:
grid_search.fit(X_train, y_train)

In [44]:
print(f"Best params: {grid_search.best_params_}")
print(f"Best accuracy score: {grid_search.best_score_}")

Best params: {'metric': 'euclidean', 'n_neighbors': 6, 'weights': 'distance'}
Best accuracy score: 0.8240806096874842


In [46]:
from sklearn.metrics import classification_report

In [47]:
y_pred = grid_search.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.81      0.94      0.87       309
           1       0.73      0.43      0.54       119

    accuracy                           0.80       428
   macro avg       0.77      0.68      0.70       428
weighted avg       0.79      0.80      0.78       428



 knn classification not the one that I looking for. Random forest classification accuracy is better

In [48]:
data.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
time,2139.0,879.098177,292.274324,14.0,727.0,997.0,1091.0,1231.0
trt,2139.0,1.520804,1.12789,0.0,1.0,2.0,3.0,3.0
age,2139.0,35.248247,8.709026,12.0,29.0,34.0,40.0,70.0
wtkg,2139.0,75.125311,13.263164,31.0,66.6792,74.3904,82.5552,159.93936
hemo,2139.0,0.084151,0.27768,0.0,0.0,0.0,0.0,1.0
homo,2139.0,0.661057,0.473461,0.0,0.0,1.0,1.0,1.0
drugs,2139.0,0.13137,0.337883,0.0,0.0,0.0,0.0,1.0
karnof,2139.0,95.44647,5.900985,70.0,90.0,100.0,100.0,100.0
oprior,2139.0,0.021973,0.146629,0.0,0.0,0.0,0.0,1.0
z30,2139.0,0.550257,0.497584,0.0,0.0,1.0,1.0,1.0
