### Reading the File using pandas


In [1]:
# Ignore  the warnings
import warnings
warnings.filterwarnings('always')
warnings.filterwarnings('ignore')

# data visualisation and manipulation
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
from matplotlib import style
import seaborn as sns
from matplotlib import rcParams

#import the necessary modelling algos.

#classifiaction.
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC,SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier,GradientBoostingClassifier,AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
 

#regression
from sklearn.linear_model import LinearRegression,Ridge,Lasso,RidgeCV
from sklearn.ensemble import RandomForestRegressor,BaggingRegressor,GradientBoostingRegressor,AdaBoostRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor

#model selection
from sklearn.model_selection import train_test_split,cross_validate
from sklearn.model_selection import KFold
from sklearn.model_selection import GridSearchCV

#preprocessing
from sklearn.preprocessing import MinMaxScaler,StandardScaler,Imputer,LabelEncoder

#evaluation metrics
from sklearn.metrics import mean_squared_log_error,mean_squared_error, r2_score,mean_absolute_error # for regression
from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score  # for classification


In [2]:
heiwei = pd.read_csv("heiwei.csv")

heiwei.head()

Unnamed: 0,Gender,Height,Weight
0,Male,73.847017,241.893563
1,Male,68.781904,162.310473
2,Male,74.110105,212.740856
3,Male,71.730978,220.04247
4,Male,69.881796,206.349801


In [3]:
heiwei.shape

(10000, 3)

In [4]:
heiwei.columns

Index(['Gender', 'Height', 'Weight'], dtype='object')

In [5]:
heiwei.Gender.unique()

array(['Male', 'Female'], dtype=object)

### Checking Null values

In [6]:
heiwei.isnull().any().any().sum()

0

### Checking for Duplicates  

In [7]:
heiwei.columns.duplicated()

array([False, False, False])

### Renaming Male and Female with Int values 

In [8]:
heiwei =heiwei.replace("Male" , 1)
heiwei = heiwei.replace("Female" , 2)

In [9]:
heiwei.head()

Unnamed: 0,Gender,Height,Weight
0,1,73.847017,241.893563
1,1,68.781904,162.310473
2,1,74.110105,212.740856
3,1,71.730978,220.04247
4,1,69.881796,206.349801


In [16]:
heiwei.describe()

Unnamed: 0,Gender,Height,Weight
count,10000.0,10000.0,10000.0
mean,1.5,66.36756,161.440357
std,0.500025,3.847528,32.108439
min,1.0,54.263133,64.700127
25%,1.0,63.50562,135.818051
50%,1.5,66.31807,161.212928
75%,2.0,69.174262,187.169525
max,2.0,78.998742,269.989699


### Splitting Data into Train and Test sets 

In [10]:
train, test = train_test_split(heiwei, test_size=0.3) 


print(train.shape , test.shape)

(7000, 3) (3000, 3)


In [11]:
train_x = train[['Height', 'Weight']]
train_y = train.Gender

test_x = test[['Height', 'Weight']]
test_y = test.Gender


###

In [12]:
classifiers = [LogisticRegression(),
               LinearSVC(),
               KNeighborsClassifier(),
               DecisionTreeClassifier(),
               SVC(kernel='rbf'),
               RandomForestClassifier(),
               GradientBoostingClassifier(),
               GaussianNB()]
classifier_names = ['LogisticRegression',
                    'SVM',
                    'KNearestNeighbors',
                    'DecisionTree',
                    'rbf SVM',
                    'RandomForestClassifier',
                    'GradientBoostingClassifier',
                    'GaussianNB']


In [13]:
accuracy=[]
d={}

for i in range(len(classifiers)):
    clf=classifiers[i]
    clf.fit(train_x,train_y)
    pred=clf.predict(test_x)
    accuracy.append(accuracy_score(pred,test_y)*100)
     
d={'Modelling Algorithm': classifier_names,'Accuracy':accuracy}
d

{'Modelling Algorithm': ['LogisticRegression',
  'SVM',
  'KNearestNeighbors',
  'DecisionTree',
  'rbf SVM',
  'RandomForestClassifier',
  'GradientBoostingClassifier',
  'GaussianNB'],
 'Accuracy': [91.96666666666667,
  85.56666666666666,
  91.03333333333333,
  88.0,
  91.96666666666667,
  90.4,
  91.86666666666666,
  89.0]}

In [14]:
result = pd.DataFrame(d)

In [15]:
result

Unnamed: 0,Modelling Algorithm,Accuracy
0,LogisticRegression,91.966667
1,SVM,85.566667
2,KNearestNeighbors,91.033333
3,DecisionTree,88.0
4,rbf SVM,91.966667
5,RandomForestClassifier,90.4
6,GradientBoostingClassifier,91.866667
7,GaussianNB,89.0
