### Reading the File using pandas


In [1]:
# Ignore  the warnings
import warnings
warnings.filterwarnings('always')
warnings.filterwarnings('ignore')

# data visualisation and manipulation
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
from matplotlib import style
import seaborn as sns
from matplotlib import rcParams

#import the necessary modelling algos.

#classifiaction.
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC,SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier,GradientBoostingClassifier,AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
 

#regression
from sklearn.linear_model import LinearRegression,Ridge,Lasso,RidgeCV
from sklearn.ensemble import RandomForestRegressor,BaggingRegressor,GradientBoostingRegressor,AdaBoostRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor

#model selection
from sklearn.model_selection import train_test_split,cross_validate
from sklearn.model_selection import KFold
from sklearn.model_selection import GridSearchCV

#preprocessing
from sklearn.preprocessing import MinMaxScaler,StandardScaler,Imputer,LabelEncoder

#evaluation metrics
from sklearn.metrics import mean_squared_log_error,mean_squared_error, r2_score,mean_absolute_error # for regression
from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score  # for classification


In [2]:
df = pd.read_csv('iris.csv' )

df.columns = ['No.', 'sepal_length', 'sepal_width' , 'petal_length', 'petal_width', 'species']
df.head()

Unnamed: 0,No.,sepal_length,sepal_width,petal_length,petal_width,species
0,1,5.1,3.5,1.4,0.2,setosa
1,2,4.9,3.0,1.4,0.2,setosa
2,3,4.7,3.2,1.3,0.2,setosa
3,4,4.6,3.1,1.5,0.2,setosa
4,5,5.0,3.6,1.4,0.2,setosa


In [3]:
df.shape

(150, 6)

In [4]:
df.columns=df.columns.str.capitalize()
df.columns

Index(['No.', 'Sepal_length', 'Sepal_width', 'Petal_length', 'Petal_width',
       'Species'],
      dtype='object')

In [6]:
df = df.drop(['No.'], axis = 1)

df.head()

Unnamed: 0,Sepal_length,Sepal_width,Petal_length,Petal_width,Species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [7]:
in_clear=print(df.describe())

       Sepal_length  Sepal_width  Petal_length  Petal_width
count    150.000000   150.000000    150.000000   150.000000
mean       5.843333     3.057333      3.758000     1.199333
std        0.828066     0.435866      1.765298     0.762238
min        4.300000     2.000000      1.000000     0.100000
25%        5.100000     2.800000      1.600000     0.300000
50%        5.800000     3.000000      4.350000     1.300000
75%        6.400000     3.300000      5.100000     1.800000
max        7.900000     4.400000      6.900000     2.500000


### Checking for Null values

In [8]:
df.isnull().any().any()

False

### Checking for Duplicates 

In [9]:
df.columns.duplicated()

array([False, False, False, False, False])

In [10]:
df.Species.unique()

array(['setosa', 'versicolor', 'virginica'], dtype=object)

### Renaming Species 

In [11]:
df =df.replace("setosa" , 1)
df =df.replace("versicolor" , 2)
df =df.replace("virginica" , 3)

df.Species.unique()

array([1, 2, 3], dtype=int64)

### Splitting Data into Train and Test sets 

In [12]:
train, test = train_test_split(df, test_size=0.3) 
print(train.shape , test.shape)

(105, 5) (45, 5)


In [13]:
train_x = train[['Sepal_length','Sepal_width','Petal_length','Petal_width']]
train_y = train.Species

test_x = test[['Sepal_length','Sepal_width','Petal_length','Petal_width']]
test_y = test.Species #output

In [18]:
classifiers = [LogisticRegression(),
               LinearSVC(),
               KNeighborsClassifier(),
               DecisionTreeClassifier(),
               SVC(kernel='rbf'),
               RandomForestClassifier(),
               GradientBoostingClassifier(),
               GaussianNB()]
classifier_names = ['LogisticRegression',
                    'SVM',
                    'KNearestNeighbors',
                    'DecisionTree',
                    'rbf SVM',
                    'RandomForestClassifier',
                    'GradientBoostingClassifier',
                    'GaussianNB']

In [19]:
accuracy=[]
d={}

for i in range(len(classifiers)):
    clf=classifiers[i]
    clf.fit(train_x,train_y)
    pred=clf.predict(test_x)
    accuracy.append(accuracy_score(pred,test_y)*100)
     
d={'Modelling Algorithm': classifier_names,'Accuracy':accuracy}
d

{'Modelling Algorithm': ['LogisticRegression',
  'SVM',
  'KNearestNeighbors',
  'DecisionTree',
  'rbf SVM',
  'RandomForestClassifier',
  'GradientBoostingClassifier',
  'GaussianNB'],
 'Accuracy': [95.55555555555556,
  93.33333333333333,
  100.0,
  95.55555555555556,
  100.0,
  95.55555555555556,
  93.33333333333333,
  95.55555555555556]}

In [20]:
result = pd.DataFrame(d)

In [21]:
result

Unnamed: 0,Modelling Algorithm,Accuracy
0,LogisticRegression,95.555556
1,SVM,93.333333
2,KNearestNeighbors,100.0
3,DecisionTree,95.555556
4,rbf SVM,100.0
5,RandomForestClassifier,95.555556
6,GradientBoostingClassifier,93.333333
7,GaussianNB,95.555556
