In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


from collections import Counter

from sklearn.ensemble import (RandomForestClassifier, AdaBoostClassifier,
                            GradientBoostingClassifier, ExtraTreesClassifier,
                            VotingClassifier)

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV, cross_val_score, StratifiedKFold, learning_curve


train = pd.read_csv('../input/train.csv')
test = pd.read_csv('../input/test.csv')
IDtest = test['PassengerId']


In [6]:
def detect_outliners(df, n, features):
    outlier_indices = []

    for col in features:
        Q1 = np.percentile(df[col], 25)

        Q3 = np.percentile(df[col], 75)

        IQR = Q3 - Q1

        outlier_step = 1.5 * IQR

        outlier_list_col = df[(df[col] < Q1 - outlier_step) | (df[col] > Q3 + outlier_step)].index

        outlier_indices.extend(outlier_list_col)

    outlier_indices = Counter(outlier_indices)
    multiple_outliers = list( k for k, v  in outlier_indices.items() if v > n)

    return multiple_outliers


Outliers_to_drop = detect_outliners(train, 2, ['Age', 'SibSp','Parch','Fare'])


In [12]:
test_i = train[train['Age'] > 50].index

In [18]:
test_i = Counter(test_i)

In [16]:
list(k for k, v in test_i.items() if v > 2)

[]

In [17]:
test_i.items()

dict_items([(6, 1), (11, 1), (15, 1), (33, 1), (54, 1), (94, 1), (96, 1), (116, 1), (124, 1), (150, 1), (152, 1), (155, 1), (170, 1), (174, 1), (195, 1), (222, 1), (232, 1), (249, 1), (252, 1), (262, 1), (268, 1), (275, 1), (280, 1), (317, 1), (326, 1), (366, 1), (406, 1), (438, 1), (449, 1), (456, 1), (467, 1), (483, 1), (487, 1), (492, 1), (493, 1), (496, 1), (513, 1), (545, 1), (555, 1), (570, 1), (571, 1), (582, 1), (587, 1), (591, 1), (625, 1), (626, 1), (630, 1), (631, 1), (647, 1), (659, 1), (672, 1), (684, 1), (694, 1), (695, 1), (714, 1), (745, 1), (765, 1), (772, 1), (774, 1), (820, 1), (829, 1), (851, 1), (857, 1), (879, 1)])

In [19]:
test_i

Counter({6: 1,
         11: 1,
         15: 1,
         33: 1,
         54: 1,
         94: 1,
         96: 1,
         116: 1,
         124: 1,
         150: 1,
         152: 1,
         155: 1,
         170: 1,
         174: 1,
         195: 1,
         222: 1,
         232: 1,
         249: 1,
         252: 1,
         262: 1,
         268: 1,
         275: 1,
         280: 1,
         317: 1,
         326: 1,
         366: 1,
         406: 1,
         438: 1,
         449: 1,
         456: 1,
         467: 1,
         483: 1,
         487: 1,
         492: 1,
         493: 1,
         496: 1,
         513: 1,
         545: 1,
         555: 1,
         570: 1,
         571: 1,
         582: 1,
         587: 1,
         591: 1,
         625: 1,
         626: 1,
         630: 1,
         631: 1,
         647: 1,
         659: 1,
         672: 1,
         684: 1,
         694: 1,
         695: 1,
         714: 1,
         745: 1,
         765: 1,
         772: 1,
         774: 1,
     

In [7]:
train.loc[Outliers_to_drop]


Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
27,28,0,1,"Fortune, Mr. Charles Alexander",male,19.0,3,2,19950,263.0,C23 C25 C27,S
88,89,1,1,"Fortune, Miss. Mabel Helen",female,23.0,3,2,19950,263.0,C23 C25 C27,S
159,160,0,3,"Sage, Master. Thomas Henry",male,,8,2,CA. 2343,69.55,,S
180,181,0,3,"Sage, Miss. Constance Gladys",female,,8,2,CA. 2343,69.55,,S
201,202,0,3,"Sage, Mr. Frederick",male,,8,2,CA. 2343,69.55,,S
324,325,0,3,"Sage, Mr. George John Jr",male,,8,2,CA. 2343,69.55,,S
341,342,1,1,"Fortune, Miss. Alice Elizabeth",female,24.0,3,2,19950,263.0,C23 C25 C27,S
792,793,0,3,"Sage, Miss. Stella Anna",female,,8,2,CA. 2343,69.55,,S
846,847,0,3,"Sage, Mr. Douglas Bullen",male,,8,2,CA. 2343,69.55,,S
863,864,0,3,"Sage, Miss. Dorothy Edith ""Dolly""",female,,8,2,CA. 2343,69.55,,S


In [8]:
train.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S
