%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
# Import the Pandas library
import pandas as pd
# Fetch the Titanic training and test CSV files over HTTP and parse each one
# into its own DataFrame (`train` and `test`).
train_url = "http://s3.amazonaws.com/assets.datacamp.com/course/Kaggle/train.csv"
test_url = "http://s3.amazonaws.com/assets.datacamp.com/course/Kaggle/test.csv"
train, test = pd.read_csv(train_url), pd.read_csv(test_url)
# Preview the first rows of the training frame only; the value is shown when
# this runs as a notebook cell and is discarded in a plain script.
train.head()
#Import the Numpy library
import numpy as np
from sklearn import naive_bayes
# Inspection step: build a single space-separated string of every attribute
# name that the sklearn.naive_bayes module exposes. The result is not stored.
" ".join(dir(naive_bayes))
from sklearn.naive_bayes import BernoulliNB
import numpy as np
import pandas as pd
# Load the Titanic training set from a local CSV copy.
# NOTE: this rebinds `train`, replacing any DataFrame bound to that name
# earlier in the file, and the path is specific to one machine.
train = pd.read_csv('/Users/sara/github/data/tatanic_train.csv', sep=",")
# Fill missing ages with the column median.
train["Age"] = train["Age"].fillna(train["Age"].median())
# Encode Sex as integers: male -> 0, female -> 1.
# A single whole-column assignment is used instead of chained indexing
# (`train["Sex"][mask] = value`): the chained form raises
# SettingWithCopyWarning and silently does nothing when pandas copy-on-write
# is active. Values missing from the mapping become NaN.
train["Sex"] = train["Sex"].map({"male": 0, "female": 1})
# Impute missing ports of embarkation as 'S' before encoding them.
train["Embarked"] = train["Embarked"].fillna('S')
# Encode Embarked as integers: S -> 0, C -> 1, Q -> 2 (same whole-column
# approach as for Sex, for the same reason).
train["Embarked"] = train["Embarked"].map({"S": 0, "C": 1, "Q": 2})
# Build the NumPy inputs for scikit-learn: `features_one` holds the four
# predictor columns and `target` the Survived labels, one entry per row.
features_one = train.loc[:, ["Pclass", "Sex", "Age", "Fare"]].values
target = train.Survived.values
# Create a Bernoulli naive Bayes classifier with default settings and train
# it on the whole training set; fit() returns the fitted estimator itself.
model = BernoulliNB().fit(features_one, target)
# Prediction is intentionally not run here. To score new rows, pass an array
# with the same four feature columns to model.predict(...).
# Score Bernoulli naive Bayes with 4-fold cross-validation.
# `sklearn.cross_validation` was removed in scikit-learn 0.20; the function
# has lived in `sklearn.model_selection` since 0.18. The fallback keeps
# releases older than 0.18 working.
try:
    from sklearn.model_selection import cross_val_score
except ImportError:
    from sklearn.cross_validation import cross_val_score
# Start from a fresh, unfitted estimator; cross_val_score fits its own clone
# on each fold. This rebinds `model`, as the original script did.
model = BernoulliNB()
scores = cross_val_score(model, features_one, target, cv=4)
# Mean score over the folds, then the per-fold scores. Both are bare
# expressions: shown by a notebook, discarded by a plain script.
scores.mean()
scores