In [13]:
# Shared imports, plus the Titanic dataset read from a CSV file given by a
# relative path (resolved against the kernel's working directory).
import numpy as np
import pandas as pd
titanic = pd.read_csv("titanic_data.csv")

---
### Classifier examples

In [10]:
# Logistic Regression
# Built with scikit-learn's default hyperparameters; the model is not fitted here.
# NOTE: every classifier example cell rebinds the same name `classifier`,
# so whichever of these cells ran last determines what it refers to.
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression()

In [7]:
# Neural Networks
# Multi-layer perceptron classifier with default hyperparameters; not fitted here.
# NOTE: rebinds the shared name `classifier`.
from sklearn.neural_network import MLPClassifier
classifier = MLPClassifier()

In [8]:
# Decision Trees
# Decision tree classifier with default hyperparameters; not fitted here.
# NOTE: rebinds the shared name `classifier`.
from sklearn.tree import DecisionTreeClassifier
classifier = DecisionTreeClassifier()

In [12]:
# Support Vector Machines
# Support vector classifier with default hyperparameters; not fitted here.
# NOTE: rebinds the shared name `classifier`.
from sklearn.svm import SVC
classifier = SVC()

---
### Testing your models

In [14]:
# Import statements
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
# train_test_split lives in sklearn.model_selection (available since 0.18).
# The former sklearn.cross_validation module was deprecated in 0.18 and
# removed in 0.20, so importing from it fails on current installs.
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np

# Read in the data (the CSV has no header row) as a plain NumPy array.
data = np.asarray(pd.read_csv('data.csv', header=None))
# Assign the features to the variable X, and the labels to the variable y.
# The first two columns are used as features, the third column as the label.
X = data[:,0:2]
y = data[:,2]

# Hold out 25% of the rows for testing; random_state=42 makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 42)

# Instantiate the decision tree model with default hyperparameters.
model = DecisionTreeClassifier()

# Fit the model to the training data.
model.fit(X_train, y_train)

# Make predictions on the held-out test data.
y_pred = model.predict(X_test)

# Accuracy on the test data, stored in `acc`.
acc = accuracy_score(y_test, y_pred)

In [61]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# The CSV has no header row. Column 0 is used as the single feature and
# column 1 as the target; both slices keep a trailing axis (2-D arrays).
data = np.asarray(pd.read_csv("data.csv", header = None))
X, y = data[:, :1], data[:, 1:2]

# Fit a linear regression and predict on the very same rows it was trained
# on, so the metrics below describe training fit, not generalisation.
classifier = LinearRegression()
classifier.fit(X, y)
guesses = classifier.predict(X)

# Compute the three regression metrics first, then report them.
abs_error = mean_absolute_error(y, guesses)
sqr_error = mean_squared_error(y, guesses)
r2s = r2_score(y, guesses)

print("Mean Absolute Error: %s" % abs_error)
print("Mean Square Error: %s" % sqr_error)
print("R2 Score: %s" % r2s)

Mean Absolute Error: 0.221235199426
Mean Square Error: 0.0659449530027
R2 Score: 0.00961961008498


## Life Expectancy

In [10]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

# Load data. Double brackets keep both selections as single-column
# DataFrames (2-D), which is the shape scikit-learn estimators expect.
bmi_life_data = pd.read_csv("bmi_and_life_expectancy.csv")
x = bmi_life_data[['BMI']]
y = bmi_life_data[['Life expectancy']]

# Make and fit the linear regression model
bmi_life_model = LinearRegression()
bmi_life_model.fit(x, y)

# Make a prediction using the model.
# predict() takes a 2-D array of shape (n_samples, n_features); a bare scalar
# is rejected by scikit-learn 0.20+ ("Expected 2D array"), so the single BMI
# value is wrapped as one row with one column.
laos_life_exp = bmi_life_model.predict([[21.07931]])

In [13]:
def p(x1, x2, w1 = 2, w2 = 1, b = -18):
    """Score the point (x1, x2) against the line w1*x1 + w2*x2 + b = 0.

    Prints "Nope" when the score is negative and "Yeap" otherwise
    (a score of exactly zero counts as "Yeap"), then returns the score.
    """
    score = w1*x1 + w2*x2 + b
    verdict = "Nope" if score < 0 else "Yeap"
    print(verdict)
    return score

In [14]:
# Score the point (7, 6) with w1 overridden to 1.5; w2 and b keep their defaults.
p(7, 6, w1 = 1.5)

Nope


-1.5

---
## Perceptron Algorithm

In [3]:
import numpy as np
# Seed NumPy's global random generator so the np.random.rand draws used for
# the initial weights and bias further down are repeatable between runs.
np.random.seed(42)

def stepFunction(t):
    """Step activation: 1 when t is greater than or equal to 0, otherwise 0."""
    return 1 if t >= 0 else 0

def prediction(X, W, b):
    """Classify an input as 0 or 1 using weights W and bias b.

    Computes matmul(X, W) + b, takes the first element of the result and
    passes it through stepFunction.
    """
    # The [0] index means the matmul result must be indexable, i.e. not a
    # 0-d scalar (presumably X is one sample and W a column vector -- confirm
    # against the caller).
    activation = np.matmul(X, W) + b
    return stepFunction(activation[0])

# Perceptron trick: receives the data X, the labels y, the weights W
# (as an array) and the bias b, updates W and b according to the
# perceptron algorithm, and returns W and b.
def perceptronStep(X, y, W, b, learn_rate = 0.01):
    """Run one pass of the perceptron update rule over every point in X.

    For each point the current prediction is compared with its label:
      * predicted lower than the label  -> add learn_rate * x to W and
        learn_rate to b;
      * predicted higher than the label -> subtract the same amounts;
      * otherwise the point is left alone.

    Parameters:
        X: indexable collection of points; X[i][j] is feature j of point i.
        y: labels, indexed in step with X.
        W: weights; W[j] is updated in place with `+=`.
        b: bias value.
        learn_rate: step size applied to each correction.

    Returns:
        (W, b) after all points have been visited.
    """
    for i in range(len(X)):
        y_hat = prediction(X[i], W, b)
        if y_hat < y[i]:
            step = learn_rate
        elif y_hat > y[i]:
            step = -learn_rate
        else:
            # Correctly classified: nothing to adjust.
            continue
        for j in range(len(W)):
            W[j] += X[i][j] * step
        # The bias moves exactly once per misclassified point. It must stay
        # outside the per-weight loop, otherwise it is shifted len(W) times.
        b += step
    return W, b
    
# This function runs the perceptron algorithm repeatedly on the dataset,
# and returns a few of the boundary lines obtained in the iterations,
# for plotting purposes.
# Feel free to play with the learning rate and the num_epochs,
# and see your results plotted below.
def trainPerceptronAlgorithm(X, y, learn_rate = 0.01, num_epochs = 25):
    """Apply perceptronStep num_epochs times and record each boundary line.

    Parameters:
        X: data array; X.T[0] is read as the first feature column, and the
           2x1 weight vector means two features per point are expected.
        y: labels passed through to perceptronStep.
        learn_rate: step size passed through to perceptronStep.
        num_epochs: number of passes over the data.

    Returns:
        A list with one (slope, intercept) pair per epoch, computed as
        (-W[0]/W[1], -b/W[1]) from the weights and bias after that epoch.
    """
    # Only the largest first-feature value is needed: it offsets the
    # randomly drawn initial bias.
    x_max = max(X.T[0])
    # Draw the weights before the bias so that, under a fixed seed, the
    # sequence of random numbers consumed stays the same.
    W = np.array(np.random.rand(2,1))
    b = np.random.rand(1)[0] + x_max
    # These are the solution lines that get plotted below.
    boundary_lines = []
    for _ in range(num_epochs):
        # In each epoch, we apply the perceptron step.
        W, b = perceptronStep(X, y, W, b, learn_rate)
        boundary_lines.append((-W[0]/W[1], -b/W[1]))
    return boundary_lines


---
## Sklearn Review

In [5]:
# Display the installed scikit-learn version as the cell's output.
import sklearn
sklearn.__version__

'0.19.2'