In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

%matplotlib inline

### A linear regression example

We're going to look at the relationship between height and weight in 18-year-old humans, using [data](http://socr.ucla.edu/docs/resources/SOCR_Data/SOCR_Data_Dinov_020108_HeightsWeights.html) from UCLA's Statistics Online Computational Resource (SOCR).



In [None]:
# Read the height/weight table from the CSV file into a DataFrame
hwdata = pd.read_csv(filepath_or_buffer='SOCR-HeightWeight.csv')

# Report the (rows, columns) tuple so the size of the data set is visible
print(hwdata.shape)

# Preview the first 20 rows as the cell's displayed output
hwdata.head(n=20)

In [None]:
# Naming convention: X is the independent variable and y the dependent one.
# Here every entry of X is a single number (the height) and every entry of y
# is the matching weight; a more complicated ML problem would carry a whole
# vector of features per entry of X instead.
X, y = hwdata['height'], hwdata['weight']

In [None]:
# To get a feel for how linear the weight-vs-height relationship is, plot
# only the first 100 samples.
# .iloc makes the "first 100 rows" selection explicitly positional, whatever
# index the Series happens to carry.
fig, ax = plt.subplots()
ax.plot(X.iloc[:100], y.iloc[:100], 'o')

# Label the figure so it can be read on its own when skimming the notebook.
ax.set_xlabel('height (inches)')
ax.set_ylabel('weight')
# The trailing semicolon keeps the last call's return value out of the cell output.
ax.set_title('Weight vs height (first 100 samples)');

In [None]:
# Building and training the model is the entire ML portion of this example.
# The estimator expects a 2-D feature array, so the 1-D height Series is
# reshaped into a single column before fitting; fit() returns the estimator.
lin_reg = LinearRegression().fit(X.to_numpy().reshape(-1, 1), y)

# Show the two parameters of the fitted straight line
print('intercept = ', lin_reg.intercept_)
print('slope = ', lin_reg.coef_[0])

In [None]:
def guess_your_weight(height_in_inches):
    """Predict a weight from a height using the fitted line in `lin_reg`.

    Evaluates slope * height + intercept with the coefficient and intercept
    stored on the module-level `lin_reg` model, prints a one-line summary of
    the prediction, and returns the predicted value.

    Parameters
    ----------
    height_in_inches : number
        Height to predict a weight for.

    Returns
    -------
    The predicted weight for the given height.
    """
    slope, intercept = lin_reg.coef_[0], lin_reg.intercept_
    predicted_weight = slope*height_in_inches + intercept

    print(f"For the height of {height_in_inches} the predicted weight is {predicted_weight}")
    return predicted_weight


# Try the model on a single example height
guess_your_weight(68)

In [None]:
# The model is a straight line, so its predictions at the smallest and the
# largest observed heights are enough to draw it.
# Note: guess_your_weight also prints a message for each of the two calls.
X_min = X.min()
X_max = X.max()
y_min = guess_your_weight(X_min)
y_max = guess_your_weight(X_max)

# Overlay the fitted line (red) on the full scatter of observations.
fig, ax = plt.subplots()
ax.plot(X, y, 'o', label='data')
ax.plot([X_min, X_max], [y_min, y_max], color='r', label='linear fit')

# Label the figure so it can be read on its own when skimming the notebook.
ax.set_xlabel('height (inches)')
ax.set_ylabel('weight')
ax.set_title('Weight vs height with fitted regression line')
# The trailing semicolon keeps the last call's return value out of the cell output.
ax.legend();

### A classification example

Now we're going to look at a completely contrived example to see whether we can use the general weather and temperature to decide if children are going to play on the local playground.

In [None]:
# One tuple per observed day: (weather, temperature, did the children play?)
_observations = [
    ('Sunny',    'Hot',  'No'),
    ('Sunny',    'Hot',  'No'),
    ('Overcast', 'Hot',  'Yes'),
    ('Rainy',    'Mild', 'Yes'),
    ('Rainy',    'Cool', 'Yes'),
    ('Rainy',    'Cool', 'No'),
    ('Overcast', 'Cool', 'Yes'),
    ('Sunny',    'Mild', 'No'),
    ('Sunny',    'Cool', 'Yes'),
    ('Rainy',    'Mild', 'Yes'),
    ('Sunny',    'Mild', 'Yes'),
    ('Overcast', 'Mild', 'Yes'),
    ('Overcast', 'Hot',  'Yes'),
    ('Rainy',    'Mild', 'No'),
]

# Split the records column-wise into three parallel lists
weather, temp, play = (list(column) for column in zip(*_observations))

In [None]:
# Use one encoder per column. Re-fitting a single LabelEncoder for every
# column throws away the earlier string -> integer mappings, so the weather
# and temp codes could no longer be inspected (via `classes_`) or reused to
# encode new samples.
weather_le = LabelEncoder()
temp_le = LabelEncoder()
# `le` keeps its original name and ends up fitted on `play`, exactly as before.
le = LabelEncoder()

# Turn each string column into integer codes
weather_encoded = weather_le.fit_transform(weather)
temp_encoded = temp_le.fit_transform(temp)

# Stack the two encoded columns side by side: one row per sample
features = np.c_[weather_encoded, temp_encoded]

# Integer-encoded target (whether the children played)
label = le.fit_transform(play)

In [None]:
# Show each raw string column next to its integer encoding
for raw_values, encoded_values in (
    (play, label),
    (weather, weather_encoded),
    (temp, temp_encoded),
):
    print(raw_values, encoded_values)

In [None]:
# Show the stacked feature matrix: one row per sample, with the encoded
# weather in the first column and the encoded temp in the second
print(features)

In [None]:
# Train a k-nearest-neighbours classifier (k = 3) on the encoded features;
# fit() returns the estimator, so construction and training can be chained
knn_clf = KNeighborsClassifier(n_neighbors=3).fit(features, label)

# Classify one new day, given as a (weather code, temp code) pair
will_play = knn_clf.predict([[0, 2]])  # overcast and mild

# The prediction is printed in its integer-encoded form
print(will_play)