In [15]:
from sklearn.neighbors import KNeighborsClassifier as KNNC
from sklearn.naive_bayes import GaussianNB as NBC
from sklearn.svm import SVC
from sklearn.model_selection import KFold, cross_val_score

In [16]:
# Instantiate the three candidate classifiers, each constructed with no
# arguments (i.e. scikit-learn's default hyperparameters).
knn, gnb, svc = KNNC(), NBC(), SVC()

In [14]:
from sklearn.datasets import load_digits
# Load scikit-learn's bundled digits dataset; the cross-validation cells read
# its `.data` (features) and `.target` (labels) attributes.
digits = load_digits()

In [32]:
# 10-fold splitter used for the model comparison; shuffling with a fixed seed
# keeps the fold assignment the same from run to run.
kfold = KFold(n_splits=10, shuffle=True, random_state=11)

In [33]:
# (label, classifier) pairs to compare. Only the items view is kept because
# the comparison loop iterates over name/object pairs.
estimator = {'KNN': knn, 'NBC': gnb, 'SVM': svc}.items()

In [35]:
# Cross-validate every candidate on the digits data and remember the one with
# the highest mean fold accuracy. The comparison is strict (`>`), so when two
# models tie the one that was evaluated first is kept.
best, accuracy = None, 0
for e_name, e_obj in estimator:
    scores = cross_val_score(e_obj, digits.data, digits.target, cv=kfold)
    mean_score = scores.mean()
    print(f'Mean accuracy of {e_name:} is {scores.mean():0.2%}')
    if mean_score > accuracy:
        best = e_name
        accuracy = mean_score

print(f'The best model is {best} with accuracy {accuracy:0.2%}')

Mean accuracy of KNN is 98.72%
Mean accuracy of NBC is 84.48%
Mean accuracy of SVM is 98.72%
The best model is KNN with accuracy 98.72%


In [42]:
# Tune n_neighbors for KNN: try every odd k in 1..19 with 15-fold
# cross-validation and keep the k with the highest mean accuracy
# (strict `>`, so the smallest k wins an exact tie).
#
# The splitter and the per-k classifier get cell-local names (`kfold_k`,
# `knn_k`) on purpose: rebinding the notebook-wide `kfold` / `knn` here would
# silently change the results of the earlier model-comparison cells if they
# were re-run after this one.
#
# The splitter is built once outside the loop. With an integer random_state,
# every call to split() yields the same folds, so all k values are scored on
# identical folds -- the same as constructing a fresh KFold per iteration.
kfold_k = KFold(n_splits=15, random_state=11, shuffle=True)

best_k = 0
accuracy = 0
for k in range(1, 20, 2):
    knn_k = KNNC(n_neighbors=k)
    scores = cross_val_score(estimator=knn_k, X=digits.data, y=digits.target, cv=kfold_k)
    mean_accuracy = scores.mean()  # computed once, reused for compare + print
    if mean_accuracy > accuracy:
        accuracy = mean_accuracy
        best_k = k
    print(f'Mean accuracy with k {k}: {mean_accuracy:0.2%}')
print(f'The best k value is {best_k}')

Mean accuracy with k 1: 98.83%
Mean accuracy with k 3: 98.78%
Mean accuracy with k 5: 98.83%
Mean accuracy with k 7: 98.50%
Mean accuracy with k 9: 98.44%
Mean accuracy with k 11: 98.39%
Mean accuracy with k 13: 98.11%
Mean accuracy with k 15: 97.94%
Mean accuracy with k 17: 97.77%
Mean accuracy with k 19: 97.77%
The best k value is 5


In [43]:
import pandas as pd

In [52]:
# Read the CSV (path is relative to the notebook's working directory) into a
# DataFrame and display it.
# NOTE(review): judging by the filename this is NYC average January high
# temperatures for 1895-2018 -- confirm against the data source.
nyc = pd.read_csv('ave_hi_nyc_jan_1895-2018.csv')
nyc

Unnamed: 0,Date,Value,Anomaly
0,189501,34.2,-3.2
1,189601,34.7,-2.7
2,189701,35.5,-1.9
3,189801,39.6,2.2
4,189901,36.4,-1.0
...,...,...,...
119,201401,35.5,-1.9
120,201501,36.1,-1.3
121,201601,40.8,3.4
122,201701,42.8,5.4


In [53]:
# Rename the 'Value' column to 'Temperature'. Renaming by name (instead of
# overwriting `nyc.columns` positionally) cannot mislabel columns if the CSV's
# column order ever changes, and it builds a new frame rather than mutating
# the one the previous cell displayed. Re-running this cell is a no-op.
nyc = nyc.rename(columns={'Value': 'Temperature'})
nyc

Unnamed: 0,Date,Temperature,Anomaly
0,189501,34.2,-3.2
1,189601,34.7,-2.7
2,189701,35.5,-1.9
3,189801,39.6,2.2
4,189901,36.4,-1.0
...,...,...,...
119,201401,35.5,-1.9
120,201501,36.1,-1.3
121,201601,40.8,3.4
122,201701,42.8,5.4


In [54]:
# Convert Date from YYYYMM (e.g. 189501) to a plain year (1895).
# Only values still in YYYYMM form (>= 10_000) are divided; values that are
# already 4-digit years are left untouched. This makes the cell idempotent:
# an unconditional floor-divide would turn 1895 into 18 on a second run.
nyc['Date'] = nyc['Date'].where(nyc['Date'] < 10_000, nyc['Date'] // 100)
nyc

Unnamed: 0,Date,Temperature,Anomaly
0,1895,34.2,-3.2
1,1896,34.7,-2.7
2,1897,35.5,-1.9
3,1898,39.6,2.2
4,1899,36.4,-1.0
...,...,...,...
119,2014,35.5,-1.9
120,2015,36.1,-1.3
121,2016,40.8,3.4
122,2017,42.8,5.4


In [60]:
# Feature matrix for the regression. Selecting with a list of columns
# ([['Date']]) yields a 2-D array with one column, which is the layout
# passed to train_test_split / fit below; it is equivalent to
# nyc.Date.values.reshape(-1, 1).
X = nyc[['Date']].values

In [65]:
# Regression target: the Temperature column as a 1-D NumPy array.
y = nyc.Temperature.values

In [69]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

In [70]:
# Hold out 20% of the rows for testing; the fixed seed makes the split
# repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=10
)

In [73]:
# Fit a linear regression of temperature on year using the training split only.
# The bare `fit` call is the cell's last expression, so the fitted estimator's
# repr is shown as the cell output.
lr = LinearRegression()
lr.fit(X_train, y_train)