<a href="https://colab.research.google.com/github/archiechang/study/blob/master/adhoc/Python%E3%81%AB%E3%82%88%E3%82%8B%E3%83%87%E3%83%BC%E3%82%BF%E5%88%86%E6%9E%90%E5%85%A5%E9%96%80.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# モデルの評価と改良

In [None]:
%matplotlib inline
from sklearn.datasets import make_blobs
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

In [None]:
# Model evaluation and improvement: baseline using a single train/test split.
# Generate the sample data with make_blobs (seeded via random_state).
X,y=make_blobs(random_state=0)
# Split into training and test portions; the split is seeded as well.
X_train,X_test,y_train,y_test=train_test_split(X,y,random_state=0)
# Fit the model on the training portion only.
logreg=LogisticRegression().fit(X_train,y_train)
# The reported score is computed on the held-out test portion.
print("Test set score:{:.2f}".format(logreg.score(X_test,y_test)))

Test set score:0.88


In [None]:
#Cross-Validation
from sklearn.model_selection import cross_val_score
from sklearn.datasets import load_iris

iris=load_iris()
# NOTE(review): max_iter is raised to 1000 — presumably so the solver converges
# on this dataset; confirm.
# This rebinds logreg, replacing the model fitted in the earlier cell.
logreg=LogisticRegression(max_iter=1000)

# No cv argument is passed, so cross_val_score uses its default splitting.
# The recorded output has five scores, identical to the explicit cv=5 call in
# the following cell.
scores=cross_val_score(logreg,iris.data,iris.target)
print("Cross-validation scores:{}".format(scores))

Cross-validation scores:[0.96666667 1.         0.93333333 0.96666667 1.        ]


In [None]:
# Same evaluation as the previous cell, with the number of folds given explicitly.
scores=cross_val_score(logreg,iris.data,iris.target,cv=5)
print("Cross-validation scores:{}".format(scores))

Cross-validation scores:[0.96666667 1.         0.93333333 0.96666667 1.        ]


In [None]:
print("Average cross-validation score:{:.2f}".format(scores.mean()))

Average cross-validation score:0.97


In [None]:
#Stratified K-Fold cross-validation and other strategies
from sklearn.model_selection import KFold
kfold=KFold(n_splits=5)

In [None]:
# Pass the KFold splitter object (n_splits=5, created in the previous cell)
# as cv instead of an integer fold count.
print("Cross-validation scores:\n{}".format(
    cross_val_score(logreg,iris.data,iris.target,cv=kfold)
))

Cross-validation scores:
[1.         1.         0.86666667 0.93333333 0.83333333]


In [None]:
# Three folds, with no shuffle argument passed to KFold.
# NOTE(review): the recorded scores are all 0 — presumably because the iris
# samples are ordered by class, so each test fold holds a class that is absent
# from its training folds; confirm against the dataset.
kfold=KFold(n_splits=3)
print("Cross-validation scores:\n{}".format(cross_val_score(logreg,iris.data,iris.target,cv=kfold)
))

Cross-validation scores:
[0. 0. 0.]


In [None]:
# Same three folds, but shuffle the samples before splitting.
# random_state pins the shuffle so the folds are repeatable.
kfold=KFold(n_splits=3,shuffle=True,random_state=0)
print("Cross-validation scores:\n{}".format(
    cross_val_score(logreg,iris.data,iris.target,cv=kfold)
))

Cross-validation scores:
[0.98 0.96 0.96]


In [None]:
#Leave-one-out cross-validation
from sklearn.model_selection import LeaveOneOut
# Use a LeaveOneOut splitter as the cv strategy.
loo=LeaveOneOut()
scores=cross_val_score(logreg,iris.data,iris.target,cv=loo)
# scores holds one entry per CV iteration, so its length is the number of splits
# (150 in the recorded run).
print("Number of cv iterations:",len(scores))
print("Mean accuracy:{:.2f}".format(scores.mean()))

Number of cv iterations: 150
Mean accuracy:0.97


In [None]:
#Shuffle-split cross-validation
from sklearn.model_selection import ShuffleSplit
# Ten random splits, each using half of the data for training and the other
# half for testing. random_state is fixed so the splits, and therefore the
# printed scores, are identical on every Restart & Run All; without it each
# run draws different splits.
shuffle_split = ShuffleSplit(test_size=.5, train_size=.5, n_splits=10, random_state=0)
scores = cross_val_score(logreg, iris.data, iris.target, cv=shuffle_split)
print("Cross-validation scores:\n{}".format(scores))

Cross-validation scores:
[0.97333333 0.97333333 0.93333333 0.97333333 0.92       0.97333333
 0.93333333 0.92       0.96       0.94666667]


In [None]:
#Cross-validation with groups
from sklearn.model_selection import GroupKFold
# NOTE(review): "grout_kfold" looks like a typo for "group_kfold". It is left
# unchanged here because it is a notebook-level name that other cells could use.
grout_kfold=GroupKFold(n_splits=3)
# Rebinds X and y (first assigned in the train/test cell) to 12 new samples.
X,y=make_blobs(n_samples=12,random_state=0)
# assume the first three samples belong to the same group,
# then the next four, etc.
# One group label per sample: 12 labels for the 12 samples above.
groups=[0,0,0,1,1,1,1,2,2,3,3,3]
# The group labels are handed to the splitter through cross_val_score.
scores=cross_val_score(logreg,X,y,groups=groups,cv=grout_kfold)
print("Cross-validation scores:\n{}".format(scores))

Cross-validation scores:
[0.75       0.6        0.66666667]


# 第三章　関数・ファイルの扱い

## 3.2 関数

### 3.2.2 複数の値を戻す

In [None]:
def f():
    """Return the three values 5, 6 and 7 packed into a single tuple."""
    return 5, 6, 7

# The result can be kept whole as a tuple ...
return_value = f()
# ... or unpacked into separate names in one assignment.
a, b, c = f()
print(a, b, c)
print(return_value)

5 6 7
(5, 6, 7)


In [None]:
def f():
    """Return the values 5, 6 and 7 in a dict keyed by 'a', 'b' and 'c'."""
    return {'a': 5, 'b': 6, 'c': 7}

# Despite its name, list_a holds the dict returned by f(); values are read by key.
list_a = f()
print(list_a['a'])

5


### 3.2.3 正規表現(re)

In [None]:
import re

In [None]:
status=['   Alabama','Georgia!','Georgia','georgia','FlOrIda','south   carolina##','West virginia?']

In [None]:
# Method 1
def clean_strings(strings):
    """Return a list with a cleaned copy of every string in ``strings``.

    Each value is processed in three steps, in this order:
      1. leading and trailing whitespace is stripped,
      2. every '!', '#' and '?' character is removed,
      3. the result is title-cased (first letter of each word upper case,
         the rest lower case).

    Whitespace inside a string is left as it is.
    """
    return [
        re.sub('[!#?]', '', raw.strip()).title()
        for raw in strings
    ]

In [None]:
clean_strings(status)

['Alabama',
 'Georgia',
 'Georgia',
 'Georgia',
 'Florida',
 'South   Carolina',
 'West Virginia']

In [None]:
# Method 2
def remove_punctuation(value):
    """Return ``value`` with every '!', '#' and '?' character removed."""
    return re.sub('[!#?]','',value)

# Cleaning steps, applied to each string in this order.
clean_ops =[str.strip,remove_punctuation,str.title]

def clean_strings(strings,ops=None):
    """Apply a sequence of string functions to every element of ``strings``.

    Args:
        strings: iterable of str to clean.
        ops: iterable of one-argument callables. They are applied left to
            right, each receiving the previous step's result. When omitted,
            ``clean_ops`` is used, so the one-argument call form of the
            earlier "method 1" definition keeps working after this
            redefinition.

    Returns:
        A new list holding the cleaned strings in input order.
    """
    # Materialise ops once: a one-shot iterator would otherwise be exhausted
    # after the first string and later strings would be returned uncleaned.
    ops = clean_ops if ops is None else tuple(ops)
    result=[]
    for value in strings:
        for function in ops:
            value=function(value)
        result.append(value)
    return result

In [None]:
clean_strings(status,clean_ops)

['Alabama',
 'Georgia',
 'Georgia',
 'Georgia',
 'Florida',
 'South   Carolina',
 'West Virginia']

### 3.2.4 無名(ラムダ)関数

In [None]:
def apply_to_list(some_list,f):
    """Call ``f`` on each element of ``some_list`` and return the results as a new list, in order."""
    return list(map(f, some_list))

ints = [4, 0, 1, 5, 6]
# A lambda serves as a throwaway doubling function.
apply_to_list(ints, lambda x: x * 2)

[8, 0, 2, 10, 12]

In [None]:
strings = ['foo', 'card', 'bar', 'aaaa', 'abab']
# Sort in place by the number of distinct characters in each string.
# list.sort is stable, so strings with the same count keep their original order.
strings.sort(key=lambda word: len(set(word)))
strings

['aaaa', 'foo', 'abab', 'bar', 'card']

### 3.2.6 ジェネレータ式

In [None]:
some_dict={'a':1,'b':2,'c':3,'d':4}
# Iterating over a dict directly yields its keys, not its values.
for key in some_dict:
    print(key)

a
b
c
d


In [None]:
dict_iterator=iter(some_dict)

In [None]:
dict_iterator

<dict_keyiterator at 0x7f68784e9b30>

# 第四章　NumPyの基礎

## 4.1　NumPy ndarray:多次元配列オブジェクト

In [None]:
import numpy as np

In [None]:
# Seed NumPy's global random generator so this cell yields the same 2x3 sample
# array on every Restart & Run All; unseeded, the values change on each run.
np.random.seed(0)
data=np.random.randn(2,3)
data

array([[-0.9326933 ,  0.04395013, -0.19643556],
       [-1.19673591,  0.20403647, -0.10701998]])

In [None]:
data*10

array([[ -9.32693297,   0.4395013 ,  -1.96435563],
       [-11.96735909,   2.04036466,  -1.07019979]])

In [None]:
data + data

array([[-1.86538659,  0.08790026, -0.39287113],
       [-2.39347182,  0.40807293, -0.21403996]])

In [None]:
data.shape

(2, 3)

In [None]:
data.dtype

dtype('float64')

In [None]:
# Build an array from a plain Python list.
data1=[6,7.5,8,0,1]
# The list mixes ints with one float; the recorded output shows every element
# as a float.
arr1=np.array(data1)
arr1

array([6. , 7.5, 8. , 0. , 1. ])

In [None]:
data2=[[1,2,3,4],[5,6,7,8]]
arr2=np.array(data2)
arr2

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

In [None]:
np.zeros((1,2))

array([[0., 0.]])

In [None]:
np.ones((4,4))

array([[1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.]])

In [None]:
np.arange(1,10,2)

array([1, 3, 5, 7, 9])

In [None]:
arr=np.arange(10)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [None]:
arr[5]

5

In [None]:
arr[5:8]

array([5, 6, 7])

In [None]:
# Keep a name for the slice so it can be modified in the following cells.
arr_slice=arr[5:8]
arr_slice

array([5, 6, 7])

In [None]:
# Assign through the slice; the recorded outputs that follow show the new value
# in arr as well, not only in arr_slice.
arr_slice[1]=123

In [None]:
arr_slice

array([  5, 123,   7])

In [None]:
arr

array([  0,   1,   2,   3,   4,   5, 123,   7,   8,   9])

In [None]:
# A bare [:] assignment sets every element of the slice to the scalar.
arr_slice[:]=12345
# Display the parent array: positions 5 to 7 now hold the new value.
arr

array([    0,     1,     2,     3,     4, 12345, 12345, 12345,     8,
           9])

In [None]:
arr2d=np.array([[1,2,3],[4,5,6],[7,8,9]])

arr2d[2]

array([7, 8, 9])

In [None]:
arr2d[1][2]

6

In [None]:
arr3d=np.array([[[1,2,3],[4,5,6]],[[7,8,9],[10,11,12]]])
arr3d

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [None]:
arr3d[0]

array([[1, 2, 3],
       [4, 5, 6]])

In [None]:
# Take a copy first so the original values can be restored in the next cell.
old_values=arr3d[0].copy()
# Assigning a scalar to arr3d[0] fills that whole 2x3 block with it.
arr3d[0]=42
arr3d

array([[[42, 42, 42],
        [42, 42, 42]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [None]:
arr3d[0]=old_values
arr3d

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [None]:
arr

array([    0,     1,     2,     3,     4, 12345, 12345, 12345,     8,
           9])

In [None]:
arr[1:6]

array([    1,     2,     3,     4, 12345])

In [None]:
arr2d

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [None]:
arr2d[:2]

array([[1, 2, 3],
       [4, 5, 6]])

In [None]:
arr2d[:2,1:]

array([[2, 3],
       [5, 6]])

In [None]:
# One name per row of data (7 names, 7 rows); used for boolean indexing below.
names=np.array(['Bob','Joe','Will','Bob','Will','Joe','Joe'])
# Seed NumPy's global random generator so the 7x4 sample array, and the rows
# selected from it in later cells, are the same on every Restart & Run All.
# Note that this rebinds data, replacing the earlier 2x3 array.
np.random.seed(0)
data=np.random.randn(7,4)
names

array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'], dtype='<U4')

In [None]:
data

array([[-0.55304421,  0.41179632, -0.75225757,  0.30501789],
       [ 0.38963885,  1.3910213 ,  1.44189062,  0.55237093],
       [-0.29489155,  0.67360573,  0.69075412,  1.7641949 ],
       [-0.32194728, -2.53140759,  1.6101871 , -1.23145571],
       [-0.70666524,  0.22744804, -0.31917576, -0.61004512],
       [ 2.12090111,  0.76288818, -1.27931926,  0.66919797],
       [ 1.09954216,  0.00577524,  0.79173574, -0.33077496]])

In [None]:
data[names=='Bob']

array([[-0.55304421,  0.41179632, -0.75225757,  0.30501789],
       [-0.32194728, -2.53140759,  1.6101871 , -1.23145571]])

In [None]:
names=='Bob'

array([ True, False, False,  True, False, False, False])

In [None]:
data[names=='Bob',2:]

array([[-0.75225757,  0.30501789],
       [ 1.6101871 , -1.23145571]])

In [None]:
# Build an 8x4 float array in which every element of row i equals i.
# np.empty leaves the contents uninitialised, so every row is assigned below.
arr = np.empty(shape=(8, 4))
for i in range(arr.shape[0]):
    arr[i, :] = i

arr

array([[0., 0., 0., 0.],
       [1., 1., 1., 1.],
       [2., 2., 2., 2.],
       [3., 3., 3., 3.],
       [4., 4., 4., 4.],
       [5., 5., 5., 5.],
       [6., 6., 6., 6.],
       [7., 7., 7., 7.]])

In [None]:
arr[[4,3,0,6]]

array([[4., 4., 4., 4.],
       [3., 3., 3., 3.],
       [0., 0., 0., 0.],
       [6., 6., 6., 6.]])

In [None]:
# Indexing with a list of row numbers returns those rows in the listed order,
# as the recorded output shows (rows 4, 3, 0, 6).
index=[4,3,0,6]
arr[index]

array([[4., 4., 4., 4.],
       [3., 3., 3., 3.],
       [0., 0., 0., 0.],
       [6., 6., 6., 6.]])