<a href="https://colab.research.google.com/github/isseikz/memo/blob/master/Untitled4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Pythonで行う画像分類
===

# 目次
1. データを集める
1. モデルを作る
1. データをモデルに学習させる
1. モデルを評価する
1. モデルで予測する

# 1. データを集める

モデルの学習に必要な月面クレーターの画像を集めます。クレーター名ごとにGoogle画像検索を行い、見つかった画像をGoogle Drive上のフォルダに保存します。


In [0]:
# Mount Google Drive at ./gdrive; the later cells read and write their data beneath it.
from google.colab import drive
drive.mount('./gdrive')
# Root folder on the mounted Drive that every later cell uses as its base path.
drive_root_dir="./gdrive/My Drive/MoonCraterDetection"

In [0]:
# スクレイピングの学習 Python3系 その2 (@mizssy07 2018年08月18日に投稿) を参考にした
# https://qiita.com/mizssy07/items/0f4ca2d7e6f9e022c5e2

import bs4
import requests
import re
import urllib.request, urllib.error
import shutil
import os
import argparse
import sys
import json

# Crater names. Each one is used as a Google image-search keyword and as the
# name of the per-crater folder under <drive_root_dir>/resources/.
crater_name_list = [
      "Albategnius", "Aristarchus", "Aristotle", "Bailly", "Clavius",
      "Copernicus", "Humboldt", "Janssen", "Langrenus",
      "Longomontanus", "Maginus", "Metius", "Moretus", "Petavius", "Picard",
      "Piccolomini", "Pitatus", "Plinius", "Rheita", "Russell", "Schickard",
      "Seleucus", "Stadius", "Stöfler", "Thebit", "Theophilus", "Tycho",
      "Vendelinus", "Wargentin"
    ]

def get_soup(url,header):
    """Fetch ``url`` and return its HTML parsed into a BeautifulSoup tree.

    Args:
        url: Address of the page to download.
        header: Dict of HTTP request headers sent with the request
            (for example a browser-like ``User-Agent``).

    Returns:
        A ``bs4.BeautifulSoup`` object built with the ``html.parser`` backend.

    Raises:
        urllib.error.URLError: If the request fails (``HTTPError`` included).
    """
    request = urllib.request.Request(url, headers=header)
    # Read the body inside a context manager so the HTTP response is always
    # closed; previously the open response was passed straight to
    # BeautifulSoup and never closed.
    with urllib.request.urlopen(request) as response:
        html = response.read()
    return bs4.BeautifulSoup(html, 'html.parser')

def section1_main():
    """Download Google image-search results for every crater in ``crater_name_list``.

    For each crater the search result page is fetched first; then the folder
    ``<drive_root_dir>/resources/<crater_name>`` is deleted (if present) and
    recreated, and every image referenced by an ``rg_meta`` element of the
    page is saved there as ``img_<index>.<type>``. An image that cannot be
    downloaded or written is reported on stdout and skipped.

    Raises:
        urllib.error.URLError: If a search result page cannot be fetched.
    """
    from urllib.parse import quote_plus

    # Browser-like User-Agent for the search request; identical for every crater.
    header={'User-Agent':"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.134 Safari/537.36"}

    for crater_name in crater_name_list:
        # Percent-encode the name: urllib rejects non-ASCII characters in the
        # request URL, which made names such as "Stöfler" abort the whole run.
        # ASCII-only names are left unchanged by quote_plus.
        url="https://www.google.co.jp/search?q="+"moon+crater+"+quote_plus(crater_name)+"&source=lnms&tbm=isch"
        soup = get_soup(url,header)

        # Start from an empty folder so images from an earlier run are not mixed in.
        save_directory = drive_root_dir + "/resources/" + crater_name
        if os.path.exists(save_directory):
            shutil.rmtree(save_directory)
        os.makedirs(save_directory)

        # Collect (image URL, file type) pairs from the JSON stored in each rg_meta div.
        ActualImages=[]
        for a in soup.find_all("div",{"class":"rg_meta"}):
            meta = json.loads(a.text)  # parse once instead of once per key
            ActualImages.append((meta["ou"], meta["ity"]))

        for i , (img , Type) in enumerate(ActualImages):
            try:
                # Fall back to "jpg" when the metadata carries no file type.
                Type = Type if len(Type) > 0 else 'jpg'
                print("Downloading image {} ({}), type is {}".format(i, img, Type))
                with urllib.request.urlopen(img) as response:
                    raw_img = response.read()
                # `with` guarantees the file is closed even if the write fails.
                with open(os.path.join(save_directory , "img_"+str(i)+"."+Type), 'wb') as f:
                    f.write(raw_img)
            except Exception as e:
                # Best effort: report the failure and continue with the next image.
                print ("could not load : "+img)
                print (e)


In [0]:
 # Run the scraper. Note: each crater's image folder is deleted and re-downloaded.
 section1_main()

Downloading image 0 (https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Albategnius_crater_4101_h2_h3.jpg/220px-Albategnius_crater_4101_h2_h3.jpg), type is jpg
Downloading image 1 (https://upload.wikimedia.org/wikipedia/commons/thumb/4/43/Albategnius_lunar_crater_map.jpg/220px-Albategnius_lunar_crater_map.jpg), type is jpg
Downloading image 2 (https://upload.wikimedia.org/wikipedia/commons/c/c5/Location_of_albategnius_crater.jpg), type is jpg
Downloading image 3 (https://upload.wikimedia.org/wikipedia/commons/thumb/5/59/Albategnius_crater_AS16-M-2472.jpg/220px-Albategnius_crater_AS16-M-2472.jpg), type is jpg
Downloading image 4 (http://andrewplanck.com/wp-content/uploads/2018/05/Albategnius-on-Moon.png), type is png
could not load : http://andrewplanck.com/wp-content/uploads/2018/05/Albategnius-on-Moon.png
HTTP Error 412: Precondition Failed
Downloading image 5 (https://the-moon.us/images/d/d5/Normal_Albategnius040906_04-39-59.jpg), type is jpg
Downloading image 6 (http://i1215.

## 画像を整形する

In [0]:
#【Python】画像の解像度の変更。リサイズ（拡大・縮小） 
# https://www.tech-tech.xyz/python-img-resize.html
# Python, Pillowで画像の一部をトリミング（切り出し/切り抜き）
# https://note.nkmk.me/python-pillow-image-crop-trimming/
import glob
import os
from PIL import Image
import pathlib

# Side length, in pixels, of the square images written by resize_downloaded_images().
rect_width = 250


def resize_downloaded_images():
    """Resize and crop every downloaded ``*.jpg`` to a ``rect_width`` square.

    For each crater in ``crater_name_list`` the ``*.jpg`` files in
    ``<drive_root_dir>/resources/<crater_name>`` are scaled so that their
    shorter side equals ``rect_width``, cropped to the top-left
    ``rect_width`` x ``rect_width`` region, and saved under the same file
    name in ``<drive_root_dir>/resources/resized/<crater_name>``.

    A file that Pillow cannot open or decode is reported on stdout and skipped.
    """
    resources_dir = pathlib.Path(drive_root_dir) / "resources"

    for crater_name in crater_name_list:
        in_dir = resources_dir / crater_name
        out_dir = resources_dir / "resized" / crater_name
        # The output folder was never created before, so saving into it failed.
        out_dir.mkdir(parents=True, exist_ok=True)

        for file in in_dir.glob('*.jpg'):
            print(file)

            try:
                # Context manager closes the underlying file handle after use.
                with Image.open(file) as img:
                    img_width, img_height = img.size
                    # Scale the shorter side to rect_width, keeping the aspect ratio.
                    if img_width > img_height:
                        new_size = (int(rect_width / img_height * img_width), rect_width)
                    else:
                        new_size = (rect_width, int(rect_width / img_width * img_height))
                    im_crop = img.resize(new_size).crop((0, 0, rect_width, rect_width))
            except OSError as e:
                # Covers files Pillow cannot identify or fully decode.
                print("could not resize : " + str(file))
                print(e)
                continue

            # The ".jpg" extension was taken from search metadata, not from the
            # file content, so the image may be in a mode (e.g. with alpha or a
            # palette) that the JPEG writer rejects; normalise to RGB first.
            if im_crop.mode != "RGB":
                im_crop = im_crop.convert("RGB")

            # Previously this used an undefined `file_name`; keep the source file's name.
            im_crop.save(out_dir / file.name)

In [0]:
# Write the square, resized copies of the downloaded images under resources/resized/.
resize_downloaded_images()

## 画像をテスト用と学習用に分ける

In [0]:
# KerasのCNNで、顔認識AIを作って見た〜スクレイピングからモデルまで〜
# ( @yottyann1221 2019年03月07日に更新) より
# https://qiita.com/yottyann1221/items/a08300b572206075ee9f#%E3%81%AF%E3%81%98%E3%82%81%E3%81%AB

import shutil
import random
import glob
import os

# NOTE(review): this repeats the crater list already defined in the scraping
# cell earlier in the notebook. The two copies must stay identical, because the
# names double as folder names; consider keeping a single definition.
crater_name_list = [
      "Albategnius", "Aristarchus", "Aristotle", "Bailly", "Clavius",
      "Copernicus", "Humboldt", "Janssen", "Langrenus",
      "Longomontanus", "Maginus", "Metius", "Moretus", "Petavius", "Picard",
      "Piccolomini", "Pitatus", "Plinius", "Rheita", "Russell", "Schickard",
      "Seleucus", "Stadius", "Stöfler", "Thebit", "Theophilus", "Tycho",
      "Vendelinus", "Wargentin"
    ]

def divide_data_test_train(test_divisor=5, seed=None):
    """Move a random share of each crater's resized images into a test folder.

    For every crater in ``crater_name_list`` the entries of
    ``<drive_root_dir>/resources/resized/<crater_name>`` are shuffled and the
    first ``len(entries) // test_divisor`` of them are moved to
    ``<drive_root_dir>/resources/resized/test/<crater_name>``.

    An existing test folder for a crater is deleted first, so images moved
    there by an earlier run are discarded, not returned to the training folder.

    Args:
        test_divisor: Positive integer; one in ``test_divisor`` images is
            moved to the test set. Defaults to 5, as before.
        seed: Optional seed for a reproducible split. With ``None`` (default)
            the module-level ``random`` state is used, as before.

    Raises:
        ValueError: If ``test_divisor`` is not positive.
    """
    if test_divisor <= 0:
        raise ValueError("test_divisor must be a positive integer")

    rng = random if seed is None else random.Random(seed)
    resized_dir = drive_root_dir + "/resources/resized/"

    for crater_name in crater_name_list:
        # glob order is arbitrary; sorting first makes a seeded shuffle repeatable.
        in_jpg = sorted(glob.glob(resized_dir + crater_name + "/*"))
        rng.shuffle(in_jpg)

        # Recreate the crater's test folder from scratch.
        test_dir = resized_dir + "test/" + crater_name
        if os.path.exists(test_dir):
            shutil.rmtree(test_dir)
        os.makedirs(test_dir)

        for path in in_jpg[:len(in_jpg)//test_divisor]:
            shutil.move(path, test_dir)

In [0]:
# Split off the test set: moves one fifth of each crater's resized images to resized/test/.
divide_data_test_train()

## 画像にラベルを付ける

In [0]:
# KerasのCNNで、顔認識AIを作って見た〜スクレイピングからモデルまで〜
# ( @yottyann1221 2019年03月07日に更新) より
# https://qiita.com/yottyann1221/items/a08300b572206075ee9f#%E3%81%AF%E3%81%98%E3%82%81%E3%81%AB

import cv2
from keras.utils.np_utils import to_categorical
import numpy as np

def _load_labeled_images(image_root, count_message):
    """Read every crater's images below ``image_root`` and label them by class index.

    Args:
        image_root: Directory holding one sub-folder per name in ``crater_name_list``.
        count_message: Format string printed once per crater; it receives the
            crater name and the number of files found in its folder.

    Returns:
        A tuple ``(images, labels)``: the arrays returned by ``cv2.imread`` and,
        for each of them, the index of its crater in ``crater_name_list``.
    """
    images = []
    labels = []
    # `i` is the class index and `j` the file index; both were used before
    # without ever being defined.
    for i, crater_name in enumerate(crater_name_list):
        crater_dir = os.path.join(image_root, crater_name)
        img_file_name_list = os.listdir(crater_dir)
        print(count_message.format(crater_name, len(img_file_name_list)))

        for j, file_name in enumerate(img_file_name_list):
            img = cv2.imread(os.path.join(crater_dir, file_name))
            if img is None:
                # cv2.imread returns None instead of raising when it cannot read a file.
                print('image' + str(j) + ':NoImage')
                continue
            # The image is kept exactly as cv2.imread returns it. The earlier
            # split/merge recombined the channels in the same order, so it
            # changed nothing and has been dropped.
            images.append(img)
            labels.append(i)
    return images, labels


# Label the training data
X_train, Y_train = _load_labeled_images(
    drive_root_dir + "/resources/resized",
    "{}:トレーニング用の写真の数は{}枚です。")

print("")

# Label the test data
X_test, Y_test = _load_labeled_images(
    drive_root_dir + "/resources/resized/test",
    "{}:テスト用の写真の数は{}枚です。")

X_train=np.array(X_train)
X_test=np.array(X_test)
# Fix the one-hot width to the number of craters. Without num_classes the width
# is derived from the largest label present, so the train and test encodings
# could differ from each other and from the size of the model's output layer.
y_train = to_categorical(Y_train, num_classes=len(crater_name_list))
y_test = to_categorical(Y_test, num_classes=len(crater_name_list))


# 2. モデルを作る


In [0]:
# KerasのCNNで、顔認識AIを作って見た〜スクレイピングからモデルまで〜
# ( @yottyann1221 2019年03月07日に更新) より
# https://qiita.com/yottyann1221/items/a08300b572206075ee9f#%E3%81%AF%E3%81%98%E3%82%81%E3%81%AB

from keras.layers import Activation, Conv2D, Dense, Flatten, MaxPooling2D
from keras.models import Sequential

# Shape of one input image: 250 x 250 pixels, 3 channels.
input_shape = (250, 250, 3)

# Keyword arguments shared by all three convolution layers.
conv_options = dict(filters=32, kernel_size=(3, 3), strides=(1, 1), padding="same")

# Model definition: three convolution + max-pooling stages, then two dense
# layers with sigmoid activations and a softmax output layer.
model = Sequential([
    Conv2D(input_shape=input_shape, **conv_options),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(**conv_options),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(**conv_options),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(256),
    Activation("sigmoid"),
    Dense(128),
    Activation('sigmoid'),
    # One output unit per crater class.
    Dense(len(crater_name_list)),
    Activation('softmax'),
])

# Compile
model.compile(
    loss='categorical_crossentropy',
    optimizer='sgd',
    metrics=['accuracy'],
)

# Train, using the test images and labels as validation data.
history = model.fit(
    X_train,
    y_train,
    batch_size=70,
    epochs=50,
    verbose=1,
    validation_data=(X_test, y_test),
)