# Dog Breed Predictions

In [1]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import cv2
import random
import tensorflow as tf
import requests
import re
import wikipedia

from skimage.io import imread
from tensorflow import keras
from tensorflow.keras.constraints import MaxNorm
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout
from tensorflow.keras.layers import Flatten, Dense, Activation, BatchNormalization
from tensorflow.keras.models import Sequential
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications.resnet_v2 import ResNet50V2, decode_predictions, preprocess_input
from tensorflow.keras.applications.vgg16 import VGG16, decode_predictions, preprocess_input
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.applications.xception import Xception

In [2]:
train_path = '/Users/cris/Repositorios/local_docs/curso_ds21/dog-breed-identification/train/'
test_path = '/Users/cris/Repositorios/local_docs/curso_ds21/dog-breed-identification/test/'
target_df = pd.read_csv('/Users/cris/Repositorios/local_docs/curso_ds21/dog-breed-identification/labels.csv')

In [3]:
for i in range(len(target_df['id'])):
    target_df['id'][i] = target_df['id'][i] + '.jpg'

## Image processing and load memory

In [None]:
labels_target = target_df['breed'].unique()

In [None]:
labels_int = pd.DataFrame({'target':labels_target,
                           'target_int':0})

In [None]:
for i in range(len(labels_target)):
    labels_int['target_int'][i]=i

In [None]:
final_target = target_df.copy()

In [None]:
final_target = final_target.merge(labels_int, left_on='breed', right_on='target')

In [None]:
target_int = final_target.copy()

In [None]:
target_int.drop(columns='target', inplace=True)

In [None]:
target_int = target_int.astype('str')

### Imágenes en greyscale

In [None]:
imagenes = []
filenames = os.listdir(train_path)[:4000]

for img in os.listdir(train_path)[:4000]:
    image = train_path + img
    img_gray = cv2.imread(image, cv2.IMREAD_GRAYSCALE)
    smallimage = cv2.resize(img_gray, (100, 100))
    smallimage = smallimage / 255.
    imagenes.append(smallimage)

#smallimage = smallimage.reshape(100, 100, 1)

In [None]:
filenames = pd.DataFrame({'archivo':filenames})

In [None]:
file = filenames.merge(target_int, left_on='archivo', right_on='id')

In [None]:
X_train_o = np.array(imagenes)

In [None]:
X_train_o = X_train_o.reshape(100, 100, 3)

In [None]:
X_train_o = imagenes

In [None]:
plt.imshow(X_train_o[1004])

### Imágenes color

In [None]:
imagenes = []
filenames = os.listdir(train_path)[:6000]

for img in os.listdir(train_path)[:6000]:
    image = train_path + img
    img_gray = cv2.imread(image)
    smallimage = cv2.resize(img_gray, (100, 100))
    imagenes.append(smallimage)

#smallimage = smallimage.reshape(100, 100, 1)

In [None]:
filenames = pd.DataFrame({'archivo':filenames})

In [None]:
file = filenames.merge(target_int, left_on='archivo', right_on='id')

In [None]:
X_train_o = np.array(imagenes)

In [None]:
X_train_o = X_train_o / 255.0

In [None]:
print(X_train_o[0].shape)

### Y_train, split train-test, & save

In [None]:
y_train_o = file['target_int']

In [None]:
y_train_o = np.array(y_train_o).astype('int')

In [None]:
y_train_o.shape

In [None]:
X_train = X_train_o[:5000].copy()
y_train = y_train_o[:5000].copy()

X_test = X_train_o[5000:].copy()
y_test = y_train_o[5000:].copy()

In [None]:
np.savez('data.npz', X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test)

In [None]:
data = np.load('data.npz')

X_train = data['X_train']
X_test = data['X_test']
y_train = data['y_train']
y_test = data['y_test']

## 1st Model Sequential-1 without pretrain

In [None]:
model = keras.models.Sequential([
    keras.layers.Conv2D(64, 7, activation="relu", padding="same",
                        input_shape=(100, 100, 3)),
    keras.layers.MaxPooling2D(2),
    keras.layers.Conv2D(128, 3, activation="relu", padding="same"),
    keras.layers.Conv2D(128, 3, activation="relu", padding="same"),
    keras.layers.MaxPooling2D(2),
    keras.layers.Conv2D(256, 3, activation="relu", padding="same"),
    keras.layers.Conv2D(256, 3, activation="relu", padding="same"),
    keras.layers.MaxPooling2D(2),
    keras.layers.Flatten(),
    keras.layers.Dense(128, activation="relu"),
    keras.layers.Dropout(0.2),
    keras.layers.Dense(64, activation="relu"),
    keras.layers.Dropout(0.2),
    keras.layers.Dense(120, activation="softmax")])

In [None]:
model.compile(optimizer="sgd",
              loss='sparse_categorical_crossentropy',
              metrics=['acc'])

In [None]:
cb_model1 = keras.callbacks.ModelCheckpoint("cb_model1.h5")

In [None]:
model1 = model.fit(X_train_, y_train_, validation_data=(X_test, y_test),
          epochs=25, batch_size=32, callbacks=cb_model1)

In [None]:
model1.evaluate(X_test, y_test)

In [None]:
model.fit(X_train, y_train, epochs=25,
          batch_size=64, validation_split=.5)

## 2nd Model Sequential-2 without pretrain

In [None]:
model = Sequential()

model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(100, 100, 3)))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.5))
model.add(Dense(120, activation='softmax'))

model.compile(loss='sparse_categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

In [None]:
cb_model2 = keras.callbacks.ModelCheckpoint("cb_model2.h5")

In [None]:
model2 = model.fit(X_train, y_train, epochs=10,
          batch_size=64, validation_data=(X_test, y_test), callbacks=[cb_model2])

In [None]:
model2.evaluate(X_test, y_test)

## 3rd Model (Image Generator-1) without pretrain

In [None]:
datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range = 30,  # randomly rotate images in the range (degrees, 0 to 180)
        zoom_range = 0.2, # Randomly zoom image 
        width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
        horizontal_flip = True,  # randomly flip images
        vertical_flip=False)  # randomly flip images

datagen.fit(X_train)

In [None]:
model = Sequential()
model.add(Conv2D(32, 3, padding="same",
          activation="relu", input_shape=(100, 100, 3)))
model.add(MaxPooling2D(2))

model.add(Conv2D(64, 3, padding="same", activation="relu"))
model.add(MaxPooling2D(2))
model.add(Dropout(0.4))

model.add(Conv2D(128, 3, padding="same", activation="relu"))
model.add(MaxPooling2D(2))
model.add(Dropout(0.4))

model.add(Flatten())
model.add(Dense(128, activation="relu"))
model.add(Dense(120, activation="softmax"))

model.summary()

In [None]:
cb_model3 = keras.callbacks.ModelCheckpoint("cb_model3.h5")

In [None]:
model3 = model.fit(X_train_, y_train_, validation_data=(X_test, y_test),
          epochs=25, batch_size=64, callbacks=cb_model3)

In [None]:
model3.evaluate(X_test, y_test)

## 4th Model (Image Generator-2) without pretrain

In [None]:
IMAGE_HEIGHT = 100
IMAGE_WIDTH = 100
IMAGE_CHANNELS=3
IMAGE_SIZE=(IMAGE_WIDTH, IMAGE_HEIGHT, IMAGE_CHANNELS)

In [None]:
train_df, val_df = train_test_split(target_df,
                                    test_size=0.20,
                                    random_state=42)

train_df = train_df.reset_index(drop=True)
val_df = validate_df.reset_index(drop=True)

In [None]:
train_datagen = ImageDataGenerator(rotation_range=15,
                                   rescale=1./255,
                                   shear_range=0.1,
                                   zoom_range=0.2,
                                   horizontal_flip=True,
                                   width_shift_range=0.1,
                                   height_shift_range=0.1)

val_datagen = ImageDataGenerator(rotation_range=15,
                                   rescale=1./255,
                                   shear_range=0.1,
                                   zoom_range=0.2,
                                   horizontal_flip=True,
                                   width_shift_range=0.1,
                                   height_shift_range=0.1)

In [None]:
train_gen = train_datagen.flow_from_dataframe(
    train_df, 
    train_path, 
    x_col='id',
    y_col='breed',
    target_size=(IMAGE_HEIGHT, IMAGE_WIDTH),
    class_mode='sparse',
    batch_size=64)

val_gen = val_datagen.flow_from_dataframe(
    val_df,
    train_path,
    x_col='id',
    y_col='breed',
    target_size=(IMAGE_HEIGHT, IMAGE_WIDTH),
    class_mode='sparse',
    batch_size=64)

In [None]:
layers4 = [
    keras.layers.Conv2D(64,(3,3), activation = 'relu', input_shape=IMAGE_SIZE),
    keras.layers.MaxPooling2D(pool_size = (2,2)),

    keras.layers.Conv2D(64,(3,3), activation = 'relu'),
    keras.layers.MaxPooling2D(pool_size = (2,2)),

    keras.layers.Flatten(),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dense(120, activation='sigmoid')
]

model = keras.Sequential(layers4)

model.compile(optimizer="adam",
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [None]:
layers4_1 = [
    keras.layers.Conv2D(64,(3,3), activation = 'relu', input_shape=IMAGE_SIZE),
    keras.layers.MaxPooling2D(pool_size = (2,2)),

    keras.layers.Conv2D(64,(3,3), activation = 'relu'),
    keras.layers.MaxPooling2D(pool_size = (2,2)),

    keras.layers.Flatten(),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dense(120, activation='sigmoid')
]

model = keras.Sequential(layers4)

model.compile(optimizer="adam",
              loss='sparse_categorical_crossentropy',
              metrics='accuracy')

In [None]:
cb_model4 = keras.callbacks.ModelCheckpoint("cb_model4.h5")
cb_model4_1 = keras.callbacks.ModelCheckpoint("cb_model4_1.h5")

In [None]:
model4 = model.fit(train_gen,
                   epochs=50,
                   validation_data=val_gen,
                   callbacks=cb_model4)

In [None]:
model4.evaluate(X_test, y_test)

## 5th Model VGG16-1

In [None]:
IMAGE_HEIGHT = 100
IMAGE_WIDTH = 100
IMAGE_CHANNELS=3
IMAGE_SIZE=(IMAGE_WIDTH, IMAGE_HEIGHT, IMAGE_CHANNELS)

In [None]:
train_df, val_df = train_test_split(target_df,
                                    test_size=0.20,
                                    random_state=42)

train_df = train_df.reset_index(drop=True)
val_df = validate_df.reset_index(drop=True)

In [None]:
train_datagen = ImageDataGenerator(rotation_range=15,
                                   rescale=1./255,
                                   shear_range=0.1,
                                   zoom_range=0.2,
                                   horizontal_flip=True,
                                   width_shift_range=0.1,
                                   height_shift_range=0.1)

val_datagen = ImageDataGenerator(rotation_range=15,
                                   rescale=1./255,
                                   shear_range=0.1,
                                   zoom_range=0.2,
                                   horizontal_flip=True,
                                   width_shift_range=0.1,
                                   height_shift_range=0.1)

In [None]:
train_gen = train_datagen.flow_from_dataframe(
    train_df, 
    train_path, 
    x_col='id',
    y_col='breed',
    target_size=(IMAGE_HEIGHT, IMAGE_WIDTH),
    class_mode='sparse',
    batch_size=64)

val_gen = val_datagen.flow_from_dataframe(
    val_df,
    train_path,
    x_col='id',
    y_col='breed',
    target_size=(IMAGE_HEIGHT, IMAGE_WIDTH),
    class_mode='sparse',
    batch_size=64)

In [None]:
base_model = VGG16(input_shape=IMAGE_SIZE,
                   include_top=False,
                   weights='imagenet')

for layer in base_model.layers:
    layer.trainable = False

In [None]:
layers5 = keras.layers.Flatten()(base_model.output)

layers5 = keras.layers.Dense(512, activation='relu')(layers5)

layers5 = keras.layers.Dense(512, activation='relu')(layers5)

layers5 = keras.layers.Dropout(0.2)(layers5)

layers5 = keras.layers.Dense(120, activation='softmax')(layers5)

model = keras.models.Model(base_model.input, layers5)

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics='acc')

In [None]:
cb_model5_1 = keras.callbacks.ModelCheckpoint("cb_model5.h5_1")

best_acc_model5_1 = keras.callbacks.ModelCheckpoint(
    "best_acc_model5_1.h5", monitor='val_acc', mode='auto', verbose=1, save_best_only=True)

best_loss_model5_1 = keras.callbacks.ModelCheckpoint(
    "best_loss_model5_1.h5", monitor='val_loss', mode='auto', verbose=1, save_best_only=True)

earlystop_model5_1 = EarlyStopping(monitor='val_loss',
                                 min_delta=0,
                                 patience=5,
                                 verbose=1,
                                 restore_best_weights=True)

In [None]:
model5 = keras.models.load_model("best_model5.h5")

In [None]:
model5_1 = model5.fit(train_gen,
                   validation_data=val_gen,
                   epochs=25,
                   callbacks=[cb_model5_1,
                              best_acc_model5_1,
                              best_loss_model5_1,
                              earlystop_model5_1])

## 6th Model VGG16-2

In [None]:
IMAGE_HEIGHT = 100
IMAGE_WIDTH = 100
IMAGE_CHANNELS=3
IMAGE_SIZE=(IMAGE_WIDTH, IMAGE_HEIGHT, IMAGE_CHANNELS)

In [None]:
train_df, val_df = train_test_split(target_df,
                                    test_size=0.20,
                                    random_state=42)

train_df = train_df.reset_index(drop=True)
val_df = validate_df.reset_index(drop=True)

In [None]:
train_datagen = ImageDataGenerator(rescale=1./255)

val_datagen = ImageDataGenerator(rescale=1./255)

In [None]:
train_gen = train_datagen.flow_from_dataframe(
    train_df, 
    train_path, 
    x_col='id',
    y_col='breed',
    target_size=(IMAGE_HEIGHT, IMAGE_WIDTH),
    class_mode='sparse',
    batch_size=64)

val_gen = val_datagen.flow_from_dataframe(
    val_df,
    train_path,
    x_col='id',
    y_col='breed',
    target_size=(IMAGE_HEIGHT, IMAGE_WIDTH),
    class_mode='sparse',
    batch_size=64)

In [None]:
base_model = VGG16(input_shape=IMAGE_SIZE,
                   include_top=False,
                   weights='imagenet')

for layer in base_model.layers:
    layer.trainable = False

In [None]:
layers5 = keras.layers.Flatten()(base_model.output)

layers5 = keras.layers.Dense(128, activation='relu')(layers5)
layers5 = keras.layers.Dropout(0.2)(layers5)

layers5 = keras.layers.Dense(256, activation='relu')(layers5)
layers5 = keras.layers.Dropout(0.2)(layers5)

layers5 = keras.layers.Dense(512, activation='relu')(layers5)
layers5 = keras.layers.Dropout(0.5)(layers5)
layers5 = keras.layers.Dense(120, activation='softmax')(layers5)

model = keras.models.Model(base_model.input, layers5)

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics='acc')

In [None]:
#cb_model6 = keras.callbacks.ModelCheckpoint("6_cb_model5.h")

best_acc_model6 = keras.callbacks.ModelCheckpoint(
    "7_best_acc_model.h5", monitor='val_acc', mode='auto', verbose=1, save_best_only=True)

best_loss_model6 = keras.callbacks.ModelCheckpoint(
    "7_best_loss_model.h5", monitor='val_loss', mode='auto', verbose=1, save_best_only=True)

earlystop_model6 = EarlyStopping(monitor='val_loss',
                                 min_delta=0,
                                 patience=5,
                                 verbose=1,
                                 restore_best_weights=True)

In [None]:
#model6 = keras.models.load_model("best_model6.h5")

In [None]:
model6 = model.fit(train_gen,
                   validation_data=val_gen,
                   epochs=25,
                   callbacks=[cb_model6,
                              best_acc_model6,
                              best_loss_model6,
                              earlystop_model6])

## 7th Model Inception V3

In [None]:
IMAGE_HEIGHT = 150
IMAGE_WIDTH = 150
IMAGE_CHANNELS = 3
IMAGE_SIZE=(IMAGE_WIDTH, IMAGE_HEIGHT, IMAGE_CHANNELS)

In [None]:
train_df, val_df = train_test_split(target_df,
                                    test_size=0.15,
                                    random_state=42)

train_df = train_df.reset_index(drop=True)
val_df = val_df.reset_index(drop=True)

In [None]:
train_datagen = ImageDataGenerator(rotation_range=40,
                                   rescale=1./255.,
                                   shear_range=0.2,
                                   zoom_range=0.2,
                                   horizontal_flip=True,
                                   width_shift_range=0.2,
                                   height_shift_range=0.2)

val_datagen = ImageDataGenerator(rescale=1./255.)

In [None]:
train_gen = train_datagen.flow_from_dataframe(
    train_df, 
    train_path, 
    x_col='id',
    y_col='breed',
    target_size=(IMAGE_HEIGHT, IMAGE_WIDTH),
    class_mode='sparse',
    batch_size=32)

val_gen = val_datagen.flow_from_dataframe(
    val_df,
    train_path,
    x_col='id',
    y_col='breed',
    target_size=(IMAGE_HEIGHT, IMAGE_WIDTH),
    class_mode='sparse',
    batch_size=32)

In [None]:
base_model_inception = InceptionV3(input_shape=IMAGE_SIZE,
                                   include_top=False,
                                   weights='imagenet')

for layer in base_model_inception.layers:
    layer.trainable = False

In [None]:
x = keras.layers.Flatten()(base_model_inception.output)

x = keras.layers.Dense(512, activation='relu')(x)

x = keras.layers.Dropout(0.2)(x)

x = keras.layers.Dense(120, activation='softmax')(x)

model = keras.models.Model(base_model_inception.input, x)

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics='acc')

In [None]:
#cb_model = keras.callbacks.ModelCheckpoint("7_cb_model5.h")

best_acc_model = keras.callbacks.ModelCheckpoint(
    "7_best_acc_model.h5", monitor='val_acc', mode='auto', verbose=1, save_best_only=True)

best_loss_model = keras.callbacks.ModelCheckpoint(
    "7_best_loss_model.h5", monitor='val_loss', mode='auto', verbose=1, save_best_only=True)

earlystop_model = EarlyStopping(monitor='val_loss',
                                min_delta=0,
                                patience=5,
                                verbose=1,
                                restore_best_weights=True)

In [None]:
model = keras.models.load_model("7_best_acc_model.h5")

In [None]:
model = model.fit(train_gen,
                   validation_data=val_gen,
                   epochs=25,
                   callbacks=[best_acc_model])

## 8th Model Xception

In [4]:
IMAGE_HEIGHT = 200
IMAGE_WIDTH = 200
IMAGE_CHANNELS = 3
IMAGE_SIZE=(IMAGE_WIDTH, IMAGE_HEIGHT, IMAGE_CHANNELS)

In [5]:
train_df, val_df = train_test_split(target_df,
                                    test_size=0.15,
                                    random_state=42)

train_df = train_df.reset_index(drop=True)
val_df = val_df.reset_index(drop=True)

In [6]:
train_datagen = ImageDataGenerator(rotation_range=40,
                                   rescale=1./255.,
                                   shear_range=0.2,
                                   zoom_range=0.2,
                                   horizontal_flip=True,
                                   width_shift_range=0.2,
                                   height_shift_range=0.2)

val_datagen = ImageDataGenerator(rescale=1./255.)

In [7]:
train_gen = train_datagen.flow_from_dataframe(
    train_df, 
    train_path, 
    x_col='id',
    y_col='breed',
    target_size=(IMAGE_HEIGHT, IMAGE_WIDTH),
    class_mode='sparse',
    batch_size=32)

val_gen = val_datagen.flow_from_dataframe(
    val_df,
    train_path,
    x_col='id',
    y_col='breed',
    target_size=(IMAGE_HEIGHT, IMAGE_WIDTH),
    class_mode='sparse',
    batch_size=32)

Found 8688 validated image filenames belonging to 120 classes.
Found 1534 validated image filenames belonging to 120 classes.


In [None]:
base_model = Xception(input_shape=IMAGE_SIZE,
                      include_top=False,
                      weights="imagenet")

for layer in base_model.layers:
    layer.trainable = False

In [None]:
avg = keras.layers.GlobalAveragePooling2D()(base_model.output)

avg = keras.layers.Dense(512, activation='relu')(avg)

avg = keras.layers.Dropout(0.2)(avg)

output = keras.layers.Dense(120, activation="softmax")(avg)

model = keras.Model(inputs=base_model.input, outputs=output)

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics='acc')

In [None]:
cb_model = keras.callbacks.ModelCheckpoint("8_3_2_cb_model5.h5")

best_acc_model = keras.callbacks.ModelCheckpoint(
    "8_3_2_best_acc_model.h5", monitor='val_acc', mode='auto', verbose=1, save_best_only=True)

best_loss_model = keras.callbacks.ModelCheckpoint(
    "8_3_2_best_loss_model.h5", monitor='val_loss', mode='auto', verbose=1, save_best_only=True)

earlystop_model = EarlyStopping(monitor='val_loss',
                                min_delta=0,
                                patience=5,
                                verbose=1,
                                restore_best_weights=True)

In [None]:
model = keras.models.load_model("8_3_1_best_acc_model.h5")

In [None]:
history = model.fit(train_gen,
                   validation_data=val_gen,
                   epochs=1,
                   callbacks=[cb_model, best_acc_model])

## Predict

In [None]:
smallimage = cv2.resize((cv2.imread(train_path + target_df['id'][3])), (200, 200))

In [None]:
plt.imshow(cv2.imread(train_path + target_df['id'][3]))

In [None]:
smallimage = smallimage / 255.

In [None]:
smallimage = smallimage.reshape(1, 200, 200, 3)

In [None]:
best = model.predict(smallimage)
best.argmax()

## DF Target classes + description + real name

In [8]:
dic_breeds = train_gen.class_indices

In [9]:
breeds_df = pd.DataFrame({'breed': dic_breeds.keys(),
                          'name_breed': '',
                          'description': ''},
                          index=dic_breeds.values())

In [10]:
breeds_df.reset_index(inplace=True)

In [11]:
breeds_df = breeds_df.rename(columns={'index': 'id'})

### Extract description + images breeds from Wikipedia

In [12]:
for i in range(len(breeds_df)):
    breeds_df['name_breed'][i] = breeds_df['breed'][i].replace('_', ' ')
    breeds_df['name_breed'][i] = breeds_df['name_breed'][i].title()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  breeds_df['name_breed'][i] = breeds_df['breed'][i].replace('_', ' ')
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  breeds_df['name_breed'][i] = breeds_df['name_breed'][i].title()


In [13]:
# Extract Wikipedia description:

for i in range(len(breeds_df)):
    try:
        breeds_df['description'][i] = wikipedia.summary(
            breeds_df['name_breed'][i])
    except:
        breeds_df['description'][i] = 'No description available.'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  breeds_df['description'][i] = wikipedia.summary(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  breeds_df['description'][i] = 'No description available.'


  lis = BeautifulSoup(html).find_all('li')


In [15]:
breeds_df.to_csv('breeds_df.csv')

In [None]:
# Extract pictures from Wikipedia:

for i in range(len(breeds_df)):
    try:
        page = wikipedia.page(breeds_df['name_breed'][i])
        img = page.images[0]
        img_data = requests.get(img).content
        with open(breeds_df['breed'][i] + '.jpg', 'wb') as handler:
            handler.write(img_data)
    except:
        continue



  lis = BeautifulSoup(html).find_all('li')
