In [2]:
import os

# Project name and the Google Drive folder that holds its working copy.
PROJECT_NAME = 'GoBiz'
DRIVE_PATH = f"/content/drive/MyDrive/{PROJECT_NAME}"


# Everything below only runs when the notebook is executed on Google Colab.
if 'google.colab' in str(get_ipython()):
    from google.colab import drive
    try:
        drive.mount('/content/drive')
    except Exception as mount_error:
        # Catch Exception instead of using a bare `except:` so that
        # KeyboardInterrupt / SystemExit still propagate, and report the
        # reason rather than hiding it.
        print("Mounting Fail")
        print(f"Reason: {mount_error!r}")
    else:
        print("Mouting Success")
    finally:
        print("Mounting process is completed")


    # First run: the project folder is not on Drive yet, so clone it there.
    if not os.path.exists(DRIVE_PATH):
        try:
            from git import Repo
        except ModuleNotFoundError:
            import sys, subprocess
            print("Installing 'git' module")
            subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'gitpython'])
            # Import again only when the package had to be installed.
            from git import Repo

        os.mkdir(DRIVE_PATH)
        try:
            Repo.clone_from(f"https://github.com/VuongTuanKhanh/{PROJECT_NAME}", DRIVE_PATH)
        except Exception:
            # A failed clone must not leave the empty folder behind: the
            # existence check above would then skip cloning on every later run.
            if os.path.isdir(DRIVE_PATH) and not os.listdir(DRIVE_PATH):
                os.rmdir(DRIVE_PATH)
            raise
        print("Cloning git repo")


    # Work from the project folder so the relative paths used later resolve.
    os.chdir(DRIVE_PATH)

Mounted at /content/drive
Mouting Success
Mounting process is completed


In [3]:
import logging
import logging.config

# Apply the project's logging configuration file, then fetch the
# 'simpleExample' logger used throughout the notebook.
logging.config.fileConfig('./core/logging.conf')
logger = logging.getLogger('simpleExample')

# Additionally send this logger's records to file.log. The handler accepts
# DEBUG and above; the logger's own level is whatever the config file sets.
file_handler = logging.FileHandler('file.log')
file_handler.setLevel(logging.DEBUG)
file_format = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
# The formatter was previously built but never attached, so file.log fell back
# to the default format; attach it before registering the handler.
file_handler.setFormatter(file_format)
logger.addHandler(file_handler)

In [9]:
import os
from core.exception import FileSavingError


import pandas as pd

from core.helper import BigQueryClient

# Client built from the credentials file shipped under ./core.
client = BigQueryClient('./core/BigquerryCredential.json')


# Local cache of the query result; the query only runs when it is missing.
DATA_PATH = f'./data/{PROJECT_NAME}_data.json'
if not os.path.exists(DATA_PATH):
    # Read the query text first so the file is closed before the
    # (potentially slow) remote query starts.
    with open('./query/query_2.txt', 'r') as query_file:
        query_text = query_file.read()
    df = client.query(query_text)
    try:
        os.makedirs('data', exist_ok=True)
        df.to_json(DATA_PATH, orient='records', lines=True)
    except Exception as e:
        # Chain explicitly so the underlying failure is shown as the cause.
        raise FileSavingError from e
else:
    df = pd.read_json(DATA_PATH, lines=True)
df.head()

Unnamed: 0,ImageUrl,ProductName,ShopLocation,TotalSold,ProductLink,category_1,category_2,Shop
0,https://cf.shopee.vn/file/fdc2a6ee9e509c28494d...,"[50KG-95KG] Có Video, Thun Lạnh co giãn 4 Chiề...",Gia Lai,"Đã bán 18,4k",https://shopee.vn/-50KG-95KG-Có-Video-Thun-Lạn...,Thời Trang Nam,Áo,Shop thể thao HMStore
1,https://cf.shopee.vn/file/a1e689bb71ab4b9eb998...,Áo thể thao nam chất thun lạnh co giãn 4 chiều...,Gia Lai,"Đã bán 6,9k",https://shopee.vn/Áo-thể-thao-nam-chất-thun-lạ...,Thời Trang Nam,Áo,Shop thể thao HMStore
2,https://cf.shopee.vn/file/bec3043085e9ebfb93d3...,[Hàng mới về] Áo thun thể thao nam chất thun l...,Gia Lai,"Đã bán 1,9k",https://shopee.vn/-Hàng-mới-về-Áo-thun-thể-tha...,Thời Trang Nam,Áo,Shop thể thao HMStore
3,https://cf.shopee.vn/file/3acef42a09adac1695a1...,[Hàng mới về] Áo thun thể thao nam mã [K04] ch...,Gia Lai,"Đã bán 1,5k",https://shopee.vn/-Hàng-mới-về-Áo-thun-thể-tha...,Thời Trang Nam,Áo,Shop thể thao HMStore
4,https://cf.shopee.vn/file/35a5e2bb8406156b2de3...,Áo thun nam cổ tròn tay ngắn Form rộng chất má...,Gia Lai,"Đã bán 2,5k",https://shopee.vn/Áo-thun-nam-cổ-tròn-tay-ngắn...,Thời Trang Nam,Áo,Shop thể thao HMStore


In [None]:
import pickle
import numpy as np
import requests
from io import BytesIO
from PIL import Image
import matplotlib.pyplot as plt
from core.helper import get_feature, load_img
from tensorflow.keras.applications.imagenet_utils import preprocess_input
from tensorflow.keras.preprocessing.image import img_to_array, array_to_img


from core.helper import ImageHandler

# Helper object used below to load images and to run the feature extractor.
image_handler = ImageHandler()

# Input shape handed to the extractor factory
# (presumably height, width, channels - TODO confirm against core.helper).
resnet_input_shape = (224, 224, 3)
feature_extractor = image_handler.resnet_extractor(resnet_input_shape)


# Accumulator of parallel lists, one independent empty list per field;
# the processing loop below appends to them.
iter_data = {
    field: []
    for field in (
        'img_urls',
        'features',
        'transformed_features',
        'product_names',
        'shop_locations',
        'total_solds',
        'product_urls',
        'category_1s',
        'category_2s',
        'shop',
    )
}


# Walk every row of df, load the product image, run it through the feature
# extractor and record the row's metadata next to the extracted features.
# Rows that fail for any reason are reported and skipped.
for i in range(len(df['ImageUrl'])):
    img_url = df.loc[i, 'ImageUrl']
    try:
        img = image_handler.load_img(img_url, target_size = (224, 224))
        # NOTE(review): array_to_img followed by np.expand_dims relies on numpy
        # converting the image back to an array; img_to_array may have been
        # intended - confirm against what image_handler.load_img returns.
        x = array_to_img(img)
        x = np.expand_dims(x, axis=0)
        x = preprocess_input(x)
        x_transformed = image_handler.get_feature(feature_extractor, x)

        # Build the complete record before touching iter_data: if any lookup
        # fails, nothing is appended and the parallel lists stay aligned.
        # Keys match iter_data's keys ('category_1s' / 'category_2s'); the
        # column names match the frame displayed by df.head()
        # ('category_1' / 'category_2', not 'id_category_*').
        record = {
            'img_urls': img_url,
            'features': x,
            'transformed_features': x_transformed,
            'product_names': df.loc[i, 'ProductName'],
            'shop_locations': df.loc[i, 'ShopLocation'],
            'total_solds': df.loc[i, 'TotalSold'],
            'product_urls': df.loc[i, 'ProductLink'],
            'category_1s': df.loc[i, 'category_1'],
            'category_2s': df.loc[i, 'category_2'],
            'shop': df.loc[i, 'Shop'],
        }
    except Exception as e:
        print(e)
        logger.error(e)
    else:
        for field, field_value in record.items():
            iter_data[field].append(field_value)

# Stack the per-image feature vectors into one (n_images, 2048) array;
# assumes the extractor yields 2048 values per image - TODO confirm.
iter_data['transformed_features'] = np.reshape(iter_data['transformed_features'], (len(iter_data['img_urls']), 2048))
    
# Make sure the output folder exists (no error if it is already there).
os.makedirs('./ModelSaved', exist_ok=True)

# File names carry the number of rows in df as a suffix. This equals the
# `i+1` the loop variable used to provide, but does not depend on a leaked
# loop index (undefined for an empty frame, stale on re-runs).
row_count = len(df)
for key, value in iter_data.items():
    # `with` guarantees each file is flushed and closed after the dump.
    with open(f'./ModelSaved/{key}' + str(row_count) + '.sav', 'wb') as out_file:
        pickle.dump(value, out_file)

In [None]:
from sklearn.neighbors import NearestNeighbors

# Fit a 5-nearest-neighbour index (ball tree, Euclidean distance) on the
# extracted feature vectors.
neighbors = NearestNeighbors(n_neighbors=5, algorithm='ball_tree', metric='euclidean')
# Alternative configurations kept for reference:
# neighbors = NearestNeighbors(n_neighbors=5, algorithm='auto', metric='cosine')
# neighbors = NearestNeighbors(n_neighbors=3, algorithm='auto', metric='minkowski')
neighbors.fit(np.array(iter_data['transformed_features']))
# save the model to ModelSaved folder
filename = './ModelSaved/finalized_model.sav'
# Create the folder if this cell runs before anything else has made it.
os.makedirs(os.path.dirname(filename), exist_ok=True)
# `with` closes the file handle once the model has been written.
with open(filename, 'wb') as model_file:
    pickle.dump(neighbors, model_file)