In [1]:
from selenium import webdriver
import pandas as pd
import re
import requests
import os
from zipfile import ZipFile
from zipfile import BadZipFile
from pathlib import Path
import shutil
import csv
from selenium.common.exceptions import NoSuchElementException   
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException
from selenium.common.exceptions import NoSuchElementException
from urllib.request import Request, urlopen

In [2]:
def get_texturehaven(directory_output, csv_directory):
    """Scrape texturehaven.com and unpack its texture archives.

    Creates ``directory_output`` and ``csv_directory`` when they do not exist
    yet, then runs the nested helpers of this function in sequence: collect
    the texture page links, resolve the archive download links, download the
    archives and extract/rename their content.

    Args:
        directory_output: Directory that receives the extracted texture folders.
        csv_directory: Directory that receives the intermediate links CSV.
    """

    # os.mkdir creates one level only: the parent directory must already exist.
    if not os.path.exists(directory_output):
            os.mkdir(directory_output)

    if not os.path.exists(csv_directory):
            os.mkdir(csv_directory)
    
    def download_files(url, save_path, chunk_size=1024):
        """Stream the resource at ``url`` into the file ``save_path``.

        The body is requested with ``stream=True`` and written in pieces of
        ``chunk_size`` bytes, so it never has to be held in memory as a whole.
        """
        response = requests.get(url, stream=True)
        with open(save_path, 'wb') as target:
            target.writelines(response.iter_content(chunk_size=chunk_size))
            

    def get_links_to_csv(base_url, file_name_to_store, anchor_grid_id, anchor_css_selector=None):
        """Collect the texture page links of the overview grid into a CSV.

        Opens ``base_url`` in Chrome, looks up the element with id
        ``anchor_grid_id`` and reads the ``href`` of every anchor inside it
        (or of every element matching ``anchor_css_selector`` when given).

        Args:
            base_url: Overview page to open.
            file_name_to_store: Path of the CSV to write.
            anchor_grid_id: ``id`` of the element that wraps the links.
            anchor_css_selector: Optional CSS selector replacing the default ``"a"``.

        The CSV gets the columns ``title`` (lower-cased text after the first
        ``=`` of the link), ``url`` and ``resolution`` (always ``'2k'``).
        """
        selector = "a" if anchor_css_selector is None else anchor_css_selector
        texture_csv_rows = []

        driver = webdriver.Chrome()
        try:
            driver.get(base_url)
            # find_element(By.<strategy>, value) works on Selenium 3 and 4; the
            # find_element_by_* shortcuts were removed in Selenium 4.3.
            anchor_grid = driver.find_element(By.ID, anchor_grid_id)

            for anchor in anchor_grid.find_elements(By.CSS_SELECTOR, selector):
                parsed_url = anchor.get_attribute("href")
                # Skip anchors without a usable "...=<title>" link instead of
                # aborting the whole scrape on one odd element.
                if not parsed_url or '=' not in parsed_url:
                    continue
                texture_csv_rows.append({
                    'title': parsed_url.split('=')[1].lower(),
                    'url': parsed_url,
                    'resolution': '2k'
                })
        finally:
            # Shut the browser down even when a lookup fails.
            driver.quit()

        pd.DataFrame(texture_csv_rows).to_csv(file_name_to_store)


    # Example value for xPath: '//*[@id="preview-download"]/div[2]/div[1]/div[3]/div[2]/a[1]'
    def get_download_links_to_csv(csv_name_with_links, resolution, xPath):
        """Add the archive download link of every texture to the links CSV.

        Visits the ``url`` of each row of ``csv_name_with_links``, reads the
        ``href`` of the element found by ``xPath`` and rewrites the same CSV
        with the columns ``title``, ``url``, ``resolution`` and
        ``download_url``.

        Args:
            csv_name_with_links: CSV with ``title``/``url`` columns; overwritten.
            resolution: Value stored in the ``resolution`` column of every row.
            xPath: XPath of the download anchor on a texture page.
        """
        texture_csv_rows = []

        driver = webdriver.Chrome()
        try:
            with open(csv_name_with_links, newline='') as csvfile:
                for row in csv.DictReader(csvfile):
                    driver.get(row['url'])
                    # find_element(By.XPATH, ...) replaces the
                    # find_element_by_xpath shortcut removed in Selenium 4.3.
                    download_anchor = driver.find_element(By.XPATH, xPath)
                    texture_csv_rows.append({
                        'title': row['title'],
                        'url': row['url'],
                        'resolution': resolution,
                        'download_url': download_anchor.get_attribute("href")
                    })
        finally:
            # Shut the browser down even when a page lacks the anchor.
            driver.quit()

        # The input file is closed by now, so it can be overwritten in place.
        pd.DataFrame(texture_csv_rows).to_csv(csv_name_with_links)


    def download_zip_files(csv_with_download_links, dir_output):
        """Download every archive listed in the CSV into ``dir_output``.

        Each file is stored under the lower-cased last path component of its
        ``download_url``. Rows without a usable ``download_url`` are skipped.

        Args:
            csv_with_download_links: CSV with a ``download_url`` column.
            dir_output: Target directory; created (with parents) when missing.
        """
        # makedirs also creates missing parents and tolerates an existing target.
        os.makedirs(dir_output, exist_ok=True)

        file_path_template = re.compile(r'\/([^/]+$)')

        with open(csv_with_download_links, newline='') as csvfile:
            for row in csv.DictReader(csvfile):
                match = file_path_template.search(row.get('download_url') or '')
                if match is None:
                    # Empty cell or a link without a path component.
                    continue
                # os.path.join keeps working when dir_output has no trailing slash.
                save_path = os.path.join(dir_output, match.group(1).lower())
                download_files(row['download_url'], save_path)
                

    def color_map_count(map_dir):
        """Return how many entries of ``map_dir`` have ``'color'`` in their name."""
        return sum(1 for entry in os.listdir(map_dir) if 'color' in os.path.basename(entry))
            
            
    def remove_other_color_maps(map_dir, main_color_name):
        """Delete the colour maps of ``map_dir`` that are not the one to keep.

        A file is removed when its name contains ``'color'`` but does not
        contain ``main_color_name``; every other entry is left untouched.
        """
        for entry in os.listdir(map_dir):
            file_name = os.path.basename(entry)
            if 'color' not in file_name:
                continue
            if main_color_name not in file_name:
                os.remove(f'{map_dir}/{file_name}')

                
    def fix_map_naming(map_dir, old_folder_name, new_folder_name):
        """Rename every entry of ``map_dir``, replacing one name part by another.

        Each occurrence of ``old_folder_name`` in an entry name is replaced
        by ``new_folder_name``.
        """
        for file_name in map(os.path.basename, os.listdir(map_dir)):
            source = f'{map_dir}/{file_name}'
            target = f'{map_dir}/{file_name.replace(old_folder_name, new_folder_name)}'
            os.rename(source, target)
            
    def fix_color_maps(map_dir):
        """Split a texture folder with several colour maps into one folder each.

        When ``map_dir`` holds more than one colour map, the folder is copied
        to ``<map_dir>_2`` .. ``<map_dir>_N`` and finally renamed to
        ``<map_dir>_1``; in folder ``k`` every colour map whose name lacks
        ``color_0<k>`` is removed. Folders with at most one colour map are
        left as they are.
        """
        total_color_maps = color_map_count(map_dir)
        if total_color_maps <= 1:
            return

        print(map_dir)
        # Copies are made first, while the original folder is still complete.
        for variant in range(2, total_color_maps + 1):
            variant_dir = f'{map_dir}_{variant}'
            shutil.copytree(map_dir, variant_dir)
            remove_other_color_maps(variant_dir, f'color_0{variant}')

        primary_dir = f'{map_dir}_1'
        os.rename(map_dir, primary_dir)
        remove_other_color_maps(primary_dir, 'color_01')
    
            
    def fix_maptypes(map_dir, source_name):
        """Prefix and relabel every file of ``map_dir``.

        The first key of ``texture_map_labels[source_name]`` found in a file
        name is replaced by its label. The name is then lower-cased, every
        ``-`` is doubled to ``--`` and ``<source_name>_`` is put in front.
        Files without a matching key still get the prefix and normalisation.
        """
        for entry in os.listdir(map_dir):
            original_name = os.path.basename(entry)
            relabelled = original_name

            for map_key, map_label in texture_map_labels[source_name].items():
                if map_key in original_name:
                    # Only the first matching key is applied.
                    relabelled = original_name.replace(map_key, map_label)
                    break

            target_name = f'{source_name}_{relabelled.lower().replace("-", "--")}'
            os.rename(f'{map_dir}/{original_name}', f'{map_dir}/{target_name}')

    
    def extract_files(zip_dir, extracting_dir):
        """Extract every archive of ``zip_dir`` into its own texture folder.

        The folder name is ``texturehaven_<archive name>`` with ``_jpg``
        removed and the last four characters (presumably the ``.zip``
        suffix) cut off. After extraction the maps are relabelled with
        ``fix_maptypes`` and, except for two listed archives, split per colour
        map with ``fix_color_maps``. Both paths are joined by plain
        concatenation, so they need a trailing slash.
        """
        # Archives for which fix_color_maps is skipped.
        skip_color_fix = ('cobblestone_color_2k', 'texturehaven_roof_slates_03_2k')

        for entry in os.listdir(zip_dir):
            zip_file_name = os.path.basename(entry)
            dir_to_extract = extracting_dir + f'texturehaven_{zip_file_name}'.replace('_jpg', '')
            target_dir = dir_to_extract[:-4]

            with ZipFile(f'{zip_dir}{zip_file_name}', 'r') as zip_ref:
                zip_ref.extractall(target_dir)
                fix_maptypes(target_dir, 'texturehaven')

                if not any(marker in dir_to_extract for marker in skip_color_fix):
                    fix_color_maps(target_dir)
    
    
    
    # Step 1: store the texture page links found in the overview grid.
    get_links_to_csv(base_url = "https://texturehaven.com/textures/",
                     file_name_to_store = f'{csv_directory}/texturehaven_links.csv',
                     anchor_grid_id = "item-grid",
                     )

    # Step 2: resolve the download link of every texture; the same CSV is
    # read and then rewritten with the extra download_url column.
    get_download_links_to_csv(csv_name_with_links = f'{csv_directory}/texturehaven_links.csv', 
                              resolution = '8k', 
                              xPath = '//*[@id="preview-download"]/div[2]/div[1]/div[3]/div[4]/a[1]'
                             )

    # Step 3: download the archives into a folder relative to the working directory.
    download_zip_files(csv_with_download_links = f'{csv_directory}/texturehaven_links.csv', 
                   dir_output = './texturehaven_zip/'
                  )

    # Step 4: unpack the archives; the trailing slash is required because the
    # helper joins paths by plain string concatenation.
    extract_files(zip_dir = './texturehaven_zip/', 
                  extracting_dir = f'{directory_output}/'
                 )

In [3]:
def get_cgbookcase(directory_output, csv_directory):
    """Scrape cgbookcase.com and download its texture maps.

    Creates ``directory_output`` and ``csv_directory`` when they do not exist
    yet, then runs the nested helpers of this function in sequence: collect
    the texture page links, resolve the per-map download links, download the
    maps into one folder per texture and clean up the result.

    Args:
        directory_output: Directory that receives the texture folders.
        csv_directory: Directory that receives the intermediate links CSV.
    """

    # os.mkdir creates one level only: the parent directory must already exist.
    if not os.path.exists(directory_output):
            os.mkdir(directory_output)

    if not os.path.exists(csv_directory):
            os.mkdir(csv_directory)
        
    def download_file(url, save_path, chunk_size=1024):
        """Fetch ``url`` as a stream and store its body at ``save_path``.

        The response is consumed in blocks of ``chunk_size`` bytes.
        """
        response = requests.get(url, stream=True)
        with open(save_path, 'wb') as target:
            target.writelines(response.iter_content(chunk_size=chunk_size))
            
            
    def check_exists_by_class(element_class, webdriver):
        """Tell whether the current page has an element with ``element_class``.

        Args:
            element_class: Class name to look for.
            webdriver: The Selenium driver to query. Inside this helper the
                parameter shadows the ``selenium.webdriver`` module.

        Returns:
            ``True`` when the lookup succeeds, ``False`` when Selenium raises
            ``NoSuchElementException``.
        """
        try:
            # find_element(By.CLASS_NAME, ...) works on Selenium 3 and 4; the
            # find_element_by_class_name shortcut was removed in Selenium 4.3.
            webdriver.find_element(By.CLASS_NAME, element_class)
        except NoSuchElementException:
            return False
        return True


    def get_links_to_csv(base_url, file_name_to_store, anchor_grid_id, anchor_css_selector=None):
        """Walk the paginated texture listing and store every texture link.

        Starting at page 1, each listing page is opened and the ``href`` of
        every anchor inside the element with id ``anchor_grid_id`` is
        collected. Paging stops after the first page without a "next" arrow
        (class ``fas fa-angle-right``).

        Args:
            base_url: Listing URL. When it contains a ``page=<n>`` query
                parameter the page number is substituted into it; otherwise
                the built-in cgbookcase listing URL is used.
            file_name_to_store: Path of the CSV to write (``title``, ``url``).
            anchor_grid_id: ``id`` of the element that wraps the links.
            anchor_css_selector: Optional CSS selector replacing the default ``"a"``.
        """
        default_url = "https://www.cgbookcase.com/textures/?category=All&resolution=2&page=1&color=all&search="
        page_pattern = re.compile(r'([?&]page=)\d+')
        # Honour base_url when it carries a page number to substitute;
        # otherwise fall back to the listing URL that used to be hard-coded.
        url_template = base_url if base_url and page_pattern.search(base_url) else default_url
        selector = "a" if anchor_css_selector is None else anchor_css_selector
        button_class = 'fas.fa-angle-right'
        texture_csv_rows = []

        driver = webdriver.Chrome()
        try:
            page = 1
            while True:
                driver.get(page_pattern.sub(lambda found: f'{found.group(1)}{page}', url_template))
                page += 1

                anchor_grid = driver.find_element(By.ID, anchor_grid_id)
                anchor_tags = anchor_grid.find_elements(By.CSS_SELECTOR, selector)
                has_next_page = check_exists_by_class(button_class, driver)

                # The links of the last page are collected as well, so the
                # rows are built before the paging decision is applied.
                for anchor in anchor_tags:
                    parsed_url = anchor.get_attribute("href")
                    if not parsed_url:
                        continue
                    texture_csv_rows.append({
                        'title': parsed_url.split('/').pop().lower(),
                        'url': parsed_url,
                    })

                if not has_next_page:
                    break
        finally:
            # Shut the browser down even when a lookup fails.
            driver.quit()

        pd.DataFrame(texture_csv_rows).to_csv(file_name_to_store)

    def check_for_4k_res(anchor_tags):
        """Return ``True`` when the ``href`` of any anchor contains ``'4K'``."""
        return any('4K' in anchor.get_attribute("href") for anchor in anchor_tags)
    
    
    def check_for_3k_res(anchor_tags):
        """Return ``True`` when the ``href`` of any anchor contains ``'3K'``."""
        return any('3K' in anchor.get_attribute("href") for anchor in anchor_tags)
    

    def get_download_links_to_csv(csv_name_with_links, resolution, csv_for_download_links, xPath):
        """Store the map download links of every texture, best resolution first.

        For each row of ``csv_name_with_links`` the texture page is opened and
        the links inside ``#view-downloadLinks`` are inspected. If any link
        contains ``4K`` only the 4K links are kept, else the 3K links when any
        exist, else the 2K links. One CSV row is written per kept link with
        the columns ``title``, ``url``, ``resolution`` (``4k``/``3k``/``2k``),
        ``property`` (text after the last ``_`` of the link) and
        ``download_url``.

        Args:
            csv_name_with_links: CSV with ``title``/``url`` columns to read.
            resolution: Accepted for interface compatibility; not used.
            csv_for_download_links: Path of the CSV to write.
            xPath: Accepted for interface compatibility; not used.
        """
        texture_csv_rows = []

        driver = webdriver.Chrome()
        try:
            with open(csv_name_with_links, newline='') as csvfile:
                for row in csv.DictReader(csvfile):
                    driver.get(row['url'])
                    anchor_grid = driver.find_element(By.XPATH, '//*[@id="view-downloadLinks"]')
                    hrefs = [anchor.get_attribute("href")
                             for anchor in anchor_grid.find_elements(By.CSS_SELECTOR, "a")]
                    hrefs = [href for href in hrefs if href]

                    # Highest resolution offered by the page; 2K is the fallback.
                    wanted = next((tag for tag in ('4K', '3K')
                                   if any(tag in href for href in hrefs)), '2K')

                    # Only matching links produce a row. Appending outside this
                    # test used to repeat the previous row for every other link.
                    for href in hrefs:
                        if wanted in href:
                            texture_csv_rows.append({
                                'title': row['title'],
                                'url': row['url'],
                                'resolution': wanted.lower(),
                                'property': href.split('_').pop(),
                                'download_url': href
                            })
        finally:
            # Shut the browser down even when a page lacks the link list.
            driver.quit()

        pd.DataFrame(texture_csv_rows).to_csv(csv_for_download_links)
        

    def fix_maptypes(map_dir, source_name):
        """Relabel the map files of ``map_dir`` using ``texture_map_labels``.

        Every key of ``texture_map_labels[source_name]`` found in a file name
        is replaced by its label; a file with at least one match is renamed
        once to the lower-cased result. Files without a match are untouched.
        """
        for entry in os.listdir(map_dir):
            original_name = os.path.basename(entry)
            new_name = original_name
            matched = False

            for map_key, map_label in texture_map_labels[source_name].items():
                if map_key in new_name:
                    new_name = new_name.replace(map_key, map_label)
                    matched = True

            # One rename from the name that is actually on disk. Renaming after
            # every match pointed the second rename at the un-lowered
            # intermediate name, which no longer exists once it is lower-cased.
            if matched:
                os.rename(f'{map_dir}/{original_name}', f'{map_dir}/{new_name.lower()}')
        
        
    def download_files(csv_with_download_links, dir_output):
        """Download the maps of every texture into one folder per texture.

        Consecutive CSV rows with the same ``title`` form one texture. Its
        folder is ``cgbookcase_<title>_<resolution>`` (lower-cased, ``-``
        doubled to ``--``) below ``dir_output``, and each map is saved there
        as ``<folder name>_<property>``. After a texture is complete its files
        are relabelled with ``fix_maptypes``.

        Args:
            csv_with_download_links: CSV with the columns ``title``,
                ``resolution``, ``property`` and ``download_url``.
            dir_output: Target directory; joined by plain concatenation, so it
                needs a trailing slash.
        """
        from itertools import groupby  # local import: only this helper needs it

        with open(csv_with_download_links, newline='') as csvfile:
            reader = csv.DictReader(csvfile)
            # groupby yields every run of equal titles, including the last one
            # (which used to be dropped), and nothing at all for an empty CSV.
            for texture_title, group in groupby(reader, key=lambda row: row['title']):
                rows = list(group)
                title = f'cgbookcase_{texture_title}_{rows[0]["resolution"]}'.lower().replace('-', '--')
                texture_dir = f'{dir_output}{title}'
                # Tolerate folders left over from an earlier run.
                os.makedirs(texture_dir, exist_ok=True)

                for dir_item in rows:
                    save_path = f"{texture_dir}/{title}_{dir_item['property']}"
                    download_file(dir_item['download_url'], save_path)

                fix_maptypes(texture_dir, 'cgbookcase')




    def extract_files(zip_dir, extracting_dir):
        """Extract every archive of ``zip_dir`` into a folder of its own.

        The target folder is ``extracting_dir`` plus the archive name without
        its last four characters (presumably the ``.zip`` suffix). Both paths
        are joined by plain concatenation, so they need a trailing slash.
        """
        for zip_file_name in map(os.path.basename, os.listdir(zip_dir)):
            archive_path = zip_dir + zip_file_name
            target_dir = (extracting_dir + zip_file_name)[:-4]
            with ZipFile(archive_path, 'r') as zip_ref:
                zip_ref.extractall(target_dir)

                
    def delete_folders_with_one_picture(dir_of_folders):
        """Remove every sub-folder of ``dir_of_folders`` holding exactly one entry.

        Plain files directly inside ``dir_of_folders`` are ignored.
        """
        for folder in os.listdir(dir_of_folders):
            # os.path.join works with and without a trailing slash.
            dir_of_folder = os.path.join(dir_of_folders, os.path.basename(folder))
            if not os.path.isdir(dir_of_folder):
                # os.listdir would raise on a plain file.
                continue
            if len(os.listdir(dir_of_folder)) == 1:
                shutil.rmtree(dir_of_folder)
                
                
    # Step 1: store the texture page links of every listing page.
    get_links_to_csv(base_url = "https://www.cgbookcase.com/textures/?category=All&resolution=2&page=1&color=all&search=",
                 file_name_to_store = f"{csv_directory}/cgbookcase_links.csv",
                 anchor_grid_id = "textures-list"
                 )
    
    # Step 2: resolve the per-map download links; the same CSV is rewritten.
    get_download_links_to_csv(csv_name_with_links = f"{csv_directory}/cgbookcase_links.csv", 
                          resolution = '2k', 
                          csv_for_download_links = f"{csv_directory}/cgbookcase_links.csv",
                          xPath = '//*[@id="preview-download"]/div[2]/div[1]/div[3]/div[2]/a[1]'
                         )
    
    # Step 3: download the maps straight into one folder per texture.
    download_files(csv_with_download_links = f"{csv_directory}/cgbookcase_links.csv", 
               dir_output = f'{directory_output}/'
              )
    
    # Nothing in this pipeline creates './cgbookcase_zip/' (the maps are
    # downloaded as single files), so archives are extracted only when that
    # folder was provided by hand. Listing a missing folder used to raise and
    # skip the cleanup below. The trailing slash is needed because
    # extract_files joins paths by plain concatenation.
    if os.path.isdir('./cgbookcase_zip/'):
        extract_files(zip_dir = './cgbookcase_zip/', 
                  extracting_dir = f'{directory_output}/'
                 )
    
    # Step 4: drop texture folders that ended up with a single file.
    delete_folders_with_one_picture(dir_of_folders = f'{directory_output}/')
    
    

In [4]:
def get_goodtextures(directory_output, csv_directory):
    """Unpack and rename the texture archives of goodtextures.com.

    Creates ``directory_output`` and ``csv_directory`` when they do not exist
    yet and defines nested helpers for scraping the download links,
    downloading the archives and extracting/renaming their content.

    Args:
        directory_output: Directory that receives the extracted texture folders.
        csv_directory: Directory for the intermediate links CSV.
    """

    # os.mkdir creates one level only: the parent directory must already exist.
    if not os.path.exists(directory_output):
            os.mkdir(directory_output)

    if not os.path.exists(csv_directory):
            os.mkdir(csv_directory)
        
    def download_files(url, save_path, chunk_size=1024):
        """Save the streamed response of ``url`` to ``save_path``.

        Data is written in pieces of ``chunk_size`` bytes as it arrives.
        """
        response = requests.get(url, stream=True)
        with open(save_path, 'wb') as target:
            target.writelines(response.iter_content(chunk_size=chunk_size))
            

    def get_download_links_straight_to_csv(base_url, file_name_to_store, anchor_grid_xpath, resolution, anchor_css_selector=None):
        """Collect the archive download links of every category page into a CSV.

        Opens ``base_url``, gathers the category links inside the element at
        ``anchor_grid_xpath`` (anchors inside its ``.row`` children, or the
        elements matching ``anchor_css_selector`` when given), then visits
        each category page and reads one download link per ``div.container``.

        Args:
            base_url: Page listing the texture categories.
            file_name_to_store: Path of the CSV to write (``title``, ``url``,
                ``resolution``, ``download_url``).
            anchor_grid_xpath: XPath of the element wrapping the category links.
            resolution: Value stored in the ``resolution`` column of every row.
            anchor_css_selector: Optional CSS selector for the category anchors.
        """
        texture_csv_rows = []

        driver = webdriver.Chrome()
        try:
            driver.get(base_url)
            anchor_grid = driver.find_element(By.XPATH, anchor_grid_xpath)

            if anchor_css_selector is None:
                anchor_tags = [
                    anchor
                    for div in anchor_grid.find_elements(By.CSS_SELECTOR, ".row")
                    for anchor in div.find_elements(By.CSS_SELECTOR, "a")
                ]
            else:
                # This branch used to find the anchors but never read their links.
                anchor_tags = anchor_grid.find_elements(By.CSS_SELECTOR, anchor_css_selector)

            # Read the links before navigating away from the page that owns
            # the anchor elements.
            parsed_urls = [anchor.get_attribute("href") for anchor in anchor_tags]

            for url in parsed_urls:
                driver.get(url)
                divs = driver.find_element(By.XPATH, '/html/body/div[2]/div/div[1]')
                containers_list = divs.find_elements(By.CSS_SELECTOR, 'div.container')

                # The XPath is absolute, so the container is addressed by its
                # 1-based position rather than relative to the element.
                for position, container in enumerate(containers_list, start=1):
                    download_anchor = container.find_element(
                        By.XPATH,
                        f'/html/body/div[2]/div/div[1]/div[{position}]/div/div/div/div[2]/div/p[4]/a')
                    download_url = download_anchor.get_attribute("href")
                    title = download_url.split('/')[-1].split('_materials')[0]
                    texture_csv_rows.append({
                        'title': title.lower(),
                        'url': url,
                        'resolution': resolution,
                        'download_url': download_url
                    })
        finally:
            # Shut the browser down even when a lookup fails.
            driver.quit()

        pd.DataFrame(texture_csv_rows).to_csv(file_name_to_store)

    def download_zip_files(csv_with_download_links, dir_output):
        """Download every archive listed in the CSV into ``dir_output``.

        Each file is stored as ``<lower-cased last path component of the
        link>_<resolution>``. Rows without a usable ``download_url`` are
        skipped.

        Args:
            csv_with_download_links: CSV with ``download_url`` and
                ``resolution`` columns.
            dir_output: Target directory; created (with parents) when missing.
        """
        # makedirs also creates missing parents and tolerates an existing target.
        os.makedirs(dir_output, exist_ok=True)

        file_path_template = re.compile(r'\/([^/]+$)')

        with open(csv_with_download_links, newline='') as csvfile:
            for row in csv.DictReader(csvfile):
                match = file_path_template.search(row.get('download_url') or '')
                if match is None:
                    # Empty cell or a link without a path component.
                    continue
                file_name = f'{match.group(1).lower()}_{row["resolution"]}'
                # os.path.join keeps working when dir_output has no trailing slash.
                download_files(row['download_url'], os.path.join(dir_output, file_name))

                
    def move_files_one_layer_up(dir_of_folder):
        """Flatten ``dir_of_folder`` by one level.

        Every entry of each sub-folder is moved directly into
        ``dir_of_folder`` with the prefix ``goodtextures_``, and the emptied
        sub-folder is removed. Plain files already on the top level are left
        where they are.
        """
        for inner_folder in os.listdir(dir_of_folder):
            inner_folder_path = f'{dir_of_folder}/{os.path.basename(inner_folder)}'
            if not os.path.isdir(inner_folder_path):
                # Nothing to flatten; listing a plain file would raise.
                continue

            for file in os.listdir(inner_folder_path):
                shutil.move(f'{inner_folder_path}/{os.path.basename(file)}',
                            f'{dir_of_folder}/goodtextures_{os.path.basename(file)}')

            # Remove each emptied folder. Doing this once after the loop only
            # removed the last folder and failed on an empty directory.
            shutil.rmtree(inner_folder_path)
            

    def fix_maptypes(map_dir, source_name):
        """Relabel the map files of ``map_dir`` and tag them with ``2k``.

        Every key of ``texture_map_labels[source_name]`` is replaced by its
        label in the file name, ``2k_`` is inserted in front of the last
        ``_``-separated part, and the file is renamed once to the lower-cased
        result.
        """
        for entry in os.listdir(map_dir):
            original_name = os.path.basename(entry)
            new_name = original_name

            for map_key, map_label in texture_map_labels[source_name].items():
                new_name = new_name.replace(map_key, map_label)

            # Insert the tag once per file, in front of the last name part only.
            # Inside the label loop it was added again for every label entry.
            head, separator, tail = new_name.rpartition('_')
            new_name = f'{head}{separator}2k_{tail}'.lower()

            os.rename(f'{map_dir}/{original_name}', f'{map_dir}/{new_name}')
            
            
    def extract_files(zip_dir, extracting_dir):
        """Extract every archive of ``zip_dir`` and normalise its content.

        The target folder is ``extracting_dir`` + ``goodtextures_`` + archive
        name with every ``.zip`` removed from the resulting path. After
        extraction the content is flattened with ``move_files_one_layer_up``
        and relabelled with ``fix_maptypes``. A file that raises
        ``BadZipFile`` is deleted from ``zip_dir``. Paths are joined by plain
        concatenation, so both arguments need a trailing slash.
        """
        for entry in os.listdir(zip_dir):
            zip_file_name = os.path.basename(entry)
            archive_path = zip_dir + zip_file_name
            target_dir = (extracting_dir + 'goodtextures_' + zip_file_name).replace('.zip', '')

            try:
                with ZipFile(archive_path, 'r') as zip_ref:
                    zip_ref.extractall(target_dir)
                    move_files_one_layer_up(target_dir)
                    fix_maptypes(target_dir, 'goodtextures')
            except BadZipFile:
                os.remove(archive_path)

            
#     get_download_links_straight_to_csv(base_url = "https://www.goodtextures.com/blog/22/pbr-texture-categories",
#                  file_name_to_store = f"{csv_directory}/goodtextures_links.csv",
#                  anchor_grid_xpath = "/html/body/div[2]/div/div[1]",
#                  resolution = '2k'              
#                  )

    
#     download_zip_files(csv_with_download_links = f"{csv_directory}/goodtextures_links.csv",
#                dir_output = './goodtextures_zip/'
#               )
    
    
    # Only the extraction step is active: the link scraping and download
    # calls above are commented out, so './goodtextures_zip/' must already
    # contain the archives when this runs.
    extract_files(zip_dir = './goodtextures_zip/', 
                  extracting_dir = f'{directory_output}/'
                 )
                  
        
#     move_files_one_layer_up(dir_of_folders = './unzipped/')

In [5]:
def get_archinspiration(directory_output, csv_directory):
    """Scrape archinspirations.com and unpack its free material archives.

    Creates ``directory_output`` and ``csv_directory`` when they do not exist
    yet, then runs the nested helpers of this function in sequence: collect
    the material page links, resolve the free download links, download the
    archives and extract/rename their content.

    Args:
        directory_output: Directory that receives the extracted material folders.
        csv_directory: Directory that receives the intermediate links CSV.
    """

    # os.mkdir creates one level only: the parent directory must already exist.
    if not os.path.exists(directory_output):
            os.mkdir(directory_output)

    if not os.path.exists(csv_directory):
            os.mkdir(csv_directory)
        
    def download_files(url, save_path, chunk_size=1024):
        """Write the streamed body of ``url`` to ``save_path``.

        The download is processed in blocks of ``chunk_size`` bytes.
        """
        response = requests.get(url, stream=True)
        with open(save_path, 'wb') as target:
            target.writelines(response.iter_content(chunk_size=chunk_size))
            

    def get_links_to_csv(base_url, file_name_to_store, anchor_grid_class_name, anchor_css_selector=None):
        """Collect the material page links of the gallery into a CSV.

        Opens ``base_url``, scrolls to the bottom, clicks the "load more"
        button once and reads the ``href`` of every anchor inside the element
        with class ``anchor_grid_class_name`` (or of every element matching
        ``anchor_css_selector`` when given).

        Args:
            base_url: Gallery page to open.
            file_name_to_store: Path of the CSV to write.
            anchor_grid_class_name: Class name of the element wrapping the links.
            anchor_css_selector: Optional CSS selector replacing the default ``"a"``.

        The CSV gets the columns ``title`` (last path part of the link,
        lower-cased, ``-`` doubled to ``--``) and ``url``.
        """
        selector = "a" if anchor_css_selector is None else anchor_css_selector
        texture_csv_rows = []

        driver = webdriver.Chrome()
        try:
            driver.get(base_url)

            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

            # Sets how long the following element lookups keep polling before
            # they fail; it does not pause the script by itself.
            driver.implicitly_wait(10)

            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

            # find_element(By.CLASS_NAME, ...) replaces the
            # find_element_by_class_name shortcut removed in Selenium 4.3.
            driver.find_element(
                By.CLASS_NAME,
                "sqs-block-button-element--small.sqs-block-button-element.pagination-loadMore").click()

            anchor_grid = driver.find_element(By.CLASS_NAME, anchor_grid_class_name)

            for anchor in anchor_grid.find_elements(By.CSS_SELECTOR, selector):
                parsed_url = anchor.get_attribute("href")
                if not parsed_url:
                    # An anchor without a link carries nothing to store.
                    continue
                texture_csv_rows.append({
                    'title': parsed_url.split('/')[-1].lower().replace('-', '--'),
                    'url': parsed_url
                })
        finally:
            # Shut the browser down even when a lookup fails.
            driver.quit()

        pd.DataFrame(texture_csv_rows).to_csv(file_name_to_store)


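    # Leftover XPath note from the texturehaven scraper (the helper below selects by class name instead):
    # '//*[@id="preview-download"]/div[2]/div[1]/div[3]/div[2]/a[1]'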
    def get_download_links_to_csv(csv_name_with_links, resolution, csv_for_download_links, class_name):
        """Store the free download link(s) of every material page in a CSV.

        Visits the ``url`` of each row of ``csv_name_with_links`` and keeps
        the ``href`` of every element with class ``class_name`` whose visible
        text contains ``FREE``.

        Args:
            csv_name_with_links: CSV with ``title``/``url`` columns to read.
            resolution: Value stored in the ``resolution`` column of every row.
            csv_for_download_links: Path of the CSV to write (``title``,
                ``url``, ``resolution``, ``download_url``).
            class_name: Class name of the download buttons.
        """
        texture_csv_rows = []

        driver = webdriver.Chrome()
        try:
            with open(csv_name_with_links, newline='') as csvfile:
                for row in csv.DictReader(csvfile):
                    driver.get(row['url'])
                    for download_anchor in driver.find_elements(By.CLASS_NAME, class_name):
                        if 'FREE' not in download_anchor.text:
                            continue
                        texture_csv_rows.append({
                            'title': row['title'],
                            'url': row['url'],
                            # The caller's value; it was hard-coded to '1k'.
                            'resolution': resolution,
                            'download_url': download_anchor.get_attribute("href")
                        })
        finally:
            # Shut the browser down even when a page fails to load.
            driver.quit()

        pd.DataFrame(texture_csv_rows).to_csv(csv_for_download_links)


    def download_zip_files(csv_with_download_links, dir_output):
        """Download every archive listed in the CSV into ``dir_output``.

        Each file is stored as ``archinspirations_<title>_<resolution>``.
        Rows with an empty ``download_url`` are skipped.

        Args:
            csv_with_download_links: CSV with the columns ``title``,
                ``resolution`` and ``download_url``.
            dir_output: Target directory; created (with parents) when missing.
        """
        # makedirs also creates missing parents and tolerates an existing target.
        os.makedirs(dir_output, exist_ok=True)

        with open(csv_with_download_links, newline='') as csvfile:
            for row in csv.DictReader(csvfile):
                if not row.get('download_url'):
                    # Nothing to fetch for this material.
                    continue
                archive_name = f"archinspirations_{row['title']}_{row['resolution']}"
                # os.path.join keeps working when dir_output has no trailing slash.
                download_files(row['download_url'], os.path.join(dir_output, archive_name))

    def fix_maptypes(map_dir, source_name):
        """Relabel and lower-case the map files of ``map_dir``.

        Every key of ``texture_map_labels[source_name]`` is replaced by its
        label in the file name and the file is renamed once to the
        lower-cased result.
        """
        for entry in os.listdir(map_dir):
            original_name = os.path.basename(entry)
            new_name = original_name

            for map_key, map_label in texture_map_labels[source_name].items():
                new_name = new_name.replace(map_key, map_label)

            new_name = new_name.lower()
            # One rename from the name that is actually on disk. Renaming once
            # per label pointed the second rename at the un-lowered
            # intermediate name, which no longer exists once it is lower-cased.
            if new_name != original_name:
                os.rename(f'{map_dir}/{original_name}', f'{map_dir}/{new_name}')
            
            
    def move_files_one_layer_up(dir_of_folder):
        """Keep only the content of the ``maps`` sub-folder, moved one level up.

        Top-level entries whose name does not contain ``maps`` are deleted,
        everything inside ``<dir_of_folder>/maps`` is moved into
        ``dir_of_folder``, the emptied ``maps`` folder is removed and the
        files are relabelled with ``fix_maptypes``.
        """
        # Remove unnecessary files from the top layer. One pass is enough; the
        # extra outer loop only repeated the same scan.
        for item in os.listdir(dir_of_folder):
            if 'maps' not in os.path.basename(item):
                os.remove(f'{dir_of_folder}/{item}')

        # Move the maps to the upper layer.
        maps_dir = f'{dir_of_folder}/maps'
        for item in os.listdir(maps_dir):
            shutil.move(f'{maps_dir}/{os.path.basename(item)}',
                        f'{dir_of_folder}/{os.path.basename(item)}')

        shutil.rmtree(maps_dir)
        fix_maptypes(dir_of_folder, 'archinspirations')
            
            
    def extract_files(zip_dir, extracting_dir):
        """Extract every archive of ``zip_dir`` and normalise its content.

        Each archive is extracted to ``extracting_dir`` + archive name, then
        flattened with ``move_files_one_layer_up`` and relabelled with
        ``fix_maptypes``. The archive named
        ``archinspirations_marble-ai-01b_1k`` and every file that raises
        ``BadZipFile`` are deleted from ``zip_dir`` instead. Paths are joined
        by plain concatenation, so both arguments need a trailing slash.
        """
        # NOTE(review): get_links_to_csv stores titles with doubled dashes, so
        # a downloaded archive is presumably named
        # 'archinspirations_marble--ai--01b_1k' and would not match this
        # literal. Confirm which spelling is intended.
        excluded_archive = 'archinspirations_marble-ai-01b_1k'

        for zip_file in os.listdir(zip_dir):
            zip_file_name = os.path.basename(zip_file)
            dir_to_extract = extracting_dir + zip_file_name
            try:
                # Exact comparison. The former `name in literal` test also
                # matched, and deleted, any file whose name is a fragment of
                # the literal.
                if zip_file_name == excluded_archive:
                    os.remove(zip_dir + zip_file_name)
                else:
                    with ZipFile(zip_dir + zip_file_name, 'r') as zip_ref:
                        zip_ref.extractall(dir_to_extract)
                        move_files_one_layer_up(dir_to_extract)
                        fix_maptypes(dir_to_extract, 'archinspirations')
            except BadZipFile:
                os.remove(zip_dir + zip_file_name)

#             shutil.rmtree(f'{dir_of_folder}/maps')
            

    # Step 1: store the material page links of the gallery.
    get_links_to_csv(base_url = "https://www.archinspirations.com/materials",
                 file_name_to_store = f"{csv_directory}/archinspirations_links.csv",
                 anchor_grid_class_name = 'summary-item-list.sqs-gallery.sqs-gallery-design-autogrid',
                 anchor_css_selector = 'a.summary-thumbnail-container.sqs-gallery-image-container'
                 )
    
    # Step 2: resolve the free download links; the same CSV is rewritten.
    get_download_links_to_csv(csv_name_with_links = f"{csv_directory}/archinspirations_links.csv", 
                          resolution = '1k', 
                          csv_for_download_links = f"{csv_directory}/archinspirations_links.csv",
                          class_name = 'sqs-block-button-element--small.sqs-block-button-element'
                         )
    
    # Step 3: download the archives into a folder relative to the working directory.
    download_zip_files(csv_with_download_links = f"{csv_directory}/archinspirations_links.csv", 
               dir_output = './archinspirations_zip/'
              )
    
    # Step 4: unpack the archives; the trailing slash is required because the
    # helper joins paths by plain string concatenation.
    extract_files(zip_dir = './archinspirations_zip/',
              extracting_dir = f'{directory_output}/'
             )
    
    # NOTE: before the extraction step you have to fix the marble-ai-01b folder by hand,
    # because it holds 2 folders: move one of those folders one layer up.

In [6]:
def get_FreePBR(directory_output, csv_directory):
    """Scrape freepbr.com, download the 2k texture archives and unpack them.

    The work is done in four stages, each implemented by a nested helper:
    collect the product links of every category into ``freepbr_links.csv``,
    resolve each product page to its download link (the same CSV is rewritten),
    download the archives into ``./freepbr_zip/`` and finally extract them into
    ``directory_output``, flattening and renaming the extracted files.

    Args:
        directory_output: folder that receives one sub-folder per archive;
            created when missing.
        csv_directory: folder where ``freepbr_links.csv`` is written; created
            when missing.

    Note:
        Opens Chrome through Selenium and reads the module-level
        ``texture_map_labels`` mapping, which therefore has to be defined
        before this function is called.
    """
    # makedirs also copes with nested paths and with folders that already exist.
    os.makedirs(directory_output, exist_ok=True)
    os.makedirs(csv_directory, exist_ok=True)

    def download_files(url, save_path, chunk_size=1024):
        """Stream the content of ``url`` into ``save_path`` chunk by chunk."""
        # With stream=True the connection is only released when the response is
        # closed, hence the context manager.
        with requests.get(url, stream=True) as r:
            with open(save_path, 'wb') as fd:
                for chunk in r.iter_content(chunk_size=chunk_size):
                    fd.write(chunk)

    def get_links_to_csv(base_url, file_name_to_store, anchor_grid_xpath, resolution, anchor_css_selector=None):
        """Visit every product category and store the product links as CSV.

        Args:
            base_url: page that shows the category list.
            file_name_to_store: path of the CSV file to write.
            anchor_grid_xpath: XPath of the element that wraps the product
                anchors on a category page.
            resolution: label stored in the ``resolution`` column and appended
                to every title.
            anchor_css_selector: CSS selector of the product anchors inside the
                grid; every ``a`` element is used when omitted.
        """
        texture_csv_rows = []
        selector = "a" if anchor_css_selector is None else anchor_css_selector

        driver = webdriver.Chrome()
        try:
            driver.get(base_url)

            category_list = driver.find_element(By.XPATH, '//*[@id="woocommerce_product_categories-2"]/ul')
            category_anchors = category_list.find_elements(By.CSS_SELECTOR, "a")

            # The hrefs are collected up front because the anchor elements are
            # only usable while their page is loaded.  The first anchor is not
            # needed; slicing (unlike pop(0)) also tolerates an empty list.
            category_urls = [anchor.get_attribute("href") for anchor in category_anchors[1:]]

            for category_url in category_urls:
                driver.get(category_url)
                anchor_grid = driver.find_element(By.XPATH, anchor_grid_xpath)

                for anchor in anchor_grid.find_elements(By.CSS_SELECTOR, selector):
                    parsed_url = anchor.get_attribute("href")
                    if not parsed_url:
                        # Anchor without a target: nothing to record.
                        continue
                    # The title is the second-to-last '/'-separated piece of the URL.
                    title = parsed_url.split('/')[-2]
                    texture_csv_rows.append({
                        'title': f'freepbr_{title.lower().replace("-", "--")}_{resolution}',
                        'url': parsed_url,
                        'resolution': resolution
                    })
        finally:
            # Always end the browser session, even when scraping fails.
            driver.quit()

        texture_csv = pd.DataFrame(texture_csv_rows)
        texture_csv.to_csv(file_name_to_store)

    def get_download_links_to_csv(csv_name_with_links, resolution, csv_for_download_links, xPath):
        """Open every product page of the CSV and store its download link.

        Pages on which ``xPath`` matches nothing are left out of the result.
        The input file is fully read before the output is written, so both
        paths may point to the same file.
        """
        texture_csv_rows = []

        driver = webdriver.Chrome()
        try:
            with open(csv_name_with_links, newline='') as csvfile:
                for row in csv.DictReader(csvfile):
                    driver.get(row['url'])
                    try:
                        download_anchor = driver.find_element(By.XPATH, xPath)
                    except NoSuchElementException:
                        continue
                    texture_csv_rows.append({
                        'title': row['title'],
                        'url': row['url'],
                        'resolution': resolution,
                        'download_url': download_anchor.get_attribute("href")
                    })
        finally:
            driver.quit()

        texture_csv = pd.DataFrame(texture_csv_rows)
        texture_csv.to_csv(csv_for_download_links)

    def download_zip_files(csv_with_download_links, dir_output):
        """Download every ``download_url`` of the CSV into ``dir_output``.

        Each file is saved under the ``title`` of its row.
        """
        os.makedirs(dir_output, exist_ok=True)

        with open(csv_with_download_links, newline='') as csvfile:
            for row in csv.DictReader(csvfile):
                if not row.get('download_url'):
                    # A row without link would only make the request fail.
                    continue
                save_path = os.path.join(dir_output, row['title'])
                download_files(row['download_url'], save_path)

    def fix_maptypes(map_dir, source_name):
        """Rename the entries of ``map_dir`` to ``<source_name>_2k_<normalized name>``.

        Every key of ``texture_map_labels[source_name]`` found in a name is
        replaced by its label; the result is lower-cased and ``-`` is doubled.
        """
        for map_type in os.listdir(map_dir):
            old_name = os.path.basename(map_type)
            new_name = old_name

            # Apply all replacements first and rename once: renaming after each
            # match would look for a file name that no longer exists on disk as
            # soon as a second key matches.
            for map_key, map_label in texture_map_labels[source_name].items():
                if map_key in new_name:
                    new_name = new_name.replace(map_key, map_label)

            os.rename(f'{map_dir}/{old_name}',
                      f'{map_dir}/{source_name}_2k_{new_name.lower().replace("-", "--")}')

    def move_files_one_layer_up(dir_of_folder):
        """Flatten ``dir_of_folder`` when its first entry is a folder.

        The content of that inner folder is moved into ``dir_of_folder`` and the
        emptied inner folder is removed.  Only the first listed entry is checked.
        """
        entries = os.listdir(dir_of_folder)
        if not entries:
            # Empty archive: nothing to flatten.
            return

        inner_folder_path = f'{dir_of_folder}/{os.path.basename(entries[0])}'

        if os.path.isdir(inner_folder_path):
            for file in os.listdir(inner_folder_path):
                shutil.move(f'{inner_folder_path}/{os.path.basename(file)}',
                            f'{dir_of_folder}/{os.path.basename(file)}')
            shutil.rmtree(inner_folder_path)

    def extract_files(zip_dir, extracting_dir):
        """Extract every archive of ``zip_dir`` into ``extracting_dir/<archive name>``.

        Files that are not valid zip archives are skipped.  Extracted folders are
        flattened and their files renamed for the ``'freepbr'`` source.
        """
        for zip_file in os.listdir(zip_dir):
            zip_file_name = os.path.basename(zip_file)
            dir_to_extract = os.path.join(extracting_dir, zip_file_name)
            try:
                with ZipFile(os.path.join(zip_dir, zip_file_name), 'r') as zip_ref:
                    zip_ref.extractall(dir_to_extract)
            except BadZipFile:
                continue

            # The archive is closed before the extracted files are moved/renamed.
            move_files_one_layer_up(dir_to_extract)
            fix_maptypes(dir_to_extract, 'freepbr')

    get_links_to_csv(base_url = "https://freepbr.com/",
                 file_name_to_store = f"{csv_directory}/freepbr_links.csv",
                 anchor_grid_xpath = '//*[@id="main"]/ul',
                 anchor_css_selector = "a.button.product_type_simple",
                 resolution = '2k'
                 )

    get_download_links_to_csv(csv_name_with_links = f"{csv_directory}/freepbr_links.csv",
                          resolution = '2k',
                          csv_for_download_links = f"{csv_directory}/freepbr_links.csv",
                          xPath = '/html/body/div[1]/div[2]/div/div[2]/main/div[2]/div[2]/div[1]/div/ul/li[3]/a'
                         )

    download_zip_files(csv_with_download_links = f"{csv_directory}/freepbr_links.csv",
               dir_output = './freepbr_zip/'
              )

    extract_files(zip_dir = './freepbr_zip/',
              extracting_dir = f'{directory_output}/'
             )
    
#     move_files_one_layer_up(
#     dir_of_folders = './unzipped/'
#     )

In [7]:
def get_cc0textures(directory_output, csv_directory, resolution='8K', material='PavingStones'):
    """Download and unpack JPG texture archives listed by the cc0textures API.

    The download list is fetched from the ``downloads_csv`` endpoint and saved
    as ``c00textures_links.csv`` in ``csv_directory``.  Rows are filtered by
    download attribute (``<resolution>K-JPG``) and by material, the matching
    archives are stored in ``./c00textures_zip`` and then extracted into
    ``directory_output``, where the extracted files are renamed.

    Args:
        directory_output: folder that receives one sub-folder per archive;
            created when missing.
        csv_directory: folder where the link CSV is written; created when
            missing.
        resolution: resolution in "K"; an integer (``4``) and a string with or
            without the trailing ``K`` (``'4'``, ``'4K'``) are all accepted.
        material: only assets whose ``AssetID`` starts with this run of letters
            are downloaded; ``None`` disables the material filter.

    Note:
        Reads the module-level ``texture_map_labels`` mapping, which therefore
        has to be defined before this function is called.
    """
    os.makedirs(directory_output, exist_ok=True)
    os.makedirs(csv_directory, exist_ok=True)

    source_name = 'cc0textures'
    links_csv = f"{csv_directory}/c00textures_links.csv"
    dir_zip_output = './c00textures_zip'

    def download_url(url, save_path, chunk_size=1024):
        """Stream the content of ``url`` into ``save_path`` chunk by chunk."""
        # With stream=True the connection is only released when the response is
        # closed, hence the context manager.
        with requests.get(url, stream=True) as r:
            with open(save_path, 'wb') as fd:
                for chunk in r.iter_content(chunk_size=chunk_size):
                    fd.write(chunk)

    def fix_maptypes(map_dir, source_name):
        """Rename the entries of ``map_dir`` to ``<source_name>_<normalized name>``.

        Every key of ``texture_map_labels[source_name]`` found in a name is
        replaced by its label; the result is lower-cased and ``-`` is doubled.
        """
        for map_type in os.listdir(map_dir):
            old_name = os.path.basename(map_type)
            new_name = old_name

            # Apply all replacements first and rename once: renaming after each
            # match would look for a file name that no longer exists on disk as
            # soon as a second key matches.
            for map_key, map_label in texture_map_labels[source_name].items():
                if map_key in new_name:
                    new_name = new_name.replace(map_key, map_label)

            os.rename(f'{map_dir}/{old_name}',
                      f'{map_dir}/{source_name}_{new_name.lower().replace("-", "--")}')

    def extract_files(zip_dir, extracting_dir):
        """Extract every archive of ``zip_dir`` into ``extracting_dir/<archive name>``.

        Files that are not valid zip archives are skipped.
        """
        for zip_file in os.listdir(zip_dir):
            zip_file_name = os.path.basename(zip_file)
            dir_to_extract = os.path.join(extracting_dir, zip_file_name)
            try:
                with ZipFile(os.path.join(zip_dir, zip_file_name), 'r') as zip_ref:
                    zip_ref.extractall(dir_to_extract)
            except BadZipFile:
                continue

            # The archive is closed before the extracted files are renamed.
            fix_maptypes(dir_to_extract, 'cc0textures')

    # https://help.cc0textures.com/doku.php?id=api_v1:start
    # The request carries an explicit User-Agent header.
    request = Request('https://cc0textures.com/api/v1/downloads_csv',
                      headers={'User-Agent': 'Mozilla/5.0'})
    with urlopen(request) as response:
        data = pd.read_csv(response)
    data.to_csv(links_csv)

    # Normalize the resolution so that 8, '8' and '8K' all give '8K-JPG'
    # (a plain f'{resolution}K-JPG' turns '4K' into the useless '4KK-JPG').
    resolution_label = str(resolution).upper().rstrip('K')
    download_attribute = f'{resolution_label}K-JPG'

    # The material is the first run of letters of the asset id.
    metadata = data.assign(material=data['AssetID'].str.extract(r'([A-Za-z]+)', expand=False))
    metadata = metadata[metadata['DownloadAttribute'] == download_attribute]
    if material is not None:
        metadata = metadata[metadata['material'] == material]

    os.makedirs(dir_zip_output, exist_ok=True)

    # Last path segment of the download link, i.e. the archive file name.
    file_path_template = re.compile(r'\/([^/]+$)')
    for url in metadata['RawDownloadLink']:
        archive_name = file_path_template.search(url).group(1)
        file_path = f'{source_name}_{archive_name.replace("-JPG.zip", "").lower().replace("-", "--")}'
        download_url(url, f'{dir_zip_output}/{file_path}')

    extract_files(zip_dir = f'{dir_zip_output}/',
                  extracting_dir = f'{directory_output}/'
                 )

In [8]:
# Per-source renaming table used by the fix_maptypes helpers: every key is a
# substring looked up in a texture file name, the value is the normalized
# map-type label it is replaced with.
# The entries of a source are tried in insertion order, so the order of the
# keys is significant (e.g. 'rough_ao' is listed before 'rough').
texture_map_labels = {
    'texturehaven': {
        'AO': 'ao',
        'rough_ao': 'roughness_ao',
        'Rough_ao': 'roughness_ao',
        'diff': 'color',
        'Diff': 'color',
        'Base_Color': 'color',
        'basecolorolor': 'color',
        'Col_01': 'color_01',
        'col_01': 'color_01',
        'Col_02': 'color_02',
        'col_02': 'color_02',
        'Col_03': 'color_03',
        'col_03': 'color_03',
        'Nor': 'normal',
        'nor': 'normal',
        'rough': 'roughness',
        'Rough': 'roughness',
        'disp': 'displacement',
        'Disp': 'displacement',
        'spec': 'specular',
        'Spec': 'specular',
        'bump': 'bump',
        'Bump': 'bump'
    },
    'cgbookcase': {
        'AO': 'ao',
        'ao': 'ao',
        'Color': 'color',
        'Height': 'height',
        'Normal': 'normal',
        'Roughness': 'roughness'
    },
    'goodtextures': {
        'basecolor': 'color'
    },
    'archinspirations': {
        'AO': 'ao',
        'REFL': 'reflection',
        'COLOR': 'color',
        'NRM': 'normal',
        'GLOSS': 'roughness',
        'DISP': 'displacement',
        'BUMP': 'bump'
    },
    'freepbr': {
        'albedo': 'color',
        'basecolor': 'color',
        'normal-ogl': 'normal'
    },
    'cc0textures': {
        # Spelled 'AmbientOcclusion'; the former misspelt key
        # 'AmbienientOcclusion' could never match such a file name.
        'AmbientOcclusion': 'ao',
        'Basecolor': 'color',
    }
}

In [9]:
# Dispatch table read by get_source: source name -> function that fetches it.
# NOTE: the key here is 'archinspiration' (singular), whereas
# texture_map_labels uses 'archinspirations'.
source_get_functions = dict(
    texturehaven=get_texturehaven,
    cgbookcase=get_cgbookcase,
    goodtextures=get_goodtextures,
    archinspiration=get_archinspiration,
    freepbr=get_FreePBR,
    cc0textures=get_cc0textures,
)

In [10]:
def get_source(source_name, directory_output, csv_directory, *args, **kwargs):
    """Run the download function registered for ``source_name``.

    Args:
        source_name: key of ``source_get_functions``.
        directory_output: folder passed on as the textures output folder.
        csv_directory: folder passed on as the CSV folder.
        *args, **kwargs: forwarded unchanged to the download function, for
            sources that accept more than the two folders.

    Returns:
        Whatever the selected download function returns.

    Raises:
        KeyError: when ``source_name`` is not a registered source.
    """
    try:
        get_function = source_get_functions[source_name]
    except KeyError:
        # Same exception type as a plain dict lookup, but with a usable message.
        raise KeyError(
            f"unknown texture source {source_name!r}; "
            f"expected one of {sorted(source_get_functions)}"
        ) from None

    return get_function(directory_output, csv_directory, *args, **kwargs)

In [11]:
# Source names (the same strings as the keys of source_get_functions), each
# annotated with the resolution that is downloaded for it.
source_labels = [
    # resolution 8k
    'texturehaven',

    # resolution mostly 4k, but some textures only exist in 3k or 2k without
    # any higher option, so the highest available resolution is downloaded
    'cgbookcase',

    # resolution 2k
    'goodtextures',

    # resolution 1k
    'archinspiration',

    # resolution 2k
    'freepbr',

    # 8k by default, but it can be changed up to 16k just by adding one more
    # argument (must be an integer) which defines the resolution
    'cc0textures',
]

In [12]:
# get_source('texturehaven', './all-textures', './all-csv')

In [13]:
# get_source('cgbookcase', './all-textures', './all-csv')

In [15]:
# get_source('goodtextures', './all-textures', './all-csv')

In [12]:
# get_source('archinspiration', './all-textures', './all-csv')

In [16]:
# get_source('freepbr', './all-textures', './all-csv')

In [17]:
# get_source('cc0textures', './all-textures', './all-csv')