In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

import time
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager

from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException


## 1- Data Scraping

In [151]:
def soup_with_selenium(url, scroll=False, product=False):
    # Start a new instance of Chrome WebDriver
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
    
    # Navigate to the URL
    driver.get(url)

    if product:
        try:
            WebDriverWait(driver, 30).until(EC.visibility_of_element_located((By.CSS_SELECTOR, 'div.image-btn')))
        except:
            print(f'image-btn was not found for {url}')  
    else:
        time.sleep(4)

    # If scroll is True, scroll down to trigger loading of additional content
    if scroll:
        driver.execute_script("window.scrollBy(0, 1200);")
        time.sleep(1)
        driver.execute_script("window.scrollBy(0, 800);")
        try:
            WebDriverWait(driver, 10).until(EC.visibility_of_element_located((By.CSS_SELECTOR, 'div#totalLook')))
            # Code to execute when the element is found and visible
        except TimeoutException:
        # Code to execute when the element is not found within the timeout period
            print(f'totalLook was not found for {url}')
            time.sleep(2)
        time.sleep(2.0)

    # Get the page source after the page has fully loaded
    page_source = driver.page_source
    
    # Close the WebDriver
    driver.quit()

    # Parse the page source with BeautifulSoup
    soup = BeautifulSoup(page_source, 'html.parser')

    return soup

In [4]:
def fetch_catalog(url):
    soup = soup_with_selenium(url)
    
    catalog_box = soup.find('div', attrs={'id': 'catalogProductsList'})
    
    catalog_link = []
    if catalog_box:
        items = catalog_box.find_all('a')
        for item in items:
            link = 'https://shop.mango.com' + item.get('href')
            catalog_link.append(link)
    
    return catalog_link

In [46]:
#to check catalog function
url = 'https://shop.mango.com/nl-en/women/blouses-and-shirts_c78920337'
catalog_urls = fetch_catalog(url)
print(catalog_urls)

['https://shop.mango.com/nl-en/women/blouses-and-shirts-shirts/wrap-linen-blend-shirt_67035755.html?c=05', 'https://shop.mango.com/nl-en/women/blouses-and-shirts-shirts/chest-pocket-denim-shirt_67094022.html?c=TM', 'https://shop.mango.com/nl-en/women/blouses-and-shirts-shirts/chest-pocket-denim-shirt_67094022.html?c=TM', 'https://shop.mango.com/nl-en/women/blouses-and-shirts-shirts/satin-striped-shirt_67095756.html?c=05', 'https://shop.mango.com/nl-en/women/blouses-and-shirts-shirts/pocket-oversize-shirt_67030437.html?c=01', 'https://shop.mango.com/nl-en/women/blouses-and-shirts-blouses/double-breasted-blouse-with-bow_67055759.html?c=99', 'https://shop.mango.com/nl-en/women/blouses-and-shirts-shirts/floral-embroidered-shirt_67075726.html?c=88', 'https://shop.mango.com/nl-en/women/blouses-and-shirts-shirts/linen-100-shirt_67096314.html?c=50', 'https://shop.mango.com/nl-en/women/blouses-and-shirts-blouses/double-breasted-blouse-with-bow_67055759.html?c=05', 'https://shop.mango.com/nl-en/

In [11]:
def fetch_product(soup):
    image_boxes = soup.find_all('div', attrs={'class': 'image-btn'}) 

    if image_boxes:
        image_box = image_boxes[-1]
    else:
        return {}

    data = {}
    if image_box:
        item = image_box.find('img')
        link = 'https:' + item.get('src')
        data['image_link'] = link
                
    type_boxes = soup.find_all('a', attrs = {'class':'breadcrumbs-link'})
    types = []
    for type_box in type_boxes:
        type = type_box.find('span', attrs = {'itemprop':'name'}).text.strip()
        types.append(type)
    data['type'] = ','.join(types)
    color = soup.find('div', attrs = {'class':'colors-info'}).find('span', attrs = {'class':'colors-info-name'}).text.strip()
    data['color'] = color

    return data

In [None]:
#to check fetch_product function
product_url = 'https://shop.mango.com/nl-en/women/blouses-and-shirts-shirts/wrap-linen-blend-shirt_67035755.html?c=05'
product_soup = soup_with_selenium(product_url, product=True)
product_data = fetch_product(product_soup)
display(product_data)

In [17]:
def fetch_combi_url(soup):    
    # Find the combi_box
    combi_box = soup.find('ul', id='recommendations.getTotalLook-list')
    
    # Initialize an empty list to store combi links
    combi_links = []
    
    # Extract combi links if combi_box is found
    if combi_box:
        items = combi_box.find_all('a')
        for item in items:
            link = 'https://shop.mango.com' + item.get('href')
            combi_links.append(link)
    
    return combi_links

In [12]:
def fetch_combi_product(soup):
    image_box = soup.find_all('div', attrs={'class': 'image-btn'})[-1]
    data = {}
    if image_box:
        item = image_box.find('img')
        link = 'https:' + item.get('src')
        data['combi_image_link'] = link
                
    type_boxes = soup.find_all('a', attrs = {'class':'breadcrumbs-link'})
    types = []
    for type_box in type_boxes:
        type = type_box.find('span', attrs = {'itemprop':'name'}).text.strip()
        types.append(type)
    data['combi_type'] = ','.join(types)
    color = soup.find('div', attrs = {'class':'colors-info'}).find('span', attrs = {'class':'colors-info-name'}).text.strip()
    data['combi_color'] = color

    return data

In [152]:
def fetch_product_and_combi(url):
    # Call soup_with_selenium with scroll=True to ensure all content is loaded
    soup = soup_with_selenium(url, scroll=True, product=True)

    if soup.find('title').text == 'Access Denied':
        return pd.DataFrame([]), True 

    product = fetch_product(soup)
    if not product:
        return pd.DataFrame([]), False

    combi_links = fetch_combi_url(soup)

    combi_products = []
    for link in combi_links:
        combi_soup = soup_with_selenium(link, product=True)
        combi = fetch_product(combi_soup)
        if combi:
            combi_products.append(combi)
    
    data = []
    for combi_product in combi_products:
        data.append({'image_link': product['image_link'],
                    'type' : product['type'],
                    'color' : product['color'],                     
                    'combi_image_link': combi_product['image_link'],
                    'combi_type': combi_product['type'],
                    'combi_color': combi_product['color']})

    # Create a DataFrame from the list of dictionaries
    df = pd.DataFrame(data)

    return df, False
   

In [70]:
#to check fetch_product and combi function
data = fetch_product_and_combi(product_url)
print(data)


Unnamed: 0,image_link,type,color,combi_image_link,combi_type,combi_color
0,https://st.mngbcn.com/rcs/pics/static/T6/fotos...,"Women,Blouses and shirts,Shirts",Ecru,https://st.mngbcn.com/rcs/pics/static/T6/fotos...,"Women,Skirts,Long",Ecru
1,https://st.mngbcn.com/rcs/pics/static/T6/fotos...,"Women,Blouses and shirts,Shirts",Ecru,https://st.mngbcn.com/rcs/pics/static/T6/fotos...,"Women,Shoes,Flat sandals",Black
2,https://st.mngbcn.com/rcs/pics/static/T6/fotos...,"Women,Blouses and shirts,Shirts",Ecru,https://st.mngbcn.com/rcs/pics/static/T6/fotos...,"Women,Bags,Leather",Chocolate
3,https://st.mngbcn.com/rcs/pics/static/T6/fotos...,"Women,Blouses and shirts,Shirts",Ecru,https://st.mngbcn.com/rcs/pics/static/T6/fotos...,"Women,Jewellery,Earrings",Gold


In [153]:
def save_data(catalog_url_list):
    result = pd.DataFrame()
    for url in catalog_url_list:
        print(f"fetching catalog: \n{url}\n")
        product_urls = fetch_catalog(url)
        print(f'item count: {len(product_urls)}')
        count = 1
        for product_url in product_urls:
            print(f'Progress: {count} / {len(product_urls)}', end='\r')
            df, error = fetch_product_and_combi(product_url)
            if error:
                print(f'Access Denied error') 
                return result
            result = pd.concat([result, df], ignore_index=True)
            count += 1
    return result

In [154]:
# catalog_url_list = [
#     'https://shop.mango.com/nl-en/women/blouses-and-shirts_c78920337', 
#     'https://shop.mango.com/nl-en/women/dresses-and-jumpsuits_c55363448',
#     'https://shop.mango.com/nl-en/women/t-shirts_c66796663',
#     'https://shop.mango.com/nl-en/women/tops_c19912693',
#     'https://shop.mango.com/nl-en/women/sweaters-and-cardigans_c87138853',
#     'https://shop.mango.com/nl-en/women/jackets_c16573202',
#     'https://shop.mango.com/nl-en/women/blazers_c86403599',
#     'https://shop.mango.com/nl-en/women/coats_c67886633',
#     'https://shop.mango.com/nl-en/women/trousers_c52748027',
#     'https://shop.mango.com/nl-en/women/skirts_c20673898',
#     'https://shop.mango.com/nl-en/women/jeans_c12563337',
#      'https://shop.mango.com/nl-en/women/shorts_c13128150'
#     ]

# all_data_shorts = save_data(catalog_url_list)


fetching catalog: 
https://shop.mango.com/nl-en/women/jeans_c12563337

item count: 152
totalLook was not found for https://shop.mango.com/nl-en/women/jeans-mom--slouchy/mom-high-waist-jeans_67040316.html?c=TM
totalLook was not found for https://shop.mango.com/nl-en/women/jeans-skinny--slim/waxed-slim-crop-jeans_67000680.html?c=TO
totalLook was not found for https://shop.mango.com/nl-en/women/jeans-skinny--slim/slim-cropped-jeans_67094013.html?c=TM
totalLook was not found for https://shop.mango.com/nl-en/women/jeans-straight/straight-jeans-with-forward-seams_67001012.html
totalLook was not found for https://shop.mango.com/nl-en/women/jeans-skinny--slim/skinny-push-up-jeans_67010381.html?c=TO
totalLook was not found for https://shop.mango.com/nl-en/women/jeans-straight/straight-jeans-with-forward-seams_67050450.html?c=DO
totalLook was not found for https://shop.mango.com/nl-en/women/jeans-wideleg/jeans-culotte-high-waist_67040298.html?c=07
totalLook was not found for https://shop.mango.c

In [157]:
all_data_shorts.to_csv('Mango data/short.csv')

## 2- Data Cleaning

In [1543]:
blouse_shirt = pd.read_csv('Mango data/Blouse and Shirt.csv')
dress_1 = pd.read_csv('Mango data/Dress_1.csv')
dress_2 = pd.read_csv('Mango data/Dress_2.csv')
dress_3 = pd.read_csv('Mango data/Dress_3.csv')
tshirt = pd.read_csv('Mango data/tshirt.csv')
tops = pd.read_csv('Mango data/tops.csv')
sweaters = pd.read_csv('Mango data/sweaters.csv')
jackets = pd.read_csv('Mango data/jackets.csv')
blazer_coat = pd.read_csv('Mango data/blazer_coat.csv')
trouser_skirt = pd.read_csv('Mango data/trouser_skirt.csv')
jean = pd.read_csv('Mango data/jean.csv')
short = pd.read_csv('Mango data/short.csv')

In [1544]:
data = pd.concat([blouse_shirt, dress_1, dress_2, dress_3, tshirt,tops,sweaters,jackets,blazer_coat,trouser_skirt,jean,short], axis=0)
data.reset_index(drop=True, inplace=True)

In [1545]:
data.shape

(3961, 7)

In [1546]:
data.drop(columns='Unnamed: 0', inplace=True)

column['type'] cleaning

In [1547]:
types = ['Dresses', 'Jumpsuits', 'T-shirts','Tanks', 'Tops', 'Blouses',
        'Shirts', 'Sweaters', 'Cardigans', 'Sweatshirts', 'Jackets','Gilets',
        'Blazers','Suit jackets', 'Trench coats', 'Coats', 'Quilted coats/Padded']
data['type'] = data['type'].apply(lambda x: x.split(',')[-1].strip() if isinstance(x, str) and x.split(',')[-1].strip() in types else x)

In [1548]:
data['type'] = data['type'].apply(lambda x: '-'.join(x.split(',')[1:]) if isinstance(x, str) and 'Trousers' in x else x)
data['type'] = data['type'].apply(lambda x: '-'.join(x.split(',')[1:]) if isinstance(x, str) and 'Jeans' in x else x)
data['type'] = data['type'].apply(lambda x: '-'.join(x.split(',')[1:]) if isinstance(x, str) and 'Skirts' in x else x)
data['type'] = data['type'].apply(lambda x: '-'.join(x.split(',')[1:]) if isinstance(x, str) and 'Shorts' in x else x)

In [1549]:
data.drop(data[data['type'] == 'Women,Pyjamas'].index, inplace=True)
data.drop(data[data['type'] == 'Women,Pyjamas,Bottoms'].index, inplace=True)

In [1550]:
pd.set_option('display.max_colwidth', None)
data[data['type']== 'Women,Dresses and jumpsuits,Long']['image_link']
data['type'].replace({'Women,Dresses and jumpsuits,Long': 'Jumpsuits'}, inplace=True)


In [1551]:
data[data['type']== 'Women,Dresses and jumpsuits,Short']['image_link']
data['type'].replace({'Women,Dresses and jumpsuits,Short': 'Jumpsuits'}, inplace=True)


In [1552]:
data[data['type']=='Women,More accessories,Other accessories']['image_link']
data.drop(data[data['type'] == 'Women,More accessories,Other accessories'].index, inplace=True)

In [1553]:
data[data['type']=='Women,Jackets,Denim']['image_link']
data['type'].replace({'Women,Jackets,Denim': 'Jackets'}, inplace=True)

In [1554]:
data[data['type']=='Women,Sweaters and cardigans,Striped']['image_link']
data['type'].replace({'Women,Sweaters and cardigans,Striped': 'Cardigans'}, inplace=True)
  

In [1555]:
data[data['type']=='Trousers-Party']['image_link']
data['type'].replace({'Trousers-Party': 'Trousers-Leggings'}, inplace=True)

In [1556]:
data[data['type']=='Women,T-shirts,Long sleeve']['image_link']
data['type'].replace({'Women,T-shirts,Long sleeve': 'T-shirts'}, inplace=True)

In [1557]:
data[data['type']=='Women,Blouses and shirts,Cotton']['image_link']
data['type'].replace({'Women,Blouses and shirts,Cotton': 'Shirts'}, inplace=True)

In [1604]:
data['type'].value_counts()

Dresses                 671
Jeans                   461
Trousers                394
Sweaters                327
Shirts                  274
Skirts                  258
Jackets                 244
Tshirts                 225
Tops                    179
Cardigans               140
Jumpsuits               132
Blouses                 108
Blazers                  81
Shorts                   60
Coats                    48
Suit jackets             45
Trench coats             42
Gilets                   42
Sweatshirts              32
Quilted coats/Padded     30
Name: type, dtype: int64

In [1559]:
data['type'].replace({'T-shirts': 'Tshirts'}, inplace=True)

In [1560]:
def process_type(type):
    if isinstance(type, str):
        parts = type.split('-')
        if len(parts) > 1 and parts[0].strip():
            return parts[0].strip()
    return type

data['type'] = data['type'].apply(process_type)

column['color] cleaning

In [1562]:
def combine_colors(data, colors_to_combine):
    for color in colors_to_combine:
        data.loc[data['color'].str.contains(color, case=False), 'color'] = color
        
colors_to_combine = ['Blue', 'Red', 'Green', 'Yellow', 'White', 'Grey', 'Brown', 'Orange', 'Pink', 'Black', 'Purple','Navy']
combine_colors(data, colors_to_combine)


In [1563]:
data[data['color']=='Vanilla']['image_link']
data['color'].replace({'Vanilla': 'Ecru'}, inplace=True)

In [1564]:
data[data['color']=='Lilac']['image_link']
data['color'].replace({'Lilac': 'Purple'}, inplace=True)

In [1565]:
data[data['color']=='Strawberry']['image_link']
data['color'].replace({'Strawberry': 'Pink'}, inplace=True)


In [1566]:
data[data['color']=='Maroon']['image_link']
data['color'].replace({'Maroon': 'Brown'}, inplace=True)

In [1567]:
data[data['color']=='Lime']['image_link']
data['color'].replace({'Lime': 'Yellow'}, inplace=True)

In [1568]:
data[data['color']=='Clementine']['image_link']
data['color'].replace({'Clementine': 'Orange'}, inplace=True)

In [1569]:
data[data['color']=='Wine']['image_link']
data['color'].replace({'Wine': 'Burgundy'}, inplace=True)

In [1570]:
data[data['color']=='Mustard']['image_link']
data['color'].replace({'Mustard': 'Yellow'}, inplace=True)

In [1571]:
data[data['color']=='Cognac']['image_link']
data['color'].replace({'Cognac': 'Brown'}, inplace=True)

In [1572]:
data[data['color']=='Cherry']['image_link']
data['color'].replace({'Cherry': 'Burgundy'}, inplace=True)

In [1573]:
data[data['color']=='Fuchsia']['image_link']
data['color'].replace({'Fuchsia': 'Pink'}, inplace=True)

In [1603]:
data['color'].value_counts()

Black        771
Blue         564
Ecru         329
White        329
Grey         283
Green        269
Khaki        198
Brown        183
Pink         167
Beige        146
Red          119
Navy         112
Sand          67
Charcoal      52
Silver        51
Orange        34
Yellow        32
Purple        29
Burgundy      21
Russet        15
Gold          14
Turquoise      8
Name: color, dtype: int64

column['combi_type'] cleaning

In [1575]:
types = ['Cushion covers','Pillow cases','Duvet cover','Blankets','Mikado and candles','Pyjamas','Bikinis and swimsuits','Rugs']
data.drop(data[data['combi_type'].apply(lambda x: isinstance(x, str) and x.split(',')[1].strip() in types)].index, inplace=True)

In [1576]:
data['combi_type'] = data['combi_type'].apply(lambda x: x.split(',')[1].strip() if isinstance(x, str) else x)

In [1577]:
data[data['combi_type']=='More accessories']['combi_image_link']
data.drop(data[data['combi_type'] == 'More accessories'].index, inplace=True)

In [1578]:
data['combi_type'].replace({'T-shirts': 'Tshirts'}, inplace=True)

In [1602]:
data['combi_type'].value_counts()

                                  

Shoes                     912
Jewellery                 818
Bags                      814
Sweaters and cardigans    184
Trousers                  173
Jeans                     148
Blouses and shirts        109
Skirts                    105
Tshirts                   101
Tops                       93
Sunglasses                 89
Jackets                    83
Blazers                    60
Belts                      52
Coats                      17
Shorts                     17
Dresses and jumpsuits       9
Foulards and scarves        7
Wallets and cases           1
Hats and caps               1
Name: combi_type, dtype: int64

column['combi_color'] cleaning

In [1580]:
def combi_colors(data, colors_to_combine):
    for color in colors_to_combine:
        data.loc[data['combi_color'].str.contains(color, case=False), 'combi_color'] = color
        
colors_to_combine = ['Blue', 'Red', 'Green', 'Yellow', 'White', 'Grey', 'Brown', 'Orange', 'Pink', 'Black', 'Purple','Navy', 'Silver']
combi_colors(data, colors_to_combine)

In [1581]:
data[data['combi_color']=='Vanilla']['combi_image_link']
data['combi_color'].replace({'Vanilla': 'Ecru'}, inplace=True)

In [1582]:
data[data['combi_color']=='Chocolate']['combi_image_link']
data['combi_color'].replace({'Chocolate': 'Brown'}, inplace=True)

In [1583]:
data[data['combi_color']=='Fuchsia']['combi_image_link']
data['combi_color'].replace({'Fuchsia': 'Pink'}, inplace=True)

In [1584]:
data[data['combi_color']=='Peach']['combi_image_link']
data['combi_color'].replace({'Peach': 'Nude'}, inplace=True)

In [1585]:
data[data['combi_color']=='Copper']['combi_image_link']
data['combi_color'].replace({'Copper': 'Nude'}, inplace=True)

In [1586]:
data[data['combi_color']=='Coffee']['combi_image_link']
data['combi_color'].replace({'Coffee': 'Brown'}, inplace=True)

In [1587]:
data[data['combi_color']=='Wine']['combi_image_link']
data['combi_color'].replace({'Wine': 'Burgundy'}, inplace=True)

In [1588]:
data[data['combi_color']=='Caramel']['combi_image_link']
data['combi_color'].replace({'Caramel': 'Ecru'}, inplace=True)

In [1589]:
data[data['combi_color']=='Lime']['combi_image_link']
data['combi_color'].replace({'Lime': 'Yellow'}, inplace=True)

In [1590]:
data[data['combi_color']=='Clementine']['combi_image_link']
data['combi_color'].replace({'Clementine': 'Orange'}, inplace=True)

In [1591]:
data[data['combi_color']=='Ochre']['combi_image_link']
data['combi_color'].replace({'Ochre': 'Brown'}, inplace=True)

In [1592]:
data[data['combi_color']=='Maroon']['combi_image_link']
data['combi_color'].replace({'Maroon': 'Burgundy'}, inplace=True)

In [1593]:
data[data['combi_color']=='Strawberry']['combi_image_link']
data['combi_color'].replace({'Strawberry': 'Pink'}, inplace=True)

In [1601]:
data['combi_color'].value_counts()

Black       1330
Silver       641
Gold         407
White        353
Ecru         197
Blue         163
Grey         151
Brown        150
Beige         92
Leather       44
Red           34
Pink          33
Khaki         31
Green         31
Charcoal      28
Navy          26
Burgundy      25
Sand          22
Nude          22
Yellow         8
Orange         3
Russet         2
Name: combi_color, dtype: int64

Missing values

In [1595]:
data.isna().sum()

image_link           0
type                 4
color                0
combi_image_link     0
combi_type          97
combi_color          0
dtype: int64

In [1596]:
data[data['type'].isna()]['image_link']

1146    https://st.mngbcn.com/rcs/pics/static/T5/fotos/S20/57050249_43_B.jpg?ts=1679665231304&imwidth=194&imdensity=2
1147    https://st.mngbcn.com/rcs/pics/static/T5/fotos/S20/57050249_43_B.jpg?ts=1679665231304&imwidth=194&imdensity=2
1148    https://st.mngbcn.com/rcs/pics/static/T5/fotos/S20/57050249_43_B.jpg?ts=1679665231304&imwidth=194&imdensity=2
1149    https://st.mngbcn.com/rcs/pics/static/T5/fotos/S20/57050249_43_B.jpg?ts=1679665231304&imwidth=194&imdensity=2
Name: image_link, dtype: object

In [1597]:
data.loc[data['type'].isna(), 'type'] = 'Dresses'

In [1598]:
data.loc[data['combi_type'].isna()].to_csv('missing combi_types.csv')

In [1599]:
data.drop(data[data['combi_type'].isna()].index, inplace=True)

In [1607]:
data['match']=1

In [1608]:
data.reset_index(drop=True, inplace=True)
data.to_csv('Mango data/cleaned_Mango_data.csv')

## 3- Image download

In [1437]:
def modified_url(url):
    # Split the URL by '?' to separate the base URL and the parameters
    base_url, params = url.split('?')

    # Split the parameters by '&' to separate them
    params_list = params.split('&')

    # Create a dictionary to store the parameters
    params_dict = {}
    for param in params_list:
        key, value = param.split('=')
        params_dict[key] = value

    # Modify the parameters
    params_dict['imwidth'] = '224'
    params_dict['imdensity'] = '3'

    # Reconstruct the modified URL
    return base_url + '?' + '&'.join([f"{key}={value}" for key, value in params_dict.items()])


In [1438]:
def download_image(url, folder, name):
    url = modified_url(url)
    # Send a GET request to the image URL
    response = requests.get(url)

    # Check if the request was successful (status code 200)
    if response.status_code == 200:
        # Open a file in binary write mode and write the image content to it
        with open(folder + '/' + name + ".jpg", "wb") as file:
            file.write(response.content)    
    else:
        print("Failed to download image url:", url)

In [None]:
for index, row in data.iterrows():
        image_url = row['image_link']
        image_name = f"A{index}"  # Creating a unique name for each image
        print(f'Progress: {index} / {data.shape[0]}', end='\r')
        download_image(image_url, 'A-Mango-images', image_name)

In [None]:
for index, row in data.iterrows():
        image_url = row['combi_image_link']
        image_name = f"B{index}"  # Creating a unique name for each image
        print(f'Progress: {index} / {data.shape[0]}', end='\r')
        download_image(image_url, 'B-Mango-images', image_name)

Progress: 3792 / 3793