In [1]:
# Required libraries
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
# from selenium.webdriver.support import expected_conditions as EC
# from selenium.webdriver.support.ui import Select

import xlsxwriter
import re

# Problem:
Looking for someone able to automatically collect datas from this website:

https://mm.lv/en
(website is in 3 languages)  
I need to collect the name of the categories / sub-categories / sub-sub-catégories / sub-sub-sub-categories.  
I need to have them in the 3 languages of the website.  
Datas can be recorded as follow (with the colors for me to distinguish the categories and subcategories)  
https://zupimages.net/up/20/16/ja3u.png  

The main difficult part, is that when you change the language of the website, the subcategories are changing place (it is certainly sorted by alphabetic position).  

This makes the translation not matching.  
# Solution:
### Solution 1
Category - easy scrape
Sub Category - follow the icons. (icons have specific links or maybe data-id you just have to store it first)
2x Sub Category - 
3x Sub Category - Base on the number of items. If there are categories which has same number of items, select that one then manually change the translation
### Solution 2 (more easier but would take longer)
- Follow each category's breadcrumbs 
- Then manually click the change language using selenium 
#### Algorithm:
##### Phase 1
    Get all the english category, sub category, 2x sub category, and 3x sub category with their respective links which could be extracted from anchor href
##### Phase 2
    1. Go to each links gathered from Phase 1
    2. Using Python's Selenium, click on the RU then Web Scrape the breadcrumbs
    3. Using Python's Selenium, click on the LV then Web Scrape the breadcrumbs
    4. Using Python's Selenium, click back to EN (you won't have to Web Scrape English since you already have it.)
    5. Repeat steps 1-4 until we finish all the links.

### 3 Formats which means, 3 ways to extract data:
1. Through 'category' class (top level)
2. Through 'title' and 'subcat' class (medium level)
3. Through 'title' and 'active' class (lowest level)

Note:
- Category(c) -> Sub Category(cc) -> 2x Sub Category(ccc) -> 3x Sub Category(cccc)
- Categories are distinguished by colors in .xlsx
- Data is a array of dict containing the following: en: , ru: , lv: , parent: , link:

In [2]:
url = 'https://mm.lv'
en_id = 'en_US'
ru_id = 'ru_RU'
lv_id = 'lt_LT'
data = []

In [234]:
# adding options
chromeOptions = Options()
chromeOptions.add_argument('--kiosk')
chromeOptions.page_load_strategy = 'normal'

# opening web driver
driver = webdriver.Chrome('../chromedriver/chromedriver', options=chromeOptions)
driver.get(url) # go to url

# Phase 1 & 2
This was supposed to be only phase 1 then next chapter is phase 2 but I accidentally joined it together :P

In [4]:
 # go to url
driver.get(url)
c = []
try:
    driver.find_element_by_id(en_id).click() # click english
except:
    print('Already in english')
lists = driver.find_elements_by_tag_name('li')
for l in lists[:12]: # get the categories
    en = l.find_element_by_class_name('category')
    c.append({
        'en': en.text, 
        'ru': None, 
        'lv': None,
        'parent': None,
        'url': en.get_attribute('href')
    })

# translate
driver.find_element_by_id(ru_id).click() # click russian
lists = driver.find_elements_by_tag_name('li')
for i, l in enumerate(lists[:12]): # translate to russian
    c[i]['ru'] = l.find_element_by_class_name('category').text    

driver.find_element_by_id(lv_id).click() # click latvian
lists = driver.find_elements_by_tag_name('li')
for i, l in enumerate(lists[:12]): # translate to latvian
    c[i]['lv'] = l.find_element_by_class_name('category').text
    
driver.find_element_by_id(en_id).click() # click english

print(len(c))

12


In [5]:
# get the sub categories
cc = []
for i in c:
    driver.get(i['url'])
    lists = driver.find_elements_by_class_name('listings') # get all elements with class listings
    for l in lists:
        category = l.find_element_by_class_name('category')
        cc.append({
            'en': category.text,
            'ru': None,
            'lv': None,
            'parent': i['en'],
            'url': category.get_attribute('href')
        })
    
    # translate
    driver.find_element_by_id(ru_id).click() # click russian
    lists = driver.find_elements_by_class_name('listings')
    for i, l in enumerate(lists):
        category = l.find_element_by_class_name('category')
        cc[i]['ru'] = category.text

    driver.find_element_by_id(lv_id).click() # click latvian
    lists = driver.find_elements_by_class_name('listings')
    for i, l in enumerate(lists):
        category = l.find_element_by_class_name('category')
        cc[i]['lv'] = category.text

driver.find_element_by_id(en_id).click() # click english

print(len(cc))

142


In [69]:
# view parent-current relationship for cc
names = [x['en'] for x in cc]
parent = [x['parent'] for x in cc]
print(len(cc))
for i in range(len(names)):
    print(parent[i], '>', names[i])

142
Transport > Cars
Transport > Car spare parts
Transport > Commercial Vehicles
Transport > Moto transport
Transport > Water transport
Transport > Transport rent
Transport > Personal transport
Transport > Transportation
Transport > Car services
Transport > Air transport
Transport > Carting, Buggy
Transport > Disabled persons Transports
Transport > RVs, Trailers
Transport > Trailers
Transport > Car service equipment  
Transport > Roof racks
Real estate > Apartments
Real estate > Houses
Real estate > Land property
Real estate > Farms, estates, manors
Real estate > Garages
Real estate > Bathhouses
Real estate > Buildings
Real estate > Car-care centers
Real estate > Factories
Real estate > Forest
Real estate > Gardens
Real estate > Hangars
Real estate > Mobile houses
Real estate > Modular houses
Real estate > Offices
Real estate > Parking places
Real estate > Shops
Real estate > Spaces
Real estate > Summer cottages
Real estate > Warehouses
Real estate > Workshops
Real estate > Auctions
Re

In [104]:
# get the 2x sub categories
ccc = []
for i in cc:
    driver.get(i['url'])
    try: # try finding for categories
        menu = driver.find_element_by_class_name('cat-menu')
        lists = menu.find_elements_by_class_name('category')
        if(len(lists) > 0):
            for l in lists:
                if(l.text != ''):
                    ccc.append({
                        'en': l.text,
                        'ru': None,
                        'lv': None,
                        'parent': i['en'],
                        'url': l.get_attribute('href')
                    })
            print('Category Type', i['en'])
        else: # if it doesn't exist then find subcats
            lists = menu.find_elements_by_class_name('subcat')
            for l in lists:
                if(l.text != ''):
                    ccc.append({
                        'en': l.text,
                        'ru': None,
                        'lv': None,
                        'parent': i['en'],
                        'url': l.get_attribute('href')
                    })
            print('Subcat Type', i['en'])
    except: 
        print('No 3rd Sub Menu', i['parent'], '>', i['en'])
            
print(len(ccc))

Subcat Type Cars
Category Type Car spare parts
Category Type Commercial Vehicles
Category Type Moto transport
Subcat Type Water transport
Subcat Type Transport rent
Category Type Personal transport
Subcat Type Transportation
Subcat Type Car services
Subcat Type Air transport
Subcat Type Carting, Buggy
Subcat Type Disabled persons Transports
Subcat Type RVs, Trailers
Subcat Type Trailers
Subcat Type Car service equipment  
No 3rd Sub Menu Transport > Roof racks
Category Type Apartments
Category Type Houses
Category Type Land property
Subcat Type Farms, estates, manors
Subcat Type Garages
No 3rd Sub Menu Real estate > Bathhouses
No 3rd Sub Menu Real estate > Buildings
No 3rd Sub Menu Real estate > Car-care centers
No 3rd Sub Menu Real estate > Factories
No 3rd Sub Menu Real estate > Forest
No 3rd Sub Menu Real estate > Gardens
No 3rd Sub Menu Real estate > Hangars
No 3rd Sub Menu Real estate > Mobile houses
No 3rd Sub Menu Real estate > Modular houses
No 3rd Sub Menu Real estate > Office

In [116]:
# view parent-current relationship for ccc
names = [x['en'] for x in ccc]
parent = [x['parent'] for x in ccc]
print(len(ccc))
for i in range(len(names)):
    print(parent[i], '>', names[i])

1320
Cars > Alfa-Romeo
Cars > Audi
Cars > Bentley
Cars > BMW
Cars > Cadillac
Cars > Chevrolet
Cars > Chrysler
Cars > Citroen
Cars > Dodge
Cars > Fiat
Cars > Ford
Cars > Honda
Cars > Hyundai
Cars > Infiniti
Cars > Jaguar
Cars > Jeep
Cars > Kia
Cars > Land-Rover
Cars > Lexus
Cars > Mazda
Cars > Mercedes-Benz
Cars > Mini
Cars > Mitsubishi
Cars > Nissan
Cars > Opel
Cars > Peugeot
Cars > Porsche
Cars > Renault
Cars > Rover
Cars > Saab
Cars > Seat
Cars > Skoda
Cars > Smart
Cars > Subaru
Cars > Suzuki
Cars > Toyota
Cars > Volkswagen
Cars > Volvo
Cars > Electric cars
Cars > Other brands
Cars > Russian
Cars > Sport cars
Car spare parts > Parts from one a/m
Car spare parts > Tires
Car spare parts > Alloy wheels
Car spare parts > Auto spares
Car spare parts > Accessories
Car spare parts > Auto tuning
Car spare parts > Batteries
Car spare parts > Car chemicals, cosmetics
Car spare parts > Cars for spare parts
Car spare parts > Chargers
Car spare parts > Interior
Car spare parts > Oils
Car spare pa

Technics for office and shop > Other
Technics for office and shop > Repair and maintenance
Video surveillance, alarms > Alarm panels
Video surveillance, alarms > Sensors
Video surveillance, alarms > Surveillance cameras
Video surveillance, alarms > Video recorders
Video surveillance, alarms > Intercoms
Video surveillance, alarms > Code locks
Video surveillance, alarms > Consumables
Video surveillance, alarms > Installation and maintenance
Video surveillance, alarms > Other
Optical devices > Binoculars
Optical devices > Telescopes
Optical devices > Microscopes
Optical devices > Accessories
Optical devices > Repair services
Optical devices > Other
Communication devices > Phones
Communication devices > Radios
Construction works > Asphalt paving
Construction works > Cleaning, garbage collection
Construction works > Concrete and reinforced concrete works
Construction works > Dismantling
Construction works > Documentation and supervision
Construction works > Drilling works
Construction works

Jewelry > Pendants
Jewelry > Bracelets
Jewelry > Medallions, brooches
Jewelry > Precious stones
Jewelry > Jewelry decorations
Jewelry > Jewelry services
Watches,Handbags,Accessories > Watches
Watches,Handbags,Accessories > Handbags, clutches
Watches,Handbags,Accessories > Sunglasses, glasses
Watches,Handbags,Accessories > Belts, suspenders
Watches,Handbags,Accessories > Tie pins, brooches
Watches,Handbags,Accessories > Cufflinks
Watches,Handbags,Accessories > Wallets, cardholders
Watches,Handbags,Accessories > Backpacks
Watches,Handbags,Accessories > Suitcases
Watches,Handbags,Accessories > Men's jewelry
Watches,Handbags,Accessories > Other
Dogs, puppies > Dogs
Dogs, puppies > Feed
Dogs, puppies > Coaching
Dogs, puppies > Haircutting, trimming, care
Dogs, puppies > Breeding
Dogs, puppies > Cages and carriers
Dogs, puppies > Houses and beddings
Dogs, puppies > Accessories
Dogs, puppies > Other
Cats, kittens > Cats
Cats, kittens > Feed
Cats, kittens > Carrier bags
Cats, kittens > Cat hou

In [118]:
# traslation in ccc
cccc = []
ccccc = []
for i in ccc:
    driver.get(i['url'])
    try:
        menu = driver.find_element_by_class_name('cat-menu')
        lists = menu.find_elements_by_class_name('listings') # check listings
        if(len(lists) > 0):
            for l in lists:
                title = l.find_element_by_class_name('category')
                cccc.append({
                    'en': title.text,
                    'ru': None,
                    'lv': None,
                    'parent': i['en'],
                    'url': title.get_attribute('href')
                })
                subcats = lists.find_elements_by_class_name('subcat')
                for subcat in subcats:
                    ccccc.append({
                        'en': subcat.text,
                        'ru': None,
                        'lv': None,
                        'parent': title.text,
                        'url': subcat.get_attribute('href')
                    })
                print('Category Type', i['en'], '>', title.text['en'])
        else: # if there are no listings then the item is in the list
            try:
                active = menu.find_element_by_class_name('active')
            except:
                try:
                    lists = menu.find_elements_by_class_name('subcat')
                    for l in lists:
                        if(l.text != ''):
                            cccc.append({
                                'en': l.text,
                                'ru': None,
                                'lv': None,
                                'parent': i['en'],
                                'url': l.get_attribute('href')
                            })
                    print('Subcat Type', i['parent'], '>', i['en'])
                except:
                    print('No 4th Sub Menu', i['parent'], '>', i['en'])
    except: # else there is no 4th sub menu
            print('No 4th Sub Menu', i['parent'], '>', i['en'])
        
print(len(cccc))
print(len(ccccc))

Subcat Type Cars > Alfa-Romeo
Subcat Type Cars > Audi
Subcat Type Cars > Bentley
Subcat Type Cars > BMW
Subcat Type Cars > Cadillac
Subcat Type Cars > Chevrolet
Subcat Type Cars > Chrysler
Subcat Type Cars > Citroen
Subcat Type Cars > Dodge
Subcat Type Cars > Fiat
Subcat Type Cars > Ford
Subcat Type Cars > Honda
Subcat Type Cars > Hyundai
Subcat Type Cars > Infiniti
Subcat Type Cars > Jaguar
Subcat Type Cars > Jeep
Subcat Type Cars > Kia
Subcat Type Cars > Land-Rover
Subcat Type Cars > Lexus
Subcat Type Cars > Mazda
Subcat Type Cars > Mercedes-Benz
Subcat Type Cars > Mini
Subcat Type Cars > Mitsubishi
Subcat Type Cars > Nissan
Subcat Type Cars > Opel
Subcat Type Cars > Peugeot
Subcat Type Cars > Porsche
Subcat Type Cars > Renault
Subcat Type Cars > Rover
Subcat Type Cars > Saab
Subcat Type Cars > Seat
Subcat Type Cars > Skoda
Subcat Type Cars > Smart
Subcat Type Cars > Subaru
Subcat Type Cars > Suzuki
Subcat Type Cars > Toyota
Subcat Type Cars > Volkswagen
Subcat Type Cars > Volvo
Subc

Subcat Type Job search > Car business
Subcat Type Job search > Carier start
Subcat Type Job search > Domestic staff
Subcat Type Job search > Education, science
Subcat Type Job search > Entertainment, gambling
Subcat Type Job search > Finance, jurisprudence
Subcat Type Job search > Hotel business, tourism
Subcat Type Job search > IT industry, telecommunicatio
Subcat Type Job search > Manufacturing, agriculture
Subcat Type Job search > Marketing, advertising
Subcat Type Job search > Medicine, pharmaceutics
Subcat Type Job search > Real estate
Subcat Type Job search > Restaurants, cafes, bars
Subcat Type Job search > Security and safety
Subcat Type Job search > Trade, services
Subcat Type Job search > Transport, logistics
No 4th Sub Menu Job search > Work abroad
No 4th Sub Menu Job search > Other
Subcat Type Courses and training > Driving schools
Subcat Type Courses and training > Beauty industry
Subcat Type Courses and training > Language courses
Subcat Type Courses and training > Tutori

No 4th Sub Menu Heating > Heat pumps
No 4th Sub Menu Heating > Flue
No 4th Sub Menu Heating > Pumps
No 4th Sub Menu Heating > Boilers
No 4th Sub Menu Heating > Radiators, thermostats
No 4th Sub Menu Heating > Heated floors
No 4th Sub Menu Heating > Consumables
No 4th Sub Menu Heating > Repair and installation
No 4th Sub Menu Heating > Other
No 4th Sub Menu Agricultural machinery > Combine harvesters
No 4th Sub Menu Agricultural machinery > Conveyor belts
No 4th Sub Menu Agricultural machinery > Cultivators
No 4th Sub Menu Agricultural machinery > Drinking trough bowls
No 4th Sub Menu Agricultural machinery > Equipment and inventory
No 4th Sub Menu Agricultural machinery > Extrusion machines
No 4th Sub Menu Agricultural machinery > Feeders
No 4th Sub Menu Agricultural machinery > Fertilizer spreaders
No 4th Sub Menu Agricultural machinery > For crop farming
No 4th Sub Menu Agricultural machinery > For fish farms
No 4th Sub Menu Agricultural machinery > Forage harvesters
No 4th Sub Menu 

In [148]:
print(len(cccc))
for i in cccc:
    print(i)

2291
{'en': '146', 'ru': '146', 'lv': '146', 'parent': 'Alfa-Romeo', 'url': 'https://mm.lv/alfa-romeo-146-en'}
{'en': '147', 'ru': '147', 'lv': '147', 'parent': 'Alfa-Romeo', 'url': 'https://mm.lv/alfa-romeo-147-en'}
{'en': '155', 'ru': '155', 'lv': '155', 'parent': 'Alfa-Romeo', 'url': 'https://mm.lv/alfa-romeo-155-en'}
{'en': '156', 'ru': '156', 'lv': '156', 'parent': 'Alfa-Romeo', 'url': 'https://mm.lv/alfa-romeo-156-en'}
{'en': '159', 'ru': '159', 'lv': '159', 'parent': 'Alfa-Romeo', 'url': 'https://mm.lv/alfa-romeo-159-en'}
{'en': '164', 'ru': '164', 'lv': '164', 'parent': 'Alfa-Romeo', 'url': 'https://mm.lv/alfa-romeo-164-en'}
{'en': '166', 'ru': '166', 'lv': '166', 'parent': 'Alfa-Romeo', 'url': 'https://mm.lv/alfa-romeo-166-en'}
{'en': 'Guilietta', 'ru': 'Guilietta', 'lv': 'Guilietta', 'parent': 'Alfa-Romeo', 'url': 'https://mm.lv/alfa-romeo-guilietta-en'}
{'en': 'Brera', 'ru': 'Brera', 'lv': 'Brera', 'parent': 'Alfa-Romeo', 'url': 'https://mm.lv/alfa-romeo-brera-en'}
{'en': 'G

{'en': 'Spare parts', 'ru': 'Запчасти', 'lv': 'Rezerves daļas', 'parent': 'Tractors', 'url': 'https://mm.lv/tractor-spare-parts'}
{'en': 'Tires', 'ru': 'Покрышки', 'lv': 'Riepas', 'parent': 'Tractors', 'url': 'https://mm.lv/tractor-tires'}
{'en': 'Seedlings, seeds, seedlings', 'ru': 'Рассада, семена, саженцы', 'lv': 'Dēsti, sēklas, stādi', 'parent': 'Gardening', 'url': 'https://mm.lv/farms/seedlings-seeds-seedlings'}
{'en': 'Appliances for garden', 'ru': 'Техника для сада', 'lv': 'Dārza tehnika', 'parent': 'Gardening', 'url': 'https://mm.lv/farms/appliances-for-garden'}
{'en': 'Fertilizer, earth', 'ru': 'Удобрение, земля', 'lv': 'Mēslojums., augsne', 'parent': 'Gardening', 'url': 'https://mm.lv/farms/fertilizer-earth'}
{'en': 'Greenhouses', 'ru': 'Теплицы', 'lv': 'Siltumnīcas', 'parent': 'Gardening', 'url': 'https://mm.lv/farms/garden-accessories-greenhouses'}
{'en': 'Garden tools', 'ru': 'Садовый инвентарь', 'lv': 'Dārza inventārs', 'parent': 'Gardening', 'url': 'https://mm.lv/farms/g

In [133]:
# getting 4x sub categories
ccccc = [] 
for i in cccc:
    driver.get(i['url'])
    try:
        driver.find_element_by_id(ru_id).click() # click russian
        menu = driver.find_element_by_class_name('cat-menu')
        ru = menu.find_element_by_class_name('active').text
        driver.find_element_by_id(lv_id).click() # click latvian
        menu = driver.find_element_by_class_name('cat-menu')
        lv = menu.find_element_by_class_name('active').text
        i['ru'] = ru
        i['lv'] = lv
        driver.find_element_by_id(en_id).click() # click english
        print('No 5th Sub Menu', i['parent'], '>', active.text)
    except:
        try: # clickable
            driver.find_element_by_id(ru_id).click() # click russian
            menu = driver.find_element_by_class_name('cat-menu')
            ru = menu.find_element_by_class_name('title').text
            driver.find_element_by_id(lv_id).click() # click latvian
            menu = driver.find_element_by_class_name('cat-menu')
            lv = menu.find_element_by_class_name('title').text
            i['ru'] = ru
            i['lv'] = lv
            driver.find_element_by_id(en_id).click() # click english
            menu = driver.find_element_by_class_name('cat-menu')
            lists = menu.find_elements_by_class_name('subcat')
            for l in lists:
                ccccc.append({
                    'en': l.text,
                    'ru': None,
                    'lv': None,
                    'parent': i['en'],
                    'url': l.get_attribute('href')
                })
            print('Has a 5th Sub Menu')
        except:
            print('No 5th Sub Menu', i['parent'], '>', i['en'])
print(len(ccccc))

No 5th Sub Menu Alfa-Romeo > 146
No 5th Sub Menu Alfa-Romeo > 147
No 5th Sub Menu Alfa-Romeo > 155
No 5th Sub Menu Alfa-Romeo > 156
No 5th Sub Menu Alfa-Romeo > 159
No 5th Sub Menu Alfa-Romeo > 164
No 5th Sub Menu Alfa-Romeo > 166
No 5th Sub Menu Alfa-Romeo > Guilietta
No 5th Sub Menu Alfa-Romeo > Brera
No 5th Sub Menu Alfa-Romeo > GT
No 5th Sub Menu Alfa-Romeo > Mito
No 5th Sub Menu Alfa-Romeo > Spider
No 5th Sub Menu Alfa-Romeo > Other
No 5th Sub Menu Audi > A series
No 5th Sub Menu Audi > Q series
No 5th Sub Menu Audi > S series
No 5th Sub Menu Audi > RS series
No 5th Sub Menu Audi > R8
No 5th Sub Menu Audi > TT series
No 5th Sub Menu Audi > 80-200
No 5th Sub Menu Audi > Other
No 5th Sub Menu Bentley > Continental
No 5th Sub Menu Bentley > Bentayga
No 5th Sub Menu Bentley > Other
No 5th Sub Menu BMW > 1 series
No 5th Sub Menu BMW > 2 series
No 5th Sub Menu BMW > 3 series
No 5th Sub Menu BMW > 4 series
No 5th Sub Menu BMW > 5 series
No 5th Sub Menu BMW > 6 series
No 5th Sub Menu BMW 

No 5th Sub Menu Kia > Sephia
No 5th Sub Menu Kia > Shuma
No 5th Sub Menu Kia > Sorento
No 5th Sub Menu Kia > Soul
No 5th Sub Menu Kia > Sportage
No 5th Sub Menu Kia > Other
No 5th Sub Menu Land-Rover > Defender
No 5th Sub Menu Land-Rover > Discovery
No 5th Sub Menu Land-Rover > Discovery Sport
No 5th Sub Menu Land-Rover > Evoque
No 5th Sub Menu Land-Rover > Freelander
No 5th Sub Menu Land-Rover > Range Rover
No 5th Sub Menu Land-Rover > Range Rover Sport
No 5th Sub Menu Land-Rover > Range Rover Evoque
No 5th Sub Menu Land-Rover > Other
No 5th Sub Menu Lexus > CT
No 5th Sub Menu Lexus > ES
No 5th Sub Menu Lexus > GS
No 5th Sub Menu Lexus > GX
No 5th Sub Menu Lexus > IS
No 5th Sub Menu Lexus > LS
No 5th Sub Menu Lexus > LX
No 5th Sub Menu Lexus > NX
No 5th Sub Menu Lexus > RC
No 5th Sub Menu Lexus > RX
No 5th Sub Menu Lexus > SC
No 5th Sub Menu Lexus > Other
No 5th Sub Menu Mazda > 323
No 5th Sub Menu Mazda > 626
No 5th Sub Menu Mazda > 2
No 5th Sub Menu Mazda > 3
No 5th Sub Menu Mazda >

No 5th Sub Menu Seat > Toledo
No 5th Sub Menu Seat > Other
No 5th Sub Menu Skoda > Citigo
No 5th Sub Menu Skoda > Fabia
No 5th Sub Menu Skoda > Felicia
No 5th Sub Menu Skoda > Kodiaq
No 5th Sub Menu Skoda > Yeti
No 5th Sub Menu Skoda > Octavia
No 5th Sub Menu Skoda > Octavia FL
No 5th Sub Menu Skoda > Praktik
No 5th Sub Menu Skoda > Rapid
No 5th Sub Menu Skoda > Roomster
No 5th Sub Menu Skoda > Superb
No 5th Sub Menu Skoda > Other
No 5th Sub Menu Smart > ForFour
No 5th Sub Menu Smart > ForTwo
No 5th Sub Menu Smart > Roadster
No 5th Sub Menu Smart > Other
No 5th Sub Menu Subaru > B9 Tribeca
No 5th Sub Menu Subaru > BRZ
No 5th Sub Menu Subaru > Forester
No 5th Sub Menu Subaru > Impreza
No 5th Sub Menu Subaru > Impreza WRX
No 5th Sub Menu Subaru > Legacy
No 5th Sub Menu Subaru > Levorg
No 5th Sub Menu Subaru > Outback
No 5th Sub Menu Subaru > Tribeca
No 5th Sub Menu Subaru > XV
No 5th Sub Menu Subaru > WRX STI
No 5th Sub Menu Subaru > Other
No 5th Sub Menu Suzuki > Alto
No 5th Sub Menu Su

No 5th Sub Menu Auto spares > Engine and aggregates
No 5th Sub Menu Auto spares > Body
No 5th Sub Menu Auto spares > Electrical equipment
No 5th Sub Menu Auto spares > Interior
No 5th Sub Menu Auto spares > Auto silencers
No 5th Sub Menu Auto spares > Other
No 5th Sub Menu Trucks > Aerial work platform trucks
No 5th Sub Menu Trucks > All-terrain vehicles
No 5th Sub Menu Trucks > Autotransporters
No 5th Sub Menu Trucks > Cattle trucks
No 5th Sub Menu Trucks > Concrete trucks
No 5th Sub Menu Trucks > Container trucks
No 5th Sub Menu Trucks > Dumpers
No 5th Sub Menu Trucks > Manipulator trucks
No 5th Sub Menu Trucks > Refrigerator trucks
No 5th Sub Menu Trucks > Road trains
No 5th Sub Menu Trucks > Semi trucks
No 5th Sub Menu Trucks > Tank trucks
No 5th Sub Menu Trucks > Timber trucks
No 5th Sub Menu Trucks > Tow trucks
No 5th Sub Menu Trucks > Trailers
No 5th Sub Menu Trucks > Vans
No 5th Sub Menu Trucks > Waste trucks
No 5th Sub Menu Trucks > Other
No 5th Sub Menu Trailers and semi trai

No 5th Sub Menu Bicycles for children > 3-5 year 2" 86-110
No 5th Sub Menu Bicycles for children > 4-7 year 16" 100-125
No 5th Sub Menu Bicycles for children > 6-9 year 20" 115-135
No 5th Sub Menu Bicycles for children > 8-12 year 24" 125-150
No 5th Sub Menu Bicycles for children > 9-13 year 26" 135-160
No 5th Sub Menu Bicycles for children > Other
No 5th Sub Menu Bike equipment > Helmets
No 5th Sub Menu Bike equipment > Glasses
No 5th Sub Menu Bike equipment > Clothes
No 5th Sub Menu Bike equipment > Gloves
No 5th Sub Menu Bike equipment > Shoes
No 5th Sub Menu Bike equipment > For kids
No 5th Sub Menu Bike equipment > Other
No 5th Sub Menu Bike accessories > Bags
No 5th Sub Menu Bike accessories > Baskets
No 5th Sub Menu Bike accessories > Child carriers
No 5th Sub Menu Bike accessories > Computers
No 5th Sub Menu Bike accessories > Lights
No 5th Sub Menu Bike accessories > Locks
No 5th Sub Menu Bike accessories > Other
No 5th Sub Menu Bike spare parts > Forks
No 5th Sub Menu Bike sp

No 5th Sub Menu Daugavpils > Jauna Forstate
No 5th Sub Menu Daugavpils > Jaunbuve
No 5th Sub Menu Daugavpils > Jaunstropi
No 5th Sub Menu Daugavpils > Judovka
No 5th Sub Menu Daugavpils > Kalkuni
No 5th Sub Menu Daugavpils > Kimija
No 5th Sub Menu Daugavpils > Krizi
No 5th Sub Menu Daugavpils > Liginiski
No 5th Sub Menu Daugavpils > Mazstropi
No 5th Sub Menu Daugavpils > Mezciems
No 5th Sub Menu Daugavpils > Niderkuni
No 5th Sub Menu Daugavpils > Rugeli
No 5th Sub Menu Daugavpils > Veca Forstate
No 5th Sub Menu Daugavpils > Vecstropi
No 5th Sub Menu Daugavpils > Viduspogulanka
No 5th Sub Menu Daugavpils > Vizbuli
No 5th Sub Menu Riga > Agenskalns
No 5th Sub Menu Riga > Aplokciems
No 5th Sub Menu Riga > Bergi
No 5th Sub Menu Riga > Bierini
No 5th Sub Menu Riga > Bolderaja
No 5th Sub Menu Riga > Brasa
No 5th Sub Menu Riga > Bukulti
No 5th Sub Menu Riga > Bulli
No 5th Sub Menu Riga > Center
No 5th Sub Menu Riga > Ciekurkalns
No 5th Sub Menu Riga > Darzciems
No 5th Sub Menu Riga > Darzciem

No 5th Sub Menu Home staff > Nurse
No 5th Sub Menu Home staff > Other
No 5th Sub Menu Hotel business, tourism > Animator
No 5th Sub Menu Hotel business, tourism > Doorman
No 5th Sub Menu Hotel business, tourism > Guide
No 5th Sub Menu Hotel business, tourism > Housemaid
No 5th Sub Menu Hotel business, tourism > Porter
No 5th Sub Menu Hotel business, tourism > Other
No 5th Sub Menu IT industry, telecommunicatio > Computer technician
No 5th Sub Menu IT industry, telecommunicatio > Copywriter
No 5th Sub Menu IT industry, telecommunicatio > Database Administrator
No 5th Sub Menu IT industry, telecommunicatio > Developer TK
No 5th Sub Menu IT industry, telecommunicatio > Embedded programmer
No 5th Sub Menu IT industry, telecommunicatio > Flash animator
No 5th Sub Menu IT industry, telecommunicatio > Front-Back-end developer
No 5th Sub Menu IT industry, telecommunicatio > HTML Imposer
No 5th Sub Menu IT industry, telecommunicatio > Mobile application Developer
No 5th Sub Menu IT industry, te

No 5th Sub Menu Beauty salons > Colourist
No 5th Sub Menu Beauty salons > Cosmetologist
No 5th Sub Menu Beauty salons > Dermatologist
No 5th Sub Menu Beauty salons > Hairdresser
No 5th Sub Menu Beauty salons > Manicure master
No 5th Sub Menu Beauty salons > Masseur
No 5th Sub Menu Beauty salons > Pedicure master
No 5th Sub Menu Beauty salons > Permanent makeup master
No 5th Sub Menu Beauty salons > Stylist
No 5th Sub Menu Beauty salons > Visagist
No 5th Sub Menu Beauty salons > Other
No 5th Sub Menu Building > Architect
No 5th Sub Menu Building > Assemblyman
No 5th Sub Menu Building > Bulldozer driver
No 5th Sub Menu Building > Carpenter
No 5th Sub Menu Building > Concrete worker
No 5th Sub Menu Building > Crane operator
No 5th Sub Menu Building > Decorator
No 5th Sub Menu Building > Designer
No 5th Sub Menu Building > Electrician
No 5th Sub Menu Building > Engineer
No 5th Sub Menu Building > Excavator driver
No 5th Sub Menu Building > Foreman
No 5th Sub Menu Building > Handymen
No 5th

No 5th Sub Menu Trade, services > Cashier
No 5th Sub Menu Trade, services > Charwoman
No 5th Sub Menu Trade, services > Clerk
No 5th Sub Menu Trade, services > Director
No 5th Sub Menu Trade, services > Distributor
No 5th Sub Menu Trade, services > Head of
No 5th Sub Menu Trade, services > Janitor
No 5th Sub Menu Trade, services > Manager
No 5th Sub Menu Trade, services > Office worker
No 5th Sub Menu Trade, services > Operator
No 5th Sub Menu Trade, services > Packer
No 5th Sub Menu Trade, services > Sacker
No 5th Sub Menu Trade, services > Secretary
No 5th Sub Menu Trade, services > Seller
No 5th Sub Menu Trade, services > Seller-consultant
No 5th Sub Menu Trade, services > Stacker
No 5th Sub Menu Trade, services > Storekeeper
No 5th Sub Menu Trade, services > Technician
No 5th Sub Menu Trade, services > Other
No 5th Sub Menu Transport, logistics > Aviotehnician
No 5th Sub Menu Transport, logistics > Captain
No 5th Sub Menu Transport, logistics > Courier
No 5th Sub Menu Transport, lo

No 5th Sub Menu Kitchen appliances > Repair
No 5th Sub Menu Washing, ironing > Washing machines
No 5th Sub Menu Washing, ironing > Drying machines
No 5th Sub Menu Washing, ironing > Irons
No 5th Sub Menu Washing, ironing > Steam generators
No 5th Sub Menu Washing, ironing > Ironing boards
No 5th Sub Menu Washing, ironing > Washing machines repair
No 5th Sub Menu Washing, ironing > Other
No 5th Sub Menu Vacuum cleaners > Vacuum cleaners
No 5th Sub Menu Vacuum cleaners > Wet and dry vacuum cleaners
No 5th Sub Menu Vacuum cleaners > Water filter vacuum cleaners
No 5th Sub Menu Vacuum cleaners > Hand vacuum cleaners
No 5th Sub Menu Vacuum cleaners > Battery vacuum cleaners
No 5th Sub Menu Vacuum cleaners > iRobot and other robots
No 5th Sub Menu Vacuum cleaners > Car vacuum cleaners
No 5th Sub Menu Vacuum cleaners > Central vacuum cleaners
No 5th Sub Menu Vacuum cleaners > Accessories
No 5th Sub Menu Vacuum cleaners > Vacuum cleaners repair
No 5th Sub Menu Vacuum cleaners > Other
No 5th Su

No 5th Sub Menu Stairs > Roof ladder
No 5th Sub Menu Stairs > Repair and installation
No 5th Sub Menu Stairs > Other
No 5th Sub Menu Heating boilers > Pellet boilers
No 5th Sub Menu Heating boilers > Wood heating boilers
No 5th Sub Menu Heating boilers > Gas heating boilers
No 5th Sub Menu Fireplaces > Granule fireplaces
No 5th Sub Menu Fireplaces > Norwegian fireplaces
No 5th Sub Menu Fireplaces > Fireplace stove
No 5th Sub Menu Fireplaces > Electric fireplaces
No 5th Sub Menu Tractors > Belarus
No 5th Sub Menu Tractors > Case IH
No 5th Sub Menu Tractors > Claas
No 5th Sub Menu Tractors > Fendt
No 5th Sub Menu Tractors > Iseki
No 5th Sub Menu Tractors > JCB
No 5th Sub Menu Tractors > John Deere
No 5th Sub Menu Tractors > Jumz
No 5th Sub Menu Tractors > Massey Ferguson
No 5th Sub Menu Tractors > MTZ
No 5th Sub Menu Tractors > New Holland
No 5th Sub Menu Tractors > Valtra
No 5th Sub Menu Tractors > Yanmar
No 5th Sub Menu Tractors > Other
No 5th Sub Menu Tractors > Repair and service
No 

No 5th Sub Menu Show all ads > Purse
No 5th Sub Menu Show all ads > Documents
No 5th Sub Menu Show all ads > Cat
No 5th Sub Menu Show all ads > Dog
No 5th Sub Menu Show all ads > Passport
No 5th Sub Menu Show all ads > Id card
No 5th Sub Menu Show all ads > etalons
No 5th Sub Menu Show all ads > License
No 5th Sub Menu Show all ads > Phone
No 5th Sub Menu Show all ads > Phone
No 5th Sub Menu Show all ads > Keys
No 5th Sub Menu Show all ads > Purse
No 5th Sub Menu Children's activities > Birthdays
No 5th Sub Menu Children's activities > Graduations
No 5th Sub Menu Children's activities > Children's camps
No 5th Sub Menu Children's activities > Excursions and leisure
No 5th Sub Menu Children's activities > Children's rooms and cafe
No 5th Sub Menu Children's activities > Other
No 5th Sub Menu Tickets, invitations > Ballet, opera
No 5th Sub Menu Tickets, invitations > Theaters
No 5th Sub Menu Tickets, invitations > Concerts
No 5th Sub Menu Tickets, invitations > Exhibitions
No 5th Sub Men

In [None]:
driver.close()

# Preview Data

In [346]:
names = [x['en'] for x in c]
parents = [x['parent'] for x in c]
print(len(c))
for i in range(len(parents)):
    print(parents[i], '>', names[i])

12
None > Transport
None > Real estate
None > Work and services
None > Electronics, equipment
None > Construction
None > Production
None > Agriculture
None > Household goods
None > Child's world
None > Clothes, shoes, accessories
None > Domestic animals
None > Recreation and culture


In [156]:
names = [x['en'] for x in cc]
parents = [x['parent'] for x in cc]
print(len(cc))
for i in range(len(parents)):
    print(parents[i], '>', names[i])

142
Transport > Cars
Transport > Car spare parts
Transport > Commercial Vehicles
Transport > Moto transport
Transport > Water transport
Transport > Transport rent
Transport > Personal transport
Transport > Transportation
Transport > Car services
Transport > Air transport
Transport > Carting, Buggy
Transport > Disabled persons Transports
Transport > RVs, Trailers
Transport > Trailers
Transport > Car service equipment  
Transport > Roof racks
Real estate > Apartments
Real estate > Houses
Real estate > Land property
Real estate > Farms, estates, manors
Real estate > Garages
Real estate > Bathhouses
Real estate > Buildings
Real estate > Car-care centers
Real estate > Factories
Real estate > Forest
Real estate > Gardens
Real estate > Hangars
Real estate > Mobile houses
Real estate > Modular houses
Real estate > Offices
Real estate > Parking places
Real estate > Shops
Real estate > Spaces
Real estate > Summer cottages
Real estate > Warehouses
Real estate > Workshops
Real estate > Auctions
Re

In [343]:
names = [x['en'] for x in ccc]
parents = [x['parent'] for x in ccc]
print(len(ccc))
for i in range(len(parents)):
    print(parents[i], '>', names[i])

1320
Cars > Alfa-Romeo
Cars > Audi
Cars > Bentley
Cars > BMW
Cars > Cadillac
Cars > Chevrolet
Cars > Chrysler
Cars > Citroen
Cars > Dodge
Cars > Fiat
Cars > Ford
Cars > Honda
Cars > Hyundai
Cars > Infiniti
Cars > Jaguar
Cars > Jeep
Cars > Kia
Cars > Land-Rover
Cars > Lexus
Cars > Mazda
Cars > Mercedes-Benz
Cars > Mini
Cars > Mitsubishi
Cars > Nissan
Cars > Opel
Cars > Peugeot
Cars > Porsche
Cars > Renault
Cars > Rover
Cars > Saab
Cars > Seat
Cars > Skoda
Cars > Smart
Cars > Subaru
Cars > Suzuki
Cars > Toyota
Cars > Volkswagen
Cars > Volvo
Cars > Electric cars
Cars > Other brands
Cars > Russian
Cars > Sport cars
Car spare parts > Parts from one a/m
Car spare parts > Tires
Car spare parts > Alloy wheels
Car spare parts > Auto spares
Car spare parts > Accessories
Car spare parts > Auto tuning
Car spare parts > Batteries
Car spare parts > Car chemicals, cosmetics
Car spare parts > Cars for spare parts
Car spare parts > Chargers
Car spare parts > Interior
Car spare parts > Oils
Car spare pa

Video devices > Other
GPS navigation > Igo
GPS navigation > Garmin
GPS navigation > Tom Tom
GPS navigation > Multinavi
GPS navigation > Smart
GPS navigation > Backer
GPS navigation > Repair
GPS navigation > Other
Technics for office and shop > Barcode scanners
Technics for office and shop > Cash machines
Technics for office and shop > Coin and currency counters
Technics for office and shop > Consumables
Technics for office and shop > Currency detectors
Technics for office and shop > Dot matrix printers
Technics for office and shop > Faxes
Technics for office and shop > Laminators
Technics for office and shop > Laser printers
Technics for office and shop > Office supplies
Technics for office and shop > Paper shredders
Technics for office and shop > Photocopiers
Technics for office and shop > Printers 3 in 1
Technics for office and shop > Safes, cash boxes
Technics for office and shop > Scales
Technics for office and shop > Trade showcases
Technics for office and shop > Vending machines 

Dogs, puppies > Houses and beddings
Dogs, puppies > Accessories
Dogs, puppies > Other
Cats, kittens > Cats
Cats, kittens > Feed
Cats, kittens > Carrier bags
Cats, kittens > Cat houses
Cats, kittens > Toys, accessories
Cats, kittens > Breeding
Cats, kittens > Found cat
Cats, kittens > Other
Aquarium fish > Fish
Aquarium fish > Aquariums
Aquarium fish > Aquarium plants
Aquarium fish > Filters, heaters
Aquarium fish > Cupboards for aquariums
Aquarium fish > Feed
Aquarium fish > Other
Decorative birds > Birds
Decorative birds > Bird cages
Decorative birds > Feed
Decorative birds > Accessories
Decorative birds > Other
Exotic animals > Turtles
Exotic animals > Lizards
Exotic animals > Snakes
Exotic animals > Spiders
Exotic animals > Snails
Exotic animals > Cages, terrariums
Exotic animals > Feed
Exotic animals > Accessories
Exotic animals > Other
Rodents and accessories > Rodents
Rodents and accessories > Rodent cages
Rodents and accessories > Accessories
Rodents and accessories > Feed
Roden

In [344]:
names = [x['en'] for x in cccc]
parents = [x['parent'] for x in cccc]
print(len(cccc))
for i in range(len(parents)):
    print(parents[i], '>', names[i])

2291
Alfa-Romeo > 146
Alfa-Romeo > 147
Alfa-Romeo > 155
Alfa-Romeo > 156
Alfa-Romeo > 159
Alfa-Romeo > 164
Alfa-Romeo > 166
Alfa-Romeo > Guilietta
Alfa-Romeo > Brera
Alfa-Romeo > GT
Alfa-Romeo > Mito
Alfa-Romeo > Spider
Alfa-Romeo > Other
Audi > A series
Audi > Q series
Audi > S series
Audi > RS series
Audi > R8
Audi > TT series
Audi > 80-200
Audi > Other
Bentley > Continental
Bentley > Bentayga
Bentley > Other
BMW > 1 series
BMW > 2 series
BMW > 3 series
BMW > 4 series
BMW > 5 series
BMW > 6 series
BMW > 7 series
BMW > 8 series
BMW > Alpina
BMW > X series
BMW > Z series
BMW > M series
BMW > i series
BMW > Other
Cadillac > ATS
Cadillac > BLS
Cadillac > CTS
Cadillac > DeVille
Cadillac > Eldorado
Cadillac > Escalade
Cadillac > Fleetwood
Cadillac > SRX
Cadillac > STS
Cadillac > Seville
Cadillac > XT5
Cadillac > Other
Chevrolet > Aveo
Chevrolet > Captiva
Chevrolet > Cobalt
Chevrolet > Cruze
Chevrolet > Epica
Chevrolet > Lacetti
Chevrolet > Lanos
Chevrolet > Niva
Chevrolet > Orlando
Chevrol

Toyota > Aygo
Toyota > Auris
Toyota > Avensis
Toyota > C-HR
Toyota > Camry
Toyota > Carina
Toyota > Celica
Toyota > Corolla
Toyota > FJ Cruiser
Toyota > GT 86
Toyota > Hiace
Toyota > Highlander
Toyota > Hilux
Toyota > IQ
Toyota > Yaris
Toyota > Land Cruiser
Toyota > Matrix
Toyota > MR2
Toyota > Mark II
Toyota > Previa
Toyota > Prius Plus
Toyota > Prius
Toyota > Proace
Toyota > RAV 4
Toyota > Sienna
Toyota > Solara
Toyota > Supra
Toyota > Tundra
Toyota > Venza
Toyota > Verso
Toyota > Verso-S
Toyota > Other
Volkswagen > Amarok
Volkswagen > Arteon
Volkswagen > Beetle
Volkswagen > Bora
Volkswagen > Caddy
Volkswagen > CC
Volkswagen > Corrado
Volkswagen > Crafter
Volkswagen > Caravelle
Volkswagen > Eos
Volkswagen > Fox
Volkswagen > Golf
Volkswagen > Jetta
Volkswagen > LT
Volkswagen > Lupo
Volkswagen > Passat
Volkswagen > Phaeton
Volkswagen > Polo
Volkswagen > Scirocco
Volkswagen > Sharan
Volkswagen > Taro
Volkswagen > Tiguan
Volkswagen > Touareg
Volkswagen > Touran
Volkswagen > Transporter
V

Riga district > Salaspils region
Riga district > Saulkrasti region
Riga district > Seja region
Riga district > Sigulda region
Riga district > Stopini region
Riga district > Cits
Jurmala > Priedaine
Jurmala > Lielupe
Jurmala > Bulduri
Jurmala > Dzintari
Jurmala > Majori
Jurmala > Dubulti
Jurmala > Jaundubulti
Jurmala > Pumpuri
Jurmala > Melluzi
Jurmala > Asari
Jurmala > Vaivari
Jurmala > Sloka
Jurmala > Kauguri
Jurmala > Kemeri
Jurmala > Cits
Daugavpils > Center
Daugavpils > Cerepova
Daugavpils > Cietoksnis
Daugavpils > Dzelzcelnieks
Daugavpils > Dzintari
Daugavpils > Esplanade
Daugavpils > Ezermala
Daugavpils > Gajoks
Daugavpils > Griva
Daugavpils > Jauna Forstate
Daugavpils > Jaunbuve
Daugavpils > Jaunstropi
Daugavpils > Judovka
Daugavpils > Kalkuni
Daugavpils > Kimija
Daugavpils > Krizi
Daugavpils > Liginiski
Daugavpils > Mazstropi
Daugavpils > Mezciems
Daugavpils > Niderkuni
Daugavpils > Rugeli
Daugavpils > Veca Forstate
Daugavpils > Vecstropi
Daugavpils > Viduspogulanka
Daugavpils 

Trade, services > Stacker
Trade, services > Storekeeper
Trade, services > Technician
Trade, services > Other
Transport, logistics > Aviotehnician
Transport, logistics > Captain
Transport, logistics > Courier
Transport, logistics > Declarant
Transport, logistics > Dispatcher
Transport, logistics > Docker
Transport, logistics > Driver
Transport, logistics > Instructor
Transport, logistics > Logist
Transport, logistics > Pilot
Transport, logistics > Seaman
Transport, logistics > Stewardess
Transport, logistics > Steward
Driving schools > Category A
Driving schools > Category B
Driving schools > Category B, E
Driving schools > Category C
Driving schools > Category C, E
Driving schools > Category D
Driving schools > Professional category
Driving schools > Driving instructor services
Driving schools > Trolley drivers
Driving schools > Tram drivers
Driving schools > Other
Beauty industry > Visagiste
Beauty industry > Cosmetologists
Beauty industry > Manicure, pedicure
Beauty industry > Masseu

Martial arts > Kickboxing
Martial arts > Classical wrestling
Martial arts > Hand-to-hand combat, self-defense
Martial arts > Sambo
Martial arts > Thai boxing
Martial arts > Taekwondo
Martial arts > Wushu
Martial arts > Fencing
Martial arts > Other
Water Sports > Rowing
Water Sports > Powerboating
Water Sports > Water skiing
Water Sports > Swimming
Water Sports > Diving
Water Sports > Diving sport
Water Sports > Surf
Water Sports > Other
String instruments > Acoustic Guitars
String instruments > Electric guitars
String instruments > Bass guitar
String instruments > Violins
String instruments > Amplifiers
String instruments > Accessories
String instruments > Other
Keyboards instruments > Piano
Keyboards instruments > Digital pianos
Keyboards instruments > Synthesizers
Keyboards instruments > Accordions
Keyboards instruments > Amplifiers
Keyboards instruments > Accessories
Keyboards instruments > Other
Wind instruments > Clarinets
Wind instruments > Saxophones
Wind instruments > Accessori

In [347]:
print(len(c))
for i in c:
    print(i)

12
{'en': 'Transport', 'ru': 'Транспорт', 'lv': 'Transports', 'parent': None, 'url': 'https://mm.lv/transport-2'}
{'en': 'Real estate', 'ru': 'Недвижимость', 'lv': 'Nekustamie īpašumi', 'parent': None, 'url': 'https://mm.lv/real-estate'}
{'en': 'Work and services', 'ru': 'Работа и услуги', 'lv': 'Darbs un pakalpojumi', 'parent': None, 'url': 'https://mm.lv/work-and-services'}
{'en': 'Electronics, equipment', 'ru': 'Электроника, техника', 'lv': 'Elektronika, tehnika', 'parent': None, 'url': 'https://mm.lv/electronics-equipment'}
{'en': 'Construction', 'ru': 'Строительство', 'lv': 'Celtniecība', 'parent': None, 'url': 'https://mm.lv/construction'}
{'en': 'Production', 'ru': 'Производство', 'lv': 'Ražošana', 'parent': None, 'url': 'https://mm.lv/production'}
{'en': 'Agriculture', 'ru': 'Хозяйство', 'lv': 'Lauksaimniecība', 'parent': None, 'url': 'https://mm.lv/agriculture'}
{'en': 'Household goods', 'ru': 'Товары для дома', 'lv': 'Mājai un dārzam', 'parent': None, 'url': 'https://mm.lv/ho

In [180]:
print(len(cc))
for i in cc:
    print(i)

142
{'en': 'Cars', 'ru': 'Легковые авто', 'lv': 'Vieglie auto', 'parent': 'Transport', 'url': 'https://mm.lv/cars'}
{'en': 'Car spare parts', 'ru': 'Автозапчасти', 'lv': 'Auto rezerves daļas', 'parent': 'Transport', 'url': 'https://mm.lv/car-spare-parts'}
{'en': 'Commercial Vehicles', 'ru': 'Коммерческий транспорт', 'lv': 'Komerctransports', 'parent': 'Transport', 'url': 'https://mm.lv/commercial-vehicles'}
{'en': 'Moto transport', 'ru': 'Мото транспорт', 'lv': 'Moto transports', 'parent': 'Transport', 'url': 'https://mm.lv/moto-transport-en'}
{'en': 'Water transport', 'ru': 'Водный транспорт', 'lv': 'Ūdens transports', 'parent': 'Transport', 'url': 'https://mm.lv/water-transport'}
{'en': 'Transport rent', 'ru': 'Аренда транспорта', 'lv': 'Transporta noma', 'parent': 'Transport', 'url': 'https://mm.lv/transport-rent'}
{'en': 'Personal transport', 'ru': 'Вело транспорт', 'lv': 'Velo transports', 'parent': 'Transport', 'url': 'https://mm.lv/personal-transport'}
{'en': 'Transportation', '

In [304]:
print(len(ccc))
for i in ccc:
    print(i)

1320
{'en': 'Alfa-Romeo', 'ru': 'Alfa-Romeo', 'lv': 'Alfa-Romeo', 'parent': 'Cars', 'url': 'https://mm.lv/alfa-romeo-en'}
{'en': 'Audi', 'ru': 'Автомобили Audi', 'lv': 'Vieglie auto Audi', 'parent': 'Cars', 'url': 'https://mm.lv/cars-audi'}
{'en': 'Bentley', 'ru': 'Bentley', 'lv': 'Bentley', 'parent': 'Cars', 'url': 'https://mm.lv/bentley-en'}
{'en': 'BMW', 'ru': 'BMW', 'lv': 'BMW auto', 'parent': 'Cars', 'url': 'https://mm.lv/bmw-en'}
{'en': 'Cadillac', 'ru': 'Cadillac', 'lv': 'Cadillac', 'parent': 'Cars', 'url': 'https://mm.lv/cadillac-en'}
{'en': 'Chevrolet', 'ru': 'Chevrolet', 'lv': 'Chevrolet', 'parent': 'Cars', 'url': 'https://mm.lv/chevrolet-en'}
{'en': 'Chrysler', 'ru': 'Chrysler', 'lv': 'Chrysler', 'parent': 'Cars', 'url': 'https://mm.lv/chrysler-en'}
{'en': 'Citroen', 'ru': 'Citroen', 'lv': 'Citroen', 'parent': 'Cars', 'url': 'https://mm.lv/citroen-en'}
{'en': 'Dodge', 'ru': 'Dodge', 'lv': 'Dodge', 'parent': 'Cars', 'url': 'https://mm.lv/dodge-en'}
{'en': 'Fiat', 'ru': 'Fiat'

In [198]:
print(len(cccc))
for i in cccc:
    print(i)

2291
{'en': '146', 'ru': '146', 'lv': '146', 'parent': 'Alfa-Romeo', 'url': 'https://mm.lv/alfa-romeo-146-en'}
{'en': '147', 'ru': '147', 'lv': '147', 'parent': 'Alfa-Romeo', 'url': 'https://mm.lv/alfa-romeo-147-en'}
{'en': '155', 'ru': '155', 'lv': '155', 'parent': 'Alfa-Romeo', 'url': 'https://mm.lv/alfa-romeo-155-en'}
{'en': '156', 'ru': '156', 'lv': '156', 'parent': 'Alfa-Romeo', 'url': 'https://mm.lv/alfa-romeo-156-en'}
{'en': '159', 'ru': '159', 'lv': '159', 'parent': 'Alfa-Romeo', 'url': 'https://mm.lv/alfa-romeo-159-en'}
{'en': '164', 'ru': '164', 'lv': '164', 'parent': 'Alfa-Romeo', 'url': 'https://mm.lv/alfa-romeo-164-en'}
{'en': '166', 'ru': '166', 'lv': '166', 'parent': 'Alfa-Romeo', 'url': 'https://mm.lv/alfa-romeo-166-en'}
{'en': 'Guilietta', 'ru': 'Guilietta', 'lv': 'Guilietta', 'parent': 'Alfa-Romeo', 'url': 'https://mm.lv/alfa-romeo-guilietta-en'}
{'en': 'Brera', 'ru': 'Brera', 'lv': 'Brera', 'parent': 'Alfa-Romeo', 'url': 'https://mm.lv/alfa-romeo-brera-en'}
{'en': 'G

{'en': 'Controller', 'ru': 'Контролер', 'lv': 'Kontrolieris', 'parent': 'Manufacturing, agriculture', 'url': 'https://mm.lv/controller-search-for-job'}
{'en': 'Craftsman', 'ru': 'Ремесленник', 'lv': 'Amatnieks', 'parent': 'Manufacturing, agriculture', 'url': 'https://mm.lv/craftsman-search-for-job'}
{'en': 'Designer', 'ru': 'Дизайнер', 'lv': 'Dizaineris', 'parent': 'Manufacturing, agriculture', 'url': 'https://mm.lv/designer-search-for-job'}
{'en': 'Engineer', 'ru': 'Инженер', 'lv': 'Inženieris', 'parent': 'Manufacturing, agriculture', 'url': 'https://mm.lv/engineer-search-for-job-in-manufacturing-agriculture'}
{'en': 'Furniture maker, carpenter', 'ru': 'Мебельщик, столяр', 'lv': 'Mēbeļmeistars, galdnieks', 'parent': 'Manufacturing, agriculture', 'url': 'https://mm.lv/furniture-maker-carpenter-search-for-job'}
{'en': 'Installer', 'ru': 'Монтажник', 'lv': 'Montāžnieks', 'parent': 'Manufacturing, agriculture', 'url': 'https://mm.lv/installer-search-for-job'}
{'en': 'Layout man', 'ru': 'М

In [199]:
print(len(ccccc))
for i in ccccc:
    print(i)

0


# Check for missing translation

In [187]:
# missing translations
for i in c:
    if(i['ru'] == None and i['lv'] == None):
        print(i)

In [188]:
# missing translations
for i in cc:
    if(i['ru'] == None and i['lv'] == None):
        print(i)

In [None]:
# missing translations
for i in ccc:
    print(i)

# Fixing incorrect translations

In [308]:
for ii, i in enumerate(ccc): # check for category classnames
    for j in cc:
        if(j['en'] == i['parent']):
            if(j['lv'] in i['lv']):
                print(ii, 'Not correct', i['en'], j['url'])
                driver.get(j['url'])
                menu = driver.find_element_by_class_name('cat-menu')
                categories = menu.find_elements_by_class_name('category')
                ru = None
                lv = None
                for ind, category in enumerate(categories):
                    if(category.text == i['en']):
                        index = ind
                        driver.find_element_by_id(ru_id).click() # click russian
                        menmen = driver.find_element_by_class_name('cat-menu')
                        catcat = menmen.find_elements_by_class_name('category')
                        ru = catcat[index].text
                        driver.find_element_by_id(lv_id).click() # click latvian
                        menmen = driver.find_element_by_class_name('cat-menu')
                        catcat = menmen.find_elements_by_class_name('category')
                        lv = catcat[index].text
                        break
                if(ru != None and lv != None):
                    i['ru'] = ru
                    i['lv'] = lv
                    print('Succesfully changed', i['en'], '-', ru, '-', lv)

1 Not correct Audi https://mm.lv/cars
190 Not correct Horse trailers https://mm.lv/trailers-en
193 Not correct Tent trailers https://mm.lv/trailers-en
195 Not correct Trailer hitch installation https://mm.lv/trailers-en
239 Not correct Other https://mm.lv/apartments
Succesfully changed Other - Квартиры за границей - Dzīvokļi ārzemēs
270 Not correct Other https://mm.lv/houses
Succesfully changed Other - Другой - Mājas ārzemēs
301 Not correct Other https://mm.lv/land-property
Succesfully changed Other - Земля за границей - Zeme ārzemēs
321 Not correct Arts, creativity, media https://mm.lv/jobs
322 Not correct Beauty salons https://mm.lv/jobs
323 Not correct Building https://mm.lv/jobs
324 Not correct Car business https://mm.lv/jobs
325 Not correct Carier start https://mm.lv/jobs
326 Not correct Education, science https://mm.lv/jobs
327 Not correct Entertainment, gambling https://mm.lv/jobs
328 Not correct Finance, jurisprudence https://mm.lv/jobs
329 Not correct Home staff https://mm.lv/

KeyboardInterrupt: 

In [319]:
for ii, i in enumerate(ccc): # check for active classnames
    for j in cc:
        if(j['en'] == i['parent']):
            if(j['lv'] in i['lv']):
                print(ii, 'Not correct', i['en'], i['url'])
                driver.get(i['url'])
                try:
                    driver.find_element_by_id(ru_id).click() # click russian
                    menu = driver.find_element_by_class_name('cat-menu')
                    ru = menu.find_element_by_class_name('active').text
                    driver.find_element_by_id(lv_id).click() # click latvian
                    menu = driver.find_element_by_class_name('cat-menu')
                    lv = menu.find_element_by_class_name('active').text
                    i['ru'] = ru
                    i['lv'] = lv
                    print('Pass', i['en'], '-', i['lv'], '-', i['ru'])
                except:
                    print('Fail')

1 Not correct Audi https://mm.lv/cars-audi
Fail
190 Not correct Horse trailers https://mm.lv/horse-trailers
Pass Horse trailers - Piekabes zirgiem - Прицепы для лошадей
193 Not correct Tent trailers https://mm.lv/tent-trailers
Pass Tent trailers - Piekabes ar tentu - Прицепы с тентами
195 Not correct Trailer hitch installation https://mm.lv/trailer-hitch-installation
Pass Trailer hitch installation - Piekabes āķu uzstādīšana - Установка фаркопов
239 Not correct Other https://mm.lv/apartments-other
Fail
270 Not correct Other https://mm.lv/houses-other
Fail
301 Not correct Other https://mm.lv/land-property-other
Fail
321 Not correct Arts, creativity, media https://mm.lv/arts-creativity-media-jobs
Fail
322 Not correct Beauty salons https://mm.lv/beauty-salons-jobs
Fail
323 Not correct Building https://mm.lv/building-jobs
Fail
324 Not correct Car business https://mm.lv/car-business-jobs
Fail
325 Not correct Carier start https://mm.lv/carier-start-jobs
Fail
326 Not correct Education, scienc

In [321]:
for i in ccc:
    if(i['parent'] == 'Jobs'):
        print(i)

{'en': 'Arts, creativity, media', 'ru': 'Вакансии Искусство, творчество, медиа', 'lv': 'Vakances Māksla, radošums, mediji', 'parent': 'Jobs', 'url': 'https://mm.lv/arts-creativity-media-jobs'}
{'en': 'Beauty salons', 'ru': 'Вакансии в сфере Салонов красоты', 'lv': 'Vakances Skaistumkopšanas salonos', 'parent': 'Jobs', 'url': 'https://mm.lv/beauty-salons-jobs'}
{'en': 'Building', 'ru': 'Вакансии Строительство', 'lv': 'Vakances Celtniecība', 'parent': 'Jobs', 'url': 'https://mm.lv/building-jobs'}
{'en': 'Car business', 'ru': 'Работа в сфере Автомобильного бизнеса', 'lv': 'Vakances Automobiļu biznesā', 'parent': 'Jobs', 'url': 'https://mm.lv/car-business-jobs'}
{'en': 'Carier start', 'ru': 'Вакансии Начало карьеры', 'lv': 'Vakances Karjeras sākumā', 'parent': 'Jobs', 'url': 'https://mm.lv/carier-start-jobs'}
{'en': 'Education, science', 'ru': 'Вакансии Образование, наука', 'lv': 'Vakances Izglītība, zinātne', 'parent': 'Jobs', 'url': 'https://mm.lv/education-science-jobs'}
{'en': 'Enterta

In [323]:
for i in ccc:
    if(i['parent'] == 'Mobile phones'):
        print(i)

{'en': 'Apple', 'ru': 'Мобильные телефоны Apple', 'lv': 'Mobilie telefoni Apple', 'parent': 'Mobile phones', 'url': 'https://mm.lv/mobile-phones-apple'}
{'en': 'Asus', 'ru': 'Мобильные телефоны Asus', 'lv': 'Mobilie telefoni Asus', 'parent': 'Mobile phones', 'url': 'https://mm.lv/mobile-phones-asus'}
{'en': 'BlackBerry', 'ru': 'Мобильные телефоны BlackBerry', 'lv': 'Mobilie telefoni BlackBerry', 'parent': 'Mobile phones', 'url': 'https://mm.lv/mobile-phones-blackberry'}
{'en': 'Coolpad', 'ru': 'Мобильные телефоны Coolpad', 'lv': 'Mobilie telefoni Coolpad', 'parent': 'Mobile phones', 'url': 'https://mm.lv/mobile-phones-coolpad'}
{'en': 'HTC', 'ru': 'Мобильные телефоны HTC', 'lv': 'Mobilie telefoni HTC', 'parent': 'Mobile phones', 'url': 'https://mm.lv/mobile-phones-htc'}
{'en': 'Huawei', 'ru': 'Мобильные телефоны Huawei', 'lv': 'Mobilie telefoni Huawei', 'parent': 'Mobile phones', 'url': 'https://mm.lv/mobile-phones-huawei'}
{'en': 'Lenovo', 'ru': 'Мобильные телефоны Lenovo', 'lv': 'Mob

In [277]:
# missing translations
for i in cccc:
    if(i['ru'] == None and i['lv'] == None):
        print(i)
#         driver.get(i['url'])find_element_by_class_name
#         try:
#             driver.find_element_by_id(ru_id).click() # click russian
#             ru = driver.find_element_by_class_name('title').text
#             driver.find_element_by_id(lv_id).click() # click latvian
#             lv = driver.find_element_by_class_name('title').text
#             i['ru'] = ru
#             i['lv'] = lv
#             print('Title Type', i['en'])
#         except:
#             print('Error in ', i['en'])
#             ru = None
#             lv = None
#         i['ru'] = ru
#         i['lv'] = lv

# driver.find_element_by_id(en_id).click() # click english

In [278]:
# missing translations
for i in ccccc:
    if(i['ru'] == None and i['lv'] == None):
        print(i)

In [279]:
print(len(c) + len(cc) + len(ccc) + len(cccc) + len(ccccc))

3765


# Putting Data into .xlsx

In [324]:
workbook = xlsxwriter.Workbook('data.xlsx')
worksheet = workbook.add_worksheet()
worksheet.set_column('A:G', 25)
bold_text = workbook.add_format({'bold': True})
worksheet.write(0, 0, 'Level 1', bold_text)
worksheet.write(0, 1, 'Level 2', bold_text)
worksheet.write(0, 2, 'Level 3', bold_text)
worksheet.write(0, 3, 'Level 4', bold_text)
worksheet.write(0, 4, 'EN', bold_text)
worksheet.write(0, 5, 'LV', bold_text)
worksheet.write(0, 6, 'RU', bold_text)

red_text = workbook.add_format({'font_color': 'red'}) # c
blue_text = workbook.add_format({'font_color': 'blue'}) # cc
green_text = workbook.add_format({'font_color': 'green'}) # ccc
purple_text = workbook.add_format({'font_color': 'purple'}) # cccc
orange_text = workbook.add_format({'font_color': 'orange'}) # ccccc

urls = []

row = 1
for first in c:
    if(first['url'] not in urls):
        worksheet.write(row, 0, first['en'], red_text) # level 1
#         worksheet.write(row, 1, first['en'], red_text) # level 2
#         worksheet.write(row, 2, first['en'], red_text) # level 3
#         worksheet.write(row, 3, first['en'], red_text) # level 4
        worksheet.write(row, 4, first['en'], red_text) # en
        worksheet.write(row, 5, first['lv'], red_text) # ru
        worksheet.write(row, 6, first['ru'], red_text) # lv
        urls.append(first['url'])
    print(first['en'], row)
    row += 1
    for second in cc:
        if(first['en'] == second['parent'] and second['ru'] != None and second['lv'] != None):
            if(second['url'] not in urls):
                worksheet.write(row, 0, first['en'], blue_text) # level 1
                worksheet.write(row, 1, second['en'], blue_text) # level 2
        #         worksheet.write(row, 2, second['en'], blue_text) # level 3
        #         worksheet.write(row, 3, second['en'], blue_text) # level 4
                worksheet.write(row, 4, second['en'], blue_text) # en
                worksheet.write(row, 5, second['lv'], blue_text) # ru
                worksheet.write(row, 6, second['ru'], blue_text) # lv
                urls.append(second['url'])
                row += 1
                for third in ccc:
                    if(second['en'] == third['parent'] and third['ru'] != None and third['lv'] != None):
                        if(third['url'] not in urls):
                            worksheet.write(row, 0, first['en'], green_text) # level 1
                            worksheet.write(row, 1, second['en'], green_text) # level 2
                            worksheet.write(row, 2, third['en'], green_text) # level 3
                    #         worksheet.write(row, 3, second['en'], green_text) # level 4
                            worksheet.write(row, 4, third['en'], green_text) # en
                            worksheet.write(row, 5, third['lv'], green_text) # ru
                            worksheet.write(row, 6, third['ru'], green_text) # lv
                            urls.append(third['url'])
                            row += 1
                            for fourth in cccc:
                                if(third['en'] == fourth['parent'] and fourth['ru'] != None and fourth['lv'] != None):
                                    if(fourth['url'] not in urls):
                                        worksheet.write(row, 0, first['en'], purple_text) # level 1
                                        worksheet.write(row, 1, second['en'], purple_text) # level 2
                                        worksheet.write(row, 2, third['en'], purple_text) # level 3
                                        worksheet.write(row, 3, fourth['en'], purple_text) # level 4
                                        worksheet.write(row, 4, fourth['en'], purple_text) # en
                                        worksheet.write(row, 5, fourth['lv'], purple_text) # ru
                                        worksheet.write(row, 6, fourth['ru'], purple_text) # lv
                                        urls.append(fourth['url'])
                                        row += 1
    
workbook.close()

print(row)

Transport 1
Real estate 1207
Work and services 1642
Electronics, equipment 2256
Construction 2569
Production 2863
Agriculture 2864
Household goods 2978
Child's world 3177
Clothes, shoes, accessories 3298
Domestic animals 3429
Recreation and culture 3486
3683


In [325]:
workbook = xlsxwriter.Workbook('with_link.xlsx')
worksheet = workbook.add_worksheet()
worksheet.set_column('A:G', 25)
bold_text = workbook.add_format({'bold': True})
worksheet.write(0, 0, 'Level 1', bold_text)
worksheet.write(0, 1, 'Level 2', bold_text)
worksheet.write(0, 2, 'Level 3', bold_text)
worksheet.write(0, 3, 'Level 4', bold_text)
worksheet.write(0, 4, 'EN', bold_text)
worksheet.write(0, 5, 'LV', bold_text)
worksheet.write(0, 6, 'RU', bold_text)
worksheet.write(0, 7, 'URL', bold_text)

red_text = workbook.add_format({'font_color': 'red'}) # c
blue_text = workbook.add_format({'font_color': 'blue'}) # cc
green_text = workbook.add_format({'font_color': 'green'}) # ccc
purple_text = workbook.add_format({'font_color': 'purple'}) # cccc
orange_text = workbook.add_format({'font_color': 'orange'}) # ccccc
urls = []

row = 1
for first in c:
    if(first['url'] not in urls):
        worksheet.write(row, 0, first['en'], red_text) # level 1
#         worksheet.write(row, 1, first['en'], red_text) # level 2
#         worksheet.write(row, 2, first['en'], red_text) # level 3
#         worksheet.write(row, 3, first['en'], red_text) # level 4
        worksheet.write(row, 4, first['en'], red_text) # en
        worksheet.write(row, 5, first['lv'], red_text) # ru
        worksheet.write(row, 6, first['ru'], red_text) # lv
        worksheet.write(row, 7, first['url'], red_text) # url
        urls.append(first['url'])
    print(first['en'], row)
    row += 1
    for second in cc:
        if(first['en'] == second['parent'] and second['ru'] != None and second['lv'] != None):
            if(second['url'] not in urls):
                worksheet.write(row, 0, first['en'], blue_text) # level 1
                worksheet.write(row, 1, second['en'], blue_text) # level 2
        #         worksheet.write(row, 2, second['en'], blue_text) # level 3
        #         worksheet.write(row, 3, second['en'], blue_text) # level 4
                worksheet.write(row, 4, second['en'], blue_text) # en
                worksheet.write(row, 5, second['lv'], blue_text) # ru
                worksheet.write(row, 6, second['ru'], blue_text) # lv
                worksheet.write(row, 7, second['url'], blue_text) # url
                urls.append(second['url'])
                row += 1
                for third in ccc:
                    if(second['en'] == third['parent'] and third['ru'] != None and third['lv'] != None):
                        if(third['url'] not in urls):
                            worksheet.write(row, 0, first['en'], green_text) # level 1
                            worksheet.write(row, 1, second['en'], green_text) # level 2
                            worksheet.write(row, 2, third['en'], green_text) # level 3
                    #         worksheet.write(row, 3, second['en'], green_text) # level 4
                            worksheet.write(row, 4, third['en'], green_text) # en
                            worksheet.write(row, 5, third['lv'], green_text) # ru
                            worksheet.write(row, 6, third['ru'], green_text) # lv
                            worksheet.write(row, 7, third['url'], green_text) # url
                            urls.append(third['url'])
                            row += 1
                            for fourth in cccc:
                                if(third['en'] == fourth['parent'] and fourth['ru'] != None and fourth['lv'] != None):
                                    if(fourth['url'] not in urls):
                                        worksheet.write(row, 0, first['en'], purple_text) # level 1
                                        worksheet.write(row, 1, second['en'], purple_text) # level 2
                                        worksheet.write(row, 2, third['en'], purple_text) # level 3
                                        worksheet.write(row, 3, fourth['en'], purple_text) # level 4
                                        worksheet.write(row, 4, fourth['en'], purple_text) # en
                                        worksheet.write(row, 5, fourth['lv'], purple_text) # ru
                                        worksheet.write(row, 6, fourth['ru'], purple_text) # lv
                                        worksheet.write(row, 7, fourth['url'], purple_text) # url
                                        urls.append(fourth['url'])
                                        row += 1
    
workbook.close()

print(row)

Transport 1
Real estate 1207
Work and services 1642
Electronics, equipment 2256
Construction 2569
Production 2863
Agriculture 2864
Household goods 2978
Child's world 3177
Clothes, shoes, accessories 3298
Domestic animals 3429
Recreation and culture 3486
3683


# Loading Excel File

In [366]:
# loading data from xlxs
import pandas as pd
level_1 = []
level_2 = []
level_3 = []
level_4 = []
df = pd.read_excel('with_link.xlsx')
for i in df.iloc:
    if(i['Level 2'] != i['Level 2'] and i['Level 3'] != i['Level 3'] and i['Level 4'] != i['Level 4']):
        level_1.append({'en': i['EN'], 'lv': i['LV'], 'ru': i['RU'], 'parent': None, 'url': i['URL']})
    elif(i['Level 3'] != i['Level 3'] and i['Level 4'] != i['Level 4']):
        level_2.append({'en': i['EN'], 'lv': i['LV'], 'ru': i['RU'], 'parent': i['Level 1'], 'url': i['URL']})
    elif(i['Level 4'] != i['Level 4']):
        level_3.append({'en': i['EN'], 'lv': i['LV'], 'ru': i['RU'], 'parent': i['Level 2'], 'url': i['URL']})
    else:
        level_4.append({'en': i['EN'], 'lv': i['LV'], 'ru': i['RU'], 'parent': i['Level 3'], 'url': i['URL']})
print(len(level_1))
print(len(level_2))
print(len(level_3))
print(len(level_4))
print(len(level_1) + len(level_2) + len(level_3) + len(level_4))
# print(df)

12
139
1270
2261
3682


In [367]:
for i in level_4:
    for j in level_3:
        if(i['parent'] == j['en']):
            if(j['lv'] in i['lv']):
                print(i)

{'en': 'Other', 'lv': 'Cits', 'ru': 'Другое', 'parent': 'Other', 'url': 'https://mm.lv/household-appliances-other'}
{'en': 'Other', 'lv': 'Cits', 'ru': 'Другое', 'parent': 'Other', 'url': 'https://mm.lv/household-appliances-other'}
{'en': 'Other', 'lv': 'Cits', 'ru': 'Другое', 'parent': 'Other', 'url': 'https://mm.lv/household-appliances-other'}
{'en': 'Other', 'lv': 'Cits', 'ru': 'Другое', 'parent': 'Other', 'url': 'https://mm.lv/household-appliances-other'}
{'en': 'Other', 'lv': 'Cits', 'ru': 'Другое', 'parent': 'Other', 'url': 'https://mm.lv/household-appliances-other'}
{'en': 'Other', 'lv': 'Cits', 'ru': 'Другое', 'parent': 'Other', 'url': 'https://mm.lv/household-appliances-other'}
{'en': 'Other', 'lv': 'Cits', 'ru': 'Другое', 'parent': 'Other', 'url': 'https://mm.lv/household-appliances-other'}
{'en': 'Other', 'lv': 'Cits', 'ru': 'Другое', 'parent': 'Other', 'url': 'https://mm.lv/household-appliances-other'}
{'en': 'Other', 'lv': 'Cits', 'ru': 'Другое', 'parent': 'Other', 'url': 

{'en': 'Vacuum cleaners', 'lv': 'Putekļu sūcēji', 'ru': 'Пылесосы', 'parent': 'Vacuum cleaners', 'url': 'https://mm.lv/vacuum-cleaners-en'}
{'en': 'Garden technic repair', 'lv': 'Dārza tehnikas remonts', 'ru': 'Ремонт садовой техники', 'parent': 'Garden technics', 'url': 'https://mm.lv/garden-technic-repair'}
{'en': 'Rent of garden machinery', 'lv': 'Dārza tehnikas noma', 'ru': 'Аренда садовой техники', 'parent': 'Garden technics', 'url': 'https://mm.lv/rent-of-garden-machinery'}


In [368]:
for i in level_3:
    for j in level_2:
        if(i['parent'] == j['en']):
            if(j['lv'] in i['lv']):
                print(i)

{'en': 'Horse trailers', 'lv': 'Piekabes zirgiem', 'ru': 'Прицепы для лошадей', 'parent': 'Trailers', 'url': 'https://mm.lv/horse-trailers'}
{'en': 'Tent trailers', 'lv': 'Piekabes ar tentu', 'ru': 'Прицепы с тентами', 'parent': 'Trailers', 'url': 'https://mm.lv/tent-trailers'}
{'en': 'Trailer hitch installation', 'lv': 'Piekabes āķu uzstādīšana', 'ru': 'Установка фаркопов', 'parent': 'Trailers', 'url': 'https://mm.lv/trailer-hitch-installation'}
{'en': 'Apartments abroad', 'lv': 'Dzīvokļi ārzemēs', 'ru': 'Квартиры за границей', 'parent': 'Apartments', 'url': 'https://mm.lv/apartments-apartments-abroad'}
{'en': 'Homes abroad', 'lv': 'Mājas ārzemēs', 'ru': 'Дома за границей', 'parent': 'Houses', 'url': 'https://mm.lv/houses-homes-abroad'}
{'en': 'Land area abroad', 'lv': 'Zeme ārzemēs', 'ru': 'Земля за границей', 'parent': 'Land property', 'url': 'https://mm.lv/land-property-abroad'}
{'en': 'Other', 'lv': 'Cits', 'ru': 'Другое', 'parent': 'Other', 'url': 'https://mm.lv/child-s-world-oth

In [369]:
for i in level_2:
    for j in level_1:
        if(i['parent'] == j['en']):
            if(j['lv'] in i['lv']):
                print(i)

{'en': 'Construction works', 'lv': 'Celtniecības darbi', 'ru': 'Строительные работы', 'parent': 'Construction', 'url': 'https://mm.lv/construction-works'}
{'en': 'Construction materials', 'lv': 'Celtniecības materiāli', 'ru': 'Строительные материалы', 'parent': 'Construction', 'url': 'https://mm.lv/construction-materials'}
{'en': 'Construction machinery', 'lv': 'Celtniecības tehnika', 'ru': 'Строительная техника', 'parent': 'Construction', 'url': 'https://mm.lv/construction-machinery'}
{'en': 'Agricultural machinery', 'lv': 'Lauksaimniecības tehnika', 'ru': 'Сельхозтехника', 'parent': 'Agriculture', 'url': 'https://mm.lv/agricultural-machinery'}
{'en': 'Agricultural works', 'lv': 'Lauksaimniecības darbi', 'ru': 'Сельхозработы', 'parent': 'Agriculture', 'url': 'https://mm.lv/agricultural-works'}


## To fix:
- small problem in apartments (repetition)
- level 5 category