In [None]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import urllib.parse
import tkinter as tk
import plotly.express as px

# Site root; prepended to each ad link's href when the scraper builds full URLs.
main_url = 'https://ikman.lk'

# Result table: the scraping loop adds one row per ad with these columns.
df = pd.DataFrame(columns=['Year', 'Price', 'District', 'Mileage','Link'])

def convert_to_num(input_str):
    """Return the integer in a string such as "Rs 1,250,000" or "45,000".

    Every "Rs " marker and every comma is removed before the remaining text is
    handed to int(), so text that is still non-numeric raises ValueError.
    """
    return int(input_str.replace("Rs ", "").replace(",", ""))

# Defaults identical to what an empty form produces. Without them, closing the
# window without pressing Submit leaves these names undefined and the
# conversion step after mainloop() fails with NameError.
model = min_year = max_year = min_price = max_price = min_miles = max_miles = None
pages = 1

# Root window of the input dialog.
root = tk.Tk()
root.title("Enter Inputs")


def get_inputs():
    """Copy the form's contents into module-level globals and close the window.

    Each field is stripped of surrounding whitespace first, so a field that
    holds only spaces counts as blank. Blank optional fields are stored as
    None and a blank page count as 1; anything else is stored as the typed
    text and converted to int later by the caller.
    """
    global model, min_year, max_year, min_price, max_price, min_miles, max_miles, pages

    def _field(entry, default=None):
        # Whitespace-only input must not pass as a value: it would end up as a
        # blank search query or make int() fail further down.
        return entry.get().strip() or default

    model = _field(model_entry)
    min_year = _field(min_year_entry)
    max_year = _field(max_year_entry)
    min_price = _field(min_price_entry)
    max_price = _field(max_price_entry)
    min_miles = _field(min_miles_entry)
    max_miles = _field(max_miles_entry)
    pages = _field(pages_entry, 1)

    # Destroying the root window ends mainloop(), letting the script continue.
    root.destroy()

def _add_labelled_entry(row, caption):
    """Place a caption label (column 0) and a text entry (column 1) on one grid row.

    Returns the (label, entry) pair so both widgets stay reachable by name.
    """
    label = tk.Label(root, text=caption)
    label.grid(row=row, column=0)
    entry = tk.Entry(root)
    entry.grid(row=row, column=1)
    return label, entry


# One row per search parameter; get_inputs() reads the *_entry widgets.
model_label, model_entry = _add_labelled_entry(0, "Car Model (optional):")
min_year_label, min_year_entry = _add_labelled_entry(1, "Minimum YoM (optional):")
max_year_label, max_year_entry = _add_labelled_entry(2, "Maximum YoM (optional):")
min_price_label, min_price_entry = _add_labelled_entry(3, "Minimum Price (optional):")
max_price_label, max_price_entry = _add_labelled_entry(4, "Maximum Price (optional):")
min_miles_label, min_miles_entry = _add_labelled_entry(5, "Minimum Mileage (optional):")
max_miles_label, max_miles_entry = _add_labelled_entry(6, "Maximum Mileage (optional):")
pages_label, pages_entry = _add_labelled_entry(7, "Pages to Extract (optional, default 1):")

# Submit spans both columns underneath the fields.
submit_button = tk.Button(root, text="Submit", command=get_inputs)
submit_button.grid(row=8, column=0, columnspan=2)

# Blocks here until the window is destroyed.
root.mainloop()

# Turn the numeric filters from text into integers; unset (None) filters are
# left exactly as they are.
min_year = int(min_year) if min_year else min_year
max_year = int(max_year) if max_year else max_year
min_price = int(min_price) if min_price else min_price
max_price = int(max_price) if max_price else max_price
min_miles = int(min_miles) if min_miles else min_miles
max_miles = int(max_miles) if max_miles else max_miles

# The page count always has a value (defaults to 1), so convert unconditionally.
pages = int(pages)

# Scrape every requested results page, building one DataFrame per page and
# joining them once at the end (DataFrame.append no longer exists in pandas 2).
base_search_url = 'https://ikman.lk/en/ads/sri-lanka/cars'

# Optional range filters as (query-string key, user value); falsy values are
# left out of the URL.
optional_filters = [
    ('numeric.model_year.minimum', min_year),
    ('numeric.model_year.maximum', max_year),
    ('money.price.minimum', min_price),
    ('money.price.maximum', max_price),
    ('numeric.mileage.minimum', min_miles),
    ('numeric.mileage.maximum', max_miles),
]

page_frames = []
for page_number in range(1, pages + 1):
    params = [('sort', 'date'), ('buy_now', 0), ('urgent', 0)]
    if model:
        # Only send a query when a model was entered; formatting None into the
        # URL would search for the literal text "None".
        params.append(('query', model))
    params.append(('page', page_number))
    params.extend((key, value) for key, value in optional_filters if value)
    # urlencode escapes spaces and other reserved characters in the model name.
    url = f'{base_search_url}?{urllib.parse.urlencode(params)}'

    # Fail loudly on a hung connection or an HTTP error instead of trying to
    # parse an error page.
    page = requests.get(url, timeout=30)
    page.raise_for_status()

    soup = BeautifulSoup(page.text, 'lxml')

    lists = soup.find('ul', class_='list--3NxGO')
    if lists is None:
        # No ad list on this page; there is nothing to extract from it.
        print(f'Page {page_number}: no ad list found, skipping.')
        continue

    titles = lists.find_all('h2')
    prices = lists.find_all('div', class_='price--3SnqI color--t0tGX')
    districts = lists.find_all('div', class_='description--2-ez3')
    links = lists.find_all('a', class_='card-link--3ssYv gtm-ad-item')

    # The year is taken from the last four characters of each ad title; it is
    # validated later by convert_year().
    years_list = [title.get_text()[-4:] for title in titles]
    prices_list = [convert_to_num(price.get_text()) for price in prices]
    # Keep only the part of the description before the first comma.
    districts_list = [district.get_text().split(',')[0] for district in districts]

    # This step strips nodes out of the parsed tree, so it must run after the
    # title, price and district text has been read above.
    mileages_list = []
    for content in lists.find_all('div', class_='content--3JNQz'):
        title = content.find('h2')
        if title is not None:
            title.decompose()
        district = content.find('div', class_='description--2-ez3')
        if district is not None:
            district.decompose()
        price = content.find('div', class_='price--3SnqI color--t0tGX')
        if price is not None:
            price.decompose()
        # Whatever text is left, up to " km", is taken as the mileage.
        mileage_text = content.get_text().strip().split(' km')[0]
        mileages_list.append(convert_to_num(mileage_text))

    link_list = [main_url + link['href'] for link in links]

    page_frames.append(pd.DataFrame({
        'Year': years_list,
        'Price': prices_list,
        'District': districts_list,
        'Mileage': mileages_list,
        'Link': link_list,
    }))

if page_frames:
    # Leave the empty placeholder frame out of the concat so the scraped
    # columns keep their own dtypes.
    existing_frames = [] if df.empty else [df]
    df = pd.concat(existing_frames + page_frames, ignore_index=True)

def convert_year(year_str):
    """Parse a model year, returning None for anything implausible.

    Args:
        year_str: Value to interpret as a year (the scraper passes the last
            four characters of an ad title).

    Returns:
        The year as an int when it converts cleanly and lies within 1900-2100
        inclusive; otherwise None.
    """
    try:
        year_int = int(year_str)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures mean "not a year"; anything else (for
        # example KeyboardInterrupt) must propagate.
        return None
    return year_int if 1900 <= year_int <= 2100 else None

# Validate the scraped years, then drop rows with no usable year or with any
# other missing field.
df['Year'] = df['Year'].apply(convert_year)
df = df[df['Year'].notnull()].dropna()
df = df[df['Year'].astype(int) != 0]

# 1-based row label shown in the hover box.
df['Index'] = df.index + 1

# Mileage against price, coloured by year of manufacture.
fig = px.scatter(
    df,
    x='Mileage',
    y='Price',
    color='Year',
    hover_data=['Index', 'Link'],
)

# Each marker also carries size-1 text holding its ad link as an HTML anchor.
fig.update_traces(
    textposition='middle center',
    textfont_size=1,
    mode='markers+text',
    marker=dict(size=6, line=dict(width=1, color='DarkSlateGrey')),
    texttemplate='<a href="%{customdata[1]}">%{customdata[1]}</a>',
    customdata=list(zip(df['Index'], df['Link'])),
)

fig.show()