In [197]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
import re 
import requests
from bs4 import BeautifulSoup as bs

In [198]:
def get_longitude_latitude(urls, headers):
    house_latitude = []
    house_longitude = []
    for url in urls: 
        response = requests.get(url, headers=headers)
        soup = bs(response.content, 'html.parser')

        latitude = soup.find('div', {'id' : 'item_map' })['data-lat']
        longitude = soup.find('div', {'id' : 'item_map' })['data-lng']
        house_latitude.append(latitude)
        house_longitude.append(longitude)
    return house_latitude, house_longitude

In [199]:
def web_scraping(url, headers):
      prices_list = []
      locations_list = []
      num_rooms_list = []
      area_list = []
      floor_list = []
      house_url_list = []

      for i in range(1, 10):
          base_url = f'{url}?page={i}'
          page = requests.get(base_url, headers=headers)
          soup = bs(page.content, features='lxml')
          house_url = soup.find_all('div', {'class': "items-i"})
          prices = soup.find_all('div', {'class': "price"})
          locations = soup.find_all('div', {'class': "location"})
          rooms = soup.find_all('ul', {'class': "name"})
          for j in range(len(prices)):
              if prices[j].text.split('/')[1] == 'ay':
                  price = int(prices[j].text.split('AZN')[0].replace(' ', ''))
              else:
                  price = int(prices[j].text.split('AZN')[0].replace(' ', '')) * 30
              loc = locations[j].text.strip()
              room_text = rooms[j].text.strip()
              link = house_url[j].find('a', {'class':'item_link'})['href']
              rooms_match = re.search(r'(\d+ otaqlı).*?(\d+ m²).*?(\d+/\d+ mərtəbə)', room_text)
              if rooms_match:
                  num_rooms = rooms_match.group(1).split(' ')[0]
                  area = rooms_match.group(2).split(' ')[0]
                  floor = rooms_match.group(3)
              else:
                  num_rooms = "N/A"
                  area = "N/A"
                  floor = "N/A"

              prices_list.append(price)
              locations_list.append(loc)
              num_rooms_list.append(num_rooms)
              area_list.append(area)
              floor_list.append(floor)
              house_url_list.append('https://bina.az'+link)
      latitude,longitude=get_longitude_latitude(house_url_list, headers)
      data = {
          'Price': prices_list,
          'Location': locations_list,
          'Number of Rooms': num_rooms_list,
          'Area': area_list,
          'Floor': floor_list,
          'URL': house_url_list,
          'Latitude':latitude,
          'Longitude':longitude
      }
      
      df = pd.DataFrame(data)
      return df

In [200]:
url = 'https://bina.az/baki/kiraye/menziller'

In [201]:
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
}

In [202]:
df = web_scraping(url, headers)

In [203]:
df

Unnamed: 0,Price,Location,Number of Rooms,Area,Floor,URL,Latitude,Longitude
0,2500,Nəsimi r.,4,210,6/16 mərtəbə,https://bina.az/items/4465807,40.39232579999999,49.82880550000004
1,950,Nərimanov r.,3,130,6/14 mərtəbə,https://bina.az/items/4445253,40.41179853704892,49.84264992012954
2,2400,28 May m.,3,157,7/20 mərtəbə,https://bina.az/items/3444891,40.38176000136104,49.8549934476614
3,1500,Nəsimi r.,2,95,5/14 mərtəbə,https://bina.az/items/4365411,40.39232579999999,49.82880550000004
4,8000,Nardaran q.,2,96,4/4 mərtəbə,https://bina.az/items/4491270,40.57388149963061,49.98910996560669
...,...,...,...,...,...,...,...,...
247,1950,Sahil m.,3,95,4/7 mərtəbə,https://bina.az/items/3990656,40.36764489001798,49.83918583028572
248,160000,Neftçilər m.,3,83,3/9 mərtəbə,https://bina.az/items/4500213,40.40959271720067,49.94411068849102
249,3300,İçəri Şəhər m.,4,150,2/4 mərtəbə,https://bina.az/items/3280009,40.36808630320561,49.82978736989754
250,2200,Şah İsmayıl Xətai m.,3,110,7/7 mərtəbə,https://bina.az/items/4500207,40.38214417285258,49.88108193383788


In [204]:
metros_name = ['Halglar Dostlugu','Ahmedli','Neftchilar','Gara Garayev','Koroglu','28 May','Nariman Narimanov','Ganjlik','8 Noyabr','Inshaatchilar','Nizami Gəncəvi','Nesimi','Khojasan','Azadliq prospekti','Ulduz','Dərnəgül','Memar Ajami']
metros_latitude = [40.39689,40.38556,40.41116,40.41761,40.42084,40.37986,40.40282,40.3999,40.40187,40.38909,40.37932,40.42465,40.42308,40.42596,40.414963,40.4257064,40.4104823]
metros_longitude = [49.95299,49.95395,49.94257,49.93396,49.91802,49.84864,49.87064,49.85057,49.82051,49.80236,49.83002,49.82628,49.77961,49.84293,49.8914348,49.8616781,49.8124456]
data = pd.DataFrame({'Metro Name':metros_name,'Latitude':metros_latitude,'Longitude':metros_longitude})

In [205]:
data

Unnamed: 0,Metro Name,Latitude,Longitude
0,Halglar Dostlugu,40.39689,49.95299
1,Ahmedli,40.38556,49.95395
2,Neftchilar,40.41116,49.94257
3,Gara Garayev,40.41761,49.93396
4,Koroglu,40.42084,49.91802
5,28 May,40.37986,49.84864
6,Nariman Narimanov,40.40282,49.87064
7,Ganjlik,40.3999,49.85057
8,8 Noyabr,40.40187,49.82051
9,Inshaatchilar,40.38909,49.80236


In [208]:
import requests
from urllib.parse import urlencode

# Metro stansiyalarının adlarını saxlayan siyahı
metro_stations = [
    "İçərişəhər",
    "Sahil",
    "28 May",
    "Gənclik",
    "Nəriman Nərimanov",
    "Ulduz",
    "Koroğlu",
    "Qara Qarayev",
    "Neftçilər",
    "Xalqlar Dostluğu",
    "Həzi Aslanov",
    "Əhmədli",
    "Xətai",
    "Şah İsmail Xətai",
    "Cəfər Cabbarlı",
    "Nizami",
    "Elmlər Akademiyası",
    "20 Yanvar",
    "Memar Əcəmi",
    "Nəsimi",
    "Azadlıq Prospekti",
    "Dərnəgül",
]

# Nominatim API baz URL
base_url = "https://nominatim.openstreetmap.org/search?"

# Hər bir stansiyanın koordinatlarını əldə edin
for station in metro_stations:
    params = {
        'q': f"{station} metro station Baku Azerbaijan",
        'format': 'json'
    }
    url = base_url + urlencode(params)
    response = requests.get(url)
    data = response.json()

    # Nəticəni yoxlayın
    if data:
        lat = data[0]['lat']
        lon = data[0]['lon']
        print(f"Stansiya: {station}, Enlik: {lat}, Uzunluq: {lon}")
    else:
        print(f"{station} üçün koordinatlar tapılmadı")


JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [None]:
re