## Finding data for Graz, Austria

### Instaling and importing Beatifull Soup for Web Scraping

In [1]:
!pip install beautifulsoup4



In [2]:
from bs4 import BeautifulSoup
import requests
import pandas as pd

__Web scraping__

On a website https://www.graz.at/cms/beitrag/10034856/7769112/Die_Bezirke.html there are zip codes for all boroughs in city of Graz in Austria


In [3]:
#requesting a url

url = requests.get("https://www.graz.at/cms/beitrag/10034856/7769112/Die_Bezirke.html")
url = url.content

In [4]:
#web scraping with beautifulsoup
soup = BeautifulSoup(url, 'html.parser')

In [5]:
table = soup.find_all('div', {'class':"txtblock-content standard"})

In [6]:
table = table[0]
table = table.ol

In [7]:
gplz_list = []
lis = table.find_all("li")
a = lis[0].text.strip().split(":")
a[1].split(" ")

['', '1,16', 'Quadratkilometer,', '3.899', '(3.933)', 'EinwohnerInnen']

In [8]:
for li in lis:
    l = li.text.strip().split(":")
    gplz_list.append(l)


In [9]:
plz_city = []
area_num = []
for plz in gplz_list:
    plz_city.append(plz[0])
    area_num.append(plz[1])
plz_city
area_num

[' 1,16 Quadratkilometer, 3.899 (3.933) EinwohnerInnen',
 ' 1,83 Quadratkilometer, 16.235 (16.123) EinwohnerInnen',
 ' 5,50 Quadratkilometer, 25.300 (24.990) EinwohnerInnen',
 ' 3,70 Quadratkilometer, 30.966 (30.891) EinwohnerInnen',
 ' 5,05 Quadratkilometer, 28.735 (27.732) EinwohnerInnen',
 ' 4,06 Quadratkilometer, 33.283 (33.082) EinwohnerInnen',
 ' 7,99 Quadratkilometer, 14.417 (14.170) EinwohnerInnen',
 ' 8,86 Quadratkilometer, 15.139 (14.937) EinwohnerInnen',
 ' 4,48 Quadratkilometer, 11.906 (11.869) EinwohnerInnen',
 ' 10,16 Quadratkilometer,\xa0 5.910 (5.886) EinwohnerInnen',
 ' 13,99 Quadratkilometer, 9.756 (9.647) EinwohnerInnen',
 ' 18,47 Quadratkilometer, 19.197 (19.022) EinwohnerInnen',
 ' 10,83 Quadratkilometer, 11.129 (10.900) EinwohnerInnen',
 ' 7,79 Quadratkilometer, 20.553 (20.075) EinwohnerInnen',
 ' 5,77 Quadratkilometer, 15.630 (15.215) EinwohnerInnen',
 ' 11,75 Quadratkilometer, 16.003 (15.590) EinwohnerInnen',
 ' 6,18 Quadratkilometer, 8.628 (8.417) EinwohnerInne

In [10]:
plz = [plz[0:4] for plz in plz_city]
borough = [bor[4:].strip() for bor in plz_city]

In [11]:
area_num = [are.split(" ") for are in area_num]
area = []
population = []

In [12]:
#geting the area of borouhg
for are in area_num:
    area.append(are[1])
area

['1,16',
 '1,83',
 '5,50',
 '3,70',
 '5,05',
 '4,06',
 '7,99',
 '8,86',
 '4,48',
 '10,16',
 '13,99',
 '18,47',
 '10,83',
 '7,79',
 '5,77',
 '11,75',
 '6,18']

In [14]:
#geting the population of borough
for pop in area_num:
    population.append(pop[3])
population

['3.899',
 '16.235',
 '25.300',
 '30.966',
 '28.735',
 '33.283',
 '14.417',
 '15.139',
 '11.906',
 '5.910',
 '9.756',
 '19.197',
 '11.129',
 '20.553',
 '15.630',
 '16.003',
 '8.628']

### Making Pandas Dataframe from Scraped Data

In [39]:
graz = pd.DataFrame({'Post Code':plz, 'Borough':borough, 'Area [km2]':area, 'Population':population })

In [40]:
graz

Unnamed: 0,Post Code,Borough,Area [km2],Population
0,8010,Innere Stadt,116,3.899
1,8010,St. Leonhard,183,16.235
2,8010,Geidorf,550,25.3
3,8020,Lend,370,30.966
4,8020,Gries,505,28.735
5,8010,Jakomini,406,33.283
6,8041,Liebenau,799,14.417
7,8042,St. Peter,886,15.139
8,8010,Waltendorf,448,11.906
9,8010,Ries,1016,5.91


### Geting latitudes and longitudes of boroughs in Graz

__Installing and importing geopy library__

In [29]:
!pip install geopy



__From geopy I use Nominatim to get latitude and longitude__

In [33]:
import geopy
from geopy.geocoders import Nominatim

In [37]:
locator = Nominatim(user_agent="myGeocoder")
#location = locator.geocode("Gries, Graz, Austria")

In [51]:
latitude =[]
longitude = []

for bor in borough:
    print(bor + ", Graz, Austria")
    location = locator.geocode(bor + ", Graz, Austria")
    if location!=None:
        latitude.append(location.latitude)
        longitude.append(location.longitude)
    else:
        latitude.append(None)
        longitude.append(None)

Innere Stadt, Graz, Austria
St. Leonhard, Graz, Austria
Geidorf, Graz, Austria
Lend, Graz, Austria
Gries, Graz, Austria
Jakomini, Graz, Austria
Liebenau, Graz, Austria
St. Peter, Graz, Austria
Waltendorf, Graz, Austria
Ries, Graz, Austria
Mariatrost, Graz, Austria
Andritz, Graz, Austria
Gösting, Graz, Austria
Eggenberg, Graz, Austria
Wetzelsdorf, Graz, Austria
Straßgang, Graz, Austria
Puntigam, Graz, Austria


__Adding latitude and longitude into padndas dataframe__

In [54]:
graz['Latitude'] = latitude
graz['longitude'] = longitude

In [55]:
graz

Unnamed: 0,Post Code,Borough,Area [km2],Population,Latitude,longitude
0,8010,Innere Stadt,116,3.899,47.074261,15.438466
1,8010,St. Leonhard,183,16.235,47.068287,15.456344
2,8010,Geidorf,550,25.3,47.084668,15.442896
3,8020,Lend,370,30.966,47.079675,15.420325
4,8020,Gries,505,28.735,47.061222,15.42737
5,8010,Jakomini,406,33.283,47.059623,15.444707
6,8041,Liebenau,799,14.417,47.040169,15.449265
7,8042,St. Peter,886,15.139,47.058701,15.469985
8,8010,Waltendorf,448,11.906,47.067741,15.477172
9,8010,Ries,1016,5.91,47.088113,15.49718


## Now I need to find everything for Stuttgart, Germany

__Web scraping with beautifulsoup__

In [57]:
url = "https://www.suche-postleitzahl.org/stuttgart-plz-70173-70629.608e"
url = requests.get(url)
url = url.content

Pandas library has a method read_html, which puts data from table in one dataframe

In [58]:
stut = pd.read_html(url)

In [64]:
stut[1]

Unnamed: 0,Stadtteil,Postleitzahl
0,Asemwald,70599
1,Bad Cannstatt,"70191, 70372, 70374, 70376, 70378"
2,Bergheim,70499
3,Birkach,70599
4,Botnang,70195
5,Burgholzhof,70376
6,Büsnau,70569
7,Chausseefeld,70599
8,Degerloch,70597
9,Dürrlewang,70565
