In [1]:
import json
import re
from pathlib import Path

import pandas as pd
import requests
from bs4 import BeautifulSoup as soup

In [2]:
# Page that is downloaded and scraped for the embedded issue data.
AMPERURL = r'https://www.amper.org.ro/sesizari/'
# HTTP headers sent with the request; identifies this scraper via the user-agent.
HEADERS = {"user-agent": "@WSzP's Problema-ML miner"}
# Destination file for the scraped issues (path is relative to the working directory).
DATA_CSV_PATH = r'./Data/amper.csv'

In [3]:
def crawl(url, headers, timeout=30):
    """Download a page and return its body as decoded text.

    The response headers, the content type and the encoding reported by
    ``requests`` are printed as a quick sanity check of what was fetched.

    Args:
        url: Address of the page to download.
        headers: Mapping of HTTP request headers to send (e.g. the user-agent).
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled connection.

    Returns:
        The response body decoded to ``str``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeouts.
    """
    res = requests.get(url, headers=headers, timeout=timeout)
    # Stop here on an error status instead of handing an error page to the parser.
    res.raise_for_status()
    print(res.headers)
    # .get() so a response without a Content-Type header does not raise KeyError.
    print(f"Content-Type: {res.headers.get('Content-Type')}")
    print(f"Encoding: {res.encoding}")
    return res.text

In [4]:
# Fetch the raw HTML of the AMPER issues page using the configured headers.
amper = crawl(url=AMPERURL, headers=HEADERS)

{'Date': 'Tue, 19 May 2020 20:49:26 GMT', 'Server': 'Apache', 'X-Frame-Options': 'SAMEORIGIN', 'X-Pingback': 'https://www.amper.org.ro/sesizari/xmlrpc.php', 'Link': '<https://www.amper.org.ro/sesizari/wp-json/>; rel="https://api.w.org/", <https://www.amper.org.ro/sesizari/>; rel=shortlink', 'Keep-Alive': 'timeout=5, max=100', 'Connection': 'Keep-Alive', 'Transfer-Encoding': 'chunked', 'Content-Type': 'text/html; charset=UTF-8'}
Content-Type: text/html; charset=UTF-8
Encoding: UTF-8


In [5]:
page_soup = soup(amper, 'html.parser')

# Pick the inline script that defines `map_location` by its content rather
# than by a fixed position: the position shifts whenever the site adds or
# removes a <script> tag. Tags without inline text (`.string` is None) are skipped.
script_tags = page_soup.select("[type='text/javascript']")
data = next(
    (
        tag.string.strip()
        for tag in script_tags
        if tag.string and 'var map_location' in tag.string
    ),
    None,
)
if data is None:
    raise ValueError("No inline script defining 'map_location' was found on the page")

# Preview the beginning of the script text.
data[:500]

'/* <![CDATA[ */\nvar theme_data = {"url":"https:\\/\\/www.amper.org.ro\\/sesizari\\/wp-content\\/themes\\/sesizari","error_message":"V\\u0103 rug\\u0103m s\\u0103 introduce\\u021bi un fi\\u0219ier valid. (jpg, png, pdf)"};\nvar map_location = {"pins":[{"title":"Str.Parangului 2, T\\u00e2rgu Mure\\u0219, judetul: Mures","description":"Zona de la scarile FURNICA urcare spre Str.Magurei -Str.Parangului in stare dezastruoasa, inclusiv starea scarilor , respectiv asa zis parc de joaca.","url":"https:\\/\\/www.amper.o'

In [6]:
# The issues are embedded in a one-line JavaScript assignment of the form
#     var map_location = {"pins":[ ... ]};
# Strip that wrapper so that only the JSON array of pins remains.
map_location_regex = r'^.*map\_location.*$'
map_location_prefix = 'var map_location = {"pins":'
map_location_suffix = '};'

map_location_lines = re.findall(map_location_regex, data, re.MULTILINE)
if not map_location_lines:
    raise ValueError("No line mentioning 'map_location' was found in the script text")

# The first matching line is taken as the assignment; verify the wrapper
# before slicing so a changed page layout fails loudly instead of producing
# a silently corrupted JSON string.
map_location_line = map_location_lines[0]
if not (map_location_line.startswith(map_location_prefix)
        and map_location_line.endswith(map_location_suffix)):
    raise ValueError("Unexpected format of the 'map_location' assignment line")

map_location_json = map_location_line[len(map_location_prefix):-len(map_location_suffix)]

# Show both ends of the extracted JSON text as a sanity check.
print(map_location_json[:500])
print('...')
print(map_location_json[-500:])

[{"title":"Str.Parangului 2, T\u00e2rgu Mure\u0219, judetul: Mures","description":"Zona de la scarile FURNICA urcare spre Str.Magurei -Str.Parangului in stare dezastruoasa, inclusiv starea scarilor , respectiv asa zis parc de joaca.","url":"https:\/\/www.amper.org.ro\/sesizari\/sesizare\/str-parangului-2-targu-mures-mures\/","address":"Romania, Mures, T\u00e2rgu Mure\u0219, Str.Parangului 2","coordinates":"46.535621, 24.560997","status_value":"pending","status_label":"In curs de rezolvare","cate
...
811798096","status_value":"pending","status_label":"In curs de rezolvare","category":46},{"title":"Strada Recoltei 7, Targu-Mures, judetul: Mures","description":"Groapa extraordinar de mare la trecerea caii ferate.","url":"https:\/\/www.amper.org.ro\/sesizari\/sesizare\/strada-recoltei-7-targu-mures-mures\/","address":"Romania, Mures, Targu-Mures, Strada Recoltei 7","coordinates":"46.51812263151161, 24.52126979827881","status_value":"pending","status_label":"In curs de rezolvare","category"

In [7]:
# Decode the extracted JSON text into Python objects.
map_loc = json.loads(map_location_json)

# Later cells iterate over the records and index each one by key, so fail
# early with a readable message if the payload is not a non-empty list.
assert isinstance(map_loc, list) and map_loc, (
    "Expected map_location_json to decode to a non-empty JSON array of issues"
)

# Show the first record to inspect the available fields.
map_loc[0]

{'title': 'Str.Parangului 2, Târgu Mureș, judetul: Mures',
 'description': 'Zona de la scarile FURNICA urcare spre Str.Magurei -Str.Parangului in stare dezastruoasa, inclusiv starea scarilor , respectiv asa zis parc de joaca.',
 'url': 'https://www.amper.org.ro/sesizari/sesizare/str-parangului-2-targu-mures-mures/',
 'address': 'Romania, Mures, Târgu Mureș, Str.Parangului 2',
 'coordinates': '46.535621, 24.560997',
 'status_value': 'pending',
 'status_label': 'In curs de rezolvare',
 'category': 57}

In [11]:
# Count the decoded issue records and report the total.
numIssues = len(map_loc)
issue_count_message = f'Number of issues on AMPER: {numIssues}'
print(issue_count_message)

Number of issues on AMPER: 758


In [9]:
# JSON field -> output column name; the order of this mapping is the column order.
field_to_column = {
    'category': 'Category_ID',
    'title': 'Title',
    'description': 'Description',
    'address': 'Address',
    'coordinates': 'Coordinates',
    'status_value': 'Status_value',
    'status_label': 'Status_label',
    'url': 'Url',
}

# One row per issue, holding the selected fields in column order.
issues = [[record[field] for field in field_to_column] for record in map_loc]

df = pd.DataFrame(issues, columns=list(field_to_column.values()))
df

Unnamed: 0,Category_ID,Title,Description,Address,Coordinates,Status_value,Status_label,Url
0,57,"Str.Parangului 2, Târgu Mureș, judetul: Mures",Zona de la scarile FURNICA urcare spre Str.Mag...,"Romania, Mures, Târgu Mureș, Str.Parangului 2","46.535621, 24.560997",pending,In curs de rezolvare,https://www.amper.org.ro/sesizari/sesizare/str...
1,52,"Godeanu, bl. 28, Târgu Mureș, judetul: Mures","În bucla pe care o face strada Godeanu, gunoiu...","Romania, Mures, Târgu Mureș, Godeanu, bl. 28","46.52525199999999, 24.554159",complete,Rezolvat,https://www.amper.org.ro/sesizari/sesizare/god...
2,57,"Bd. 1 Decembrie 1918 nr. 219, Târgu Mureș, jud...",Semaforul de la Fortuna este stricat din cauza...,"Romania, Mures, Târgu Mureș, Bd. 1 Decembrie 1...","46.5352179, 24.5838776",complete,Rezolvat,https://www.amper.org.ro/sesizari/sesizare/bd-...
3,56,"rozmarinului, 1, Târgu Mureș, judetul: Mures",La intersectia dintre strazile Rozmarinului si...,"Romania, Mures, Târgu Mureș, rozmarinului, 1","46.5260062, 24.5361727",complete,Rezolvat,https://www.amper.org.ro/sesizari/sesizare/roz...
4,56,"Gheorghe Doja 64-68, Târgu Mureș, judetul: Mures",Construirea unei statii de autobuz interjudete...,"Romania, Mures, Târgu Mureș, Gheorghe Doja 64-68","46.5268568, 24.5414245",complete,Rezolvat,https://www.amper.org.ro/sesizari/sesizare/ghe...
...,...,...,...,...,...,...,...,...
753,56,"Aleea Carpați 21, Targu-Mures, judetul: Mures",Portiune lipsa in trecerea caii ferate din ale...,"Romania, Mures, Targu-Mures, Aleea Carpați 21","46.553327087591285, 24.55823063850403",complete,Rezolvat,https://www.amper.org.ro/sesizari/sesizare/ale...
754,56,"Strada Secerei 22, Targu-Mures, judetul: Mures",Pod surpat!,"Romania, Mures, Targu-Mures, Strada Secerei 22","46.536701914998574, 24.571722149848938",complete,Rezolvat,https://www.amper.org.ro/sesizari/sesizare/str...
755,56,"Strada Somnului 1, Targu-Mures, judetul: Mures",Gropi si denivelari imense imediat cum treci c...,"Romania, Mures, Targu-Mures, Strada Somnului 1","46.54757194493636, 24.54906553030014",pending,In curs de rezolvare,https://www.amper.org.ro/sesizari/sesizare/str...
756,46,"Strada Şurianu 8, Targu-Mures, judetul: Mures",Curtea Scolii 17 arata groaznic dupa ploaie.,"Romania, Mures, Targu-Mures, Strada Şurianu 8","46.53108524920353, 24.560043811798096",pending,In curs de rezolvare,https://www.amper.org.ro/sesizari/sesizare/str...


In [10]:
# Create the output folder if needed: to_csv does not create missing
# directories, so a fresh checkout without ./Data would fail here.
Path(DATA_CSV_PATH).parent.mkdir(parents=True, exist_ok=True)

# The default index column is kept so the layout of the written file is unchanged.
df.to_csv(DATA_CSV_PATH)