# 物件探しプログラム

## ライブラリーをインポートする

In [2]:
import requests
from bs4 import BeautifulSoup
import re
import pandas as pd
from datetime import date
import glob
import sys
from PyQt6.QtWidgets import (
    QApplication, QMainWindow, QVBoxLayout, 
    QFormLayout, QHBoxLayout, QGridLayout, 
    QLabel, QPushButton, QLineEdit, QWidget
)
from PyQt6.QtGui import QImage, QPixmap, QIcon
import PyQt6.QtCore as QtCore
import urllib.request
from PyQt6.QtWebEngineWidgets import QWebEngineView
import io, folium
from folium.features import CustomIcon

## 物件データをウェブスクレイピングで取得する

In [3]:
prefecture = '東京都'
suumo_url = 'https://suumo.jp/jj/chintai/ichiran/FR301FC001/?ar=030&bs=040&ta=13&sc=13106&sc=13107&sc=13108&sc=13118&sc=13121&sc=13122&sc=13123&cb=0.0&ct=9999999&mb=0&mt=9999999&et=9999999&cn=9999999&shkr1=03&shkr2=03&shkr3=03&shkr4=03&sngz=&po1=25&pc=50&page={}'
msearchgsi_url = 'https://msearch.gsi.go.jp/address-search/AddressSearch?q='

# Return the parsed HTML of a web page so it can be scraped
def get_html(url):
    """Download ``url`` and parse the response body as HTML.

    Args:
        url: Address of the page to request.

    Returns:
        A ``BeautifulSoup`` tree built from the raw response bytes with the
        ``html.parser`` backend.
    """
    response = requests.get(url)
    return BeautifulSoup(response.content, 'html.parser')

# Split an address into its prefecture and municipality
def divide_address(whole_address):
    """Split a Japanese address into prefecture and municipality.

    The pattern captures three groups: the prefecture (ending in one of
    都/道/府/県), the municipality, and the remainder of the address. The
    municipality alternatives are tried in order: a fixed list of city names
    that need special handling, then "...郡...町/村", then "...市...区", and
    finally the shortest prefix ending in 市/区/町/村.

    Args:
        whole_address: Full address string starting with the prefecture.
            At least one character must follow the municipality.

    Returns:
        A ``(prefecture, municipality)`` tuple of strings.

    Raises:
        ValueError: If ``whole_address`` does not match the expected layout.
    """
    pattern = r'(...??[都道府県])((?:旭川|伊達|石狩|盛岡|奥州|田村|南相馬|那須塩原|東村山|武蔵村山|羽村|十日町|上越|富山|野々市|大町|蒲郡|四日市|姫路|大和郡山|廿日市|下松|岩国|田川|大村)市|.+?郡(?:玉村|大町|.+?)[町村]|.+?市.+?区|.+?[市区町村])(.+)'
    matches = re.match(pattern, whole_address)
    if matches is None:
        # Without this check the subscript below fails with an opaque
        # "'NoneType' object is not subscriptable" error.
        raise ValueError(
            f'could not split address into prefecture/municipality: {whole_address!r}'
        )
    return matches[1], matches[2]

# Look up the coordinates of an address
def get_coords(address):
    """Query the address-search endpoint and return the first hit's coordinates.

    The address is appended verbatim to ``msearchgsi_url`` and the response is
    decoded as JSON. Only the first element of the returned list is used.

    Args:
        address: Address string to search for.

    Returns:
        A 2-tuple holding the two elements of the first result's
        ``geometry.coordinates``, in the order the API returns them.

    NOTE(review): GeoJSON-style APIs normally order coordinates as
    (longitude, latitude). The scraping code stores this function's first
    value under '緯度' and the second under '経度', then feeds them in that
    order into URL templates of the form ``longitude={}&latitude={}``. The
    values are therefore probably (longitude, latitude) and the column names
    swapped - confirm before renaming anything, since downstream code relies
    on the current order.
    """
    response = requests.get(msearchgsi_url + address)
    first_hit = response.json()[0]
    first, second = first_hit['geometry']['coordinates']
    return first, second

# Fetch hazard information from the supportmap.jp API
def get_api_object(info_type, coords):
    """Request one kind of hazard information for a location.

    Args:
        info_type: Key selecting the endpoint; one of the keys of
            ``hazard_url_list`` below.
        coords: Two-element sequence substituted, in order, into the two
            ``{}`` placeholders of the chosen URL template. Every template
            places the longitude parameter first and the latitude second.

    Returns:
        The decoded JSON payload of the response (the callers in this file
        index it like a dict).

    Raises:
        KeyError: If ``info_type`` is not a known key.
        json.JSONDecodeError: If the response body is not valid JSON.
    """
    # Function-scope import keeps this block self-contained.
    import json

    hazard_url_list = {
        '地震時の揺れやすさ': 'https://supportmap.jp/api/landinfo/rating/shakingintensity?longitude={}&latitude={}',
        '最寄り': 'https://supportmap.jp/api/distance/nearest?longitude={}&latitude={}&types=15',
        '土砂災害の可能性': 'https://supportmap.jp/api/landinfo/rating/landslide_new?longitude={}&latitude={}',
        '浸水の可能性': 'https://supportmap.jp/api/landinfo/rating/flood_new?longitude={}&latitude={}',
        '地形': 'https://supportmap.jp/api/landcondition?longitude={}&latitude={}',
        '液状化の可能性': 'https://supportmap.jp/api/landinfo/rating/liquefaction?longitude={}&latitude={}',
        '海抜': 'https://supportmap.jp/api/elevation/?callback=&lon={}&lat={}'
    }

    url = hazard_url_list[info_type].format(*coords)
    r = requests.get(url)

    payload = r.text.strip()

    # The elevation endpoint takes a `callback` parameter, so the body may be
    # wrapped JSONP-style as "name(...)" or "(...)". Unwrap it if so.
    wrapped = re.fullmatch(r'[\w$.]*\((.*)\)\s*;?', payload, re.DOTALL)
    if wrapped:
        payload = wrapped.group(1)

    # SECURITY: the previous implementation ran eval() on the response text,
    # which executes arbitrary code sent by the remote server. Parsing as JSON
    # avoids that, maps true/false/null correctly, and no longer rewrites the
    # words "true"/"false" inside string values.
    return json.loads(payload)

# One dict per room; each dict becomes one row of the output CSV.
property_datas = []
# Number of search-result pages to scrape.
pages = 1

print('Webscraping Data From Suumo')

for page in range(1, pages + 1):
    url = suumo_url.format(page)

    soup = get_html(url)

    buildings = soup.find_all('div', {'class': 'cassetteitem'})

    for building in buildings:

        # Values shared by every room of this building. Insertion order is
        # preserved because it determines the CSV column order.
        building_data = {}

        building_data['所在地'] = building.find('li', {'class': 'cassetteitem_detail-col1'}).get_text().strip()

        building_data['都道府県'], building_data['市区町村'] = divide_address(building_data['所在地'])

        building_data['緯度'], building_data['経度'] = get_coords(building_data['所在地'])

        building_data['名称'] = building.find('div', {'class': 'cassetteitem_content-title'}).get_text().strip()

        stations = building.find_all('div', {'class': 'cassetteitem_detail-text'})

        for num, station in enumerate(stations):
            if station.get_text() != '':
                station_details = station.get_text().strip().split(' ')
                # More than two space-separated tokens is treated as an
                # access route that includes a bus leg.
                has_bus = len(station_details) > 2
                suffix = str(num + 1)
                building_data['最寄り' + suffix] = station_details[0]
                building_data['バス' + suffix] = station_details[1] if has_bus else None
                building_data['最寄りバス停' + suffix] = station_details[2] if has_bus else None
                # NOTE(review): with a bus leg, the walking time reads the same
                # token as the bus stop (index 2). Left unchanged because the
                # exact token layout of the page is not visible here - confirm.
                building_data['徒歩' + suffix] = station_details[2] if has_bus else station_details[1]

        building_img = building.find('img', {'class': 'js-linkImage'})
        building_data['建物の写真'] = building_img.get('rel') if building_img is not None else None

        # Passed in this order into the "longitude={}&latitude={}" templates
        # of get_api_object.
        coords = (building_data['緯度'], building_data['経度'])
        building_data['海抜'] = get_api_object('海抜', coords)['elevation']
        building_data['浸水の可能性'] = get_api_object('浸水の可能性', coords)['text']
        building_data['地震時の揺れやすさ'] = get_api_object('地震時の揺れやすさ', coords)['text']
        building_data['液状化の可能性'] = get_api_object('液状化の可能性', coords)['text']
        sediment_disaster_obj = get_api_object('土砂災害の可能性', coords)
        building_data['土砂災害の可能性'] = 'あり' if True in sediment_disaster_obj['landslideModel_New'] else 'なし'

        nearest = get_api_object('最寄り', coords)
        shelter = nearest['shelter'][0]
        # NOTE(review): max() picks the lexicographically greatest of the
        # space-separated name tokens; presumably meant to drop a prefix
        # token - confirm the intent.
        building_data['避難所名'] = max(shelter['name'].split(' '))
        building_data['避難所距離'] = shelter['distance']
        # Stored as "<longitude> <latitude>".
        building_data['避難所座標'] = str(shelter['longitude']) + ' ' + str(shelter['latitude'])

        rooms = building.find('table', {'class': 'cassetteitem_other'}).find_all('tbody')

        for room in rooms:
            room_data = building_data.copy()

            # Look the row's cells up once instead of once per field.
            cells = room.find_all('td')
            rent_items = cells[3].find_all('li')
            deposit_items = cells[4].find_all('li')
            layout_items = cells[5].find_all('li')

            # Rent, in units of 10,000 yen
            room_data['家賃'] = rent_items[0].get_text().strip()

            # Management fee, in yen
            room_data['管理費'] = rent_items[1].get_text().strip()

            # Deposit, in units of 10,000 yen
            room_data['敷金'] = deposit_items[0].get_text().strip()

            # Key money, in units of 10,000 yen
            room_data['礼金'] = deposit_items[1].get_text().strip()

            room_data['間取り'] = layout_items[0].get_text().strip()
            room_data['面積'] = layout_items[1].get_text().strip()

            # Guard against a missing floor-plan image the same way the
            # building photo is guarded, instead of crashing on None.
            floorplan_img = cells[1].find('img')
            room_data['間取り図'] = floorplan_img.get('rel') if floorplan_img is not None else None

            property_datas.append(room_data)

    print('Finished Scraping Page', page, 'Number of Buildings', len(buildings))

df = pd.DataFrame(property_datas)
# File name carries the prefecture and today's date as YYYYMMDD.
file = './data/suumo_' + prefecture + '_' + date.today().strftime('%Y%m%d') + '.csv'
df.to_csv(file, index=None, encoding='utf-8')
print('Saved Property Data to ' + file)

Webscraping Data From Suumo
Finished Scraping Page 1 Number of Buildings 50
Saved Property Data to ./data/suumo_東京都_20240430.csv


## 取得したデータの処理を行う

In [4]:
# Pick the scraped CSV whose path sorts last.
# NOTE(review): this is a lexicographic maximum over the whole path, so with
# several prefectures it is not necessarily the most recent file - confirm.
file = max(glob.glob('./data/suumo_*_*.csv'))

df = pd.read_csv(file, header=0, encoding='utf-8')

print('Preparing Data of', file)

# --- Preprocessing ---

# Walking times: keep digits only; a value with no digits becomes 0 and a
# missing value becomes 60.
accesses = ['徒歩1', '徒歩2', '徒歩3']
for access in accesses:
    df[access] = df[access].replace(r'\D', '', regex=True)
    df[access] = df[access].replace('', 0)
    df[access] = df[access].fillna(60)
    df[access] = df[access].astype(int)

# Money columns: keep digits AND the decimal point. Stripping every non-digit
# (the previous behaviour) turned '7.5万円' into 75.0 instead of 7.5.
moneys = ['家賃', '管理費', '敷金', '礼金']
for money in moneys:
    df[money] = df[money].replace(r'[^\d.]', '', regex=True)
    df[money] = df[money].replace('', 0)
    df[money] = df[money].astype(float)

# Drop the trailing two-character unit from the floor area.
df['面積'] = df['面積'].str[:-2]

# --- Filtering ---

# Keep only rows whose municipality is listed in `municipalities`.
municipalities = ['足立区', '墨田区', '葛飾区', '荒川区', '台東区']
df = df[df['市区町村'].isin(municipalities)]

# Keep rows whose rent is at most `rent` (units of 10,000 yen).
rent = 25
df = df[df['家賃'] <= rent]

# Keep rows where ANY of the three stations is within `walking_distance`.
# The previous condition tested '徒歩1' three times, so the second and third
# stations were never considered.
walking_distance = 10.0
df = df[(df[accesses] <= walking_distance).any(axis=1)]

# Keep rows whose floor plan is one of `floorplans`.
floorplans = ['1K', '1LDK', '1DK', '3DK', '3LDK']
df = df[df['間取り'].isin(floorplans)]

# Sort by rent, cheapest first.
df = df.sort_values('家賃', ascending=True)

df = df.astype(str)

# Reuse everything after the first underscore ("<prefecture>_<date>.csv").
# Indexing the split is explicit; max() over the two parts only picked the
# right one when the prefecture name sorted after './data/suumo'.
new_file = './data/narrowed_' + file.split('_', 1)[1]

df.to_csv(new_file, index=None, encoding='utf-8')
print('Saved Data to', new_file)

Preparing Data of ./data/suumo_東京都_20240430.csv
Saved Data to ./data/narrowed_東京都_20240430.csv


## 物件データを可視化する

In [4]:
# Load the narrowed-down listing CSV whose path sorts last; the viewer class
# below reads the module-level `df`.
narrowed_files = glob.glob('./data/narrowed_*_*.csv')
file = max(narrowed_files)
df = pd.read_csv(file, header=0, encoding='utf-8')

class showDatasWindow(QMainWindow):
    """Window that pages through property listings one row at a time.

    The window reads the module-level ``df`` DataFrame and shows one row at a
    time in one of two views:

    * property view - building photo, floor plan and listing details;
    * hazard view - a folium map with hazard overlays plus hazard details.

    The "<--" / "-->" buttons move between rows and keep the current view.
    """

    # Image shown when a row has no usable image URL or a download fails.
    PLACEHOLDER_IMAGE_URL = 'https://t3.ftcdn.net/jpg/04/34/72/82/240_F_434728286_OWQQvAFoXZLdGHlObozsolNeuSxhpr84.jpg'
    # Images are scaled to fit a square of this many pixels.
    IMAGE_SIZE = 300
    # Seconds to wait for an image download before giving up.
    IMAGE_TIMEOUT = 10

    def __init__(self):
        """Build all widgets and show the first row in the property view."""
        super().__init__()

        self.df = df
        self.pageNum = 0
        # Initialised here so nextData/lastData never see it undefined.
        self.propertyDataIsShown = True

        self.setWindowTitle('物件探し')
        self.setWindowIcon(QIcon('home.png'))
        self.setFixedWidth(650)
        self.setFixedHeight(750)

        # --- Buttons that switch between the two views ---
        self.propertyPushButton = QPushButton('物件情報')
        self.propertyPushButton.clicked.connect(self.showProperty)
        self.hazardPushButton = QPushButton('ハザード情報')
        self.hazardPushButton.clicked.connect(self.showHazard)
        self.dataTypeButtonsLayout = QHBoxLayout()
        self.dataTypeButtonsLayout.addWidget(self.propertyPushButton)
        self.dataTypeButtonsLayout.addWidget(self.hazardPushButton)
        self.dataTypeButtonsWidget = QWidget()
        self.dataTypeButtonsWidget.setLayout(self.dataTypeButtonsLayout)

        # --- Property view: images on the left, details on the right ---
        self.buildingPixmap = QPixmap()
        self.buildingImageLabel = QLabel()
        self.floorplanPixmap = QPixmap()
        self.floorplanImageLabel = QLabel()

        self.propertyImagesLayout = QVBoxLayout()
        self.propertyImagesLayout.addWidget(self.buildingImageLabel)
        self.propertyImagesLayout.addWidget(self.floorplanImageLabel)
        self.propertyImagesWidget = QWidget()
        self.propertyImagesWidget.setLayout(self.propertyImagesLayout)

        self.propertyItems = ['所在地', '名称', '最寄り1', '最寄り2', '最寄り3', '家賃', '管理費', '敷金', '礼金', '間取り', '面積']

        (self.propertyItemLabel,
         self.propertyItemLineEdit,
         self.propertyItemsLayout) = self._buildReadOnlyForm(self.propertyItems)
        self.propertyItemsWidget = QWidget()
        self.propertyItemsWidget.setLayout(self.propertyItemsLayout)
        self.propertyItemsWidget.setMaximumSize(300, 1000)

        self.propertyLayout = QHBoxLayout()
        self.propertyLayout.addWidget(self.propertyImagesWidget)
        self.propertyLayout.addWidget(self.propertyItemsWidget)
        self.propertyWidget = QWidget()
        self.propertyWidget.setLayout(self.propertyLayout)

        # --- Hazard view: map on the left, details on the right ---
        self.hazardFoliumWidget = QWebEngineView()
        self.hazardImagesLayout = QVBoxLayout()
        self.hazardImagesLayout.addWidget(self.hazardFoliumWidget)
        self.hazardImagesWidget = QWidget()
        self.hazardImagesWidget.setLayout(self.hazardImagesLayout)

        self.hazardItems = ['海抜', '浸水の可能性', '地震時の揺れやすさ', '液状化の可能性', '土砂災害の可能性', '避難所名', '避難所距離']

        (self.hazardItemLabel,
         self.hazardItemLineEdit,
         self.hazardItemsLayout) = self._buildReadOnlyForm(self.hazardItems)
        self.hazardItemsWidget = QWidget()
        self.hazardItemsWidget.setLayout(self.hazardItemsLayout)
        self.hazardItemsWidget.setMinimumSize(200, 0)

        self.hazardLayout = QHBoxLayout()
        self.hazardLayout.addWidget(self.hazardImagesWidget)
        self.hazardLayout.addWidget(self.hazardItemsWidget)
        self.hazardWidget = QWidget()
        self.hazardWidget.setLayout(self.hazardLayout)

        # --- Previous / next navigation ---
        self.nextPushButton = QPushButton('-->')
        self.nextPushButton.clicked.connect(self.nextData)
        self.backPushButton = QPushButton('<--')
        self.backPushButton.clicked.connect(self.lastData)
        self.navButtonsLayout = QHBoxLayout()
        self.navButtonsLayout.addWidget(self.backPushButton)
        self.navButtonsLayout.addWidget(self.nextPushButton)
        self.navButtonsWidget = QWidget()
        self.navButtonsWidget.setLayout(self.navButtonsLayout)

        # --- Assemble the window ---
        self.mainLayout = QVBoxLayout()
        self.mainLayout.addWidget(self.dataTypeButtonsWidget)
        self.mainLayout.addWidget(self.propertyWidget)
        self.mainLayout.addWidget(self.hazardWidget)
        self.mainLayout.addWidget(self.navButtonsWidget)
        self.mainWidget = QWidget()
        self.mainWidget.setLayout(self.mainLayout)

        self.setCentralWidget(self.mainWidget)

        self.propertyWidget.hide()
        self.hazardWidget.hide()

        self.updateCurrentData()
        self.showProperty()

    def _buildReadOnlyForm(self, items):
        """Create a label and a read-only line edit for each item name.

        Widgets are added to a QFormLayout one at a time, label first, in the
        order of ``items``.

        Returns:
            ``(labels, lineEdits, layout)`` where both dicts are keyed by
            item name.
        """
        labels = {}
        lineEdits = {}
        layout = QFormLayout()
        for item in items:
            labels[item] = QLabel(item)
            lineEdits[item] = QLineEdit()
            lineEdits[item].setReadOnly(True)
            layout.addWidget(labels[item])
            layout.addWidget(lineEdits[item])
        return labels, lineEdits, layout

    def _loadScaledPixmap(self, pixmap, url):
        """Download an image into ``pixmap`` and return a scaled copy.

        ``url`` is tried first when it is a string (a missing CSV value is
        not a string); the placeholder image is the fallback. Download and
        decode failures are skipped rather than raised, so a broken image
        link cannot raise out of a button-click handler.

        Returns:
            The image scaled to fit ``IMAGE_SIZE`` x ``IMAGE_SIZE`` keeping
            its aspect ratio, or an empty QPixmap if nothing could be loaded.
        """
        candidates = [url] if isinstance(url, str) else []
        candidates.append(self.PLACEHOLDER_IMAGE_URL)

        for candidate in candidates:
            try:
                with urllib.request.urlopen(candidate, timeout=self.IMAGE_TIMEOUT) as response:
                    imageData = response.read()
            except (OSError, ValueError):
                # OSError covers network errors (URLError is a subclass);
                # ValueError covers malformed URLs.
                continue
            if pixmap.loadFromData(imageData):
                return pixmap.scaled(
                    self.IMAGE_SIZE,
                    self.IMAGE_SIZE,
                    QtCore.Qt.AspectRatioMode.KeepAspectRatio,
                    QtCore.Qt.TransformationMode.FastTransformation,
                )
        return QPixmap()

    def _refreshView(self):
        """Re-render whichever view (property or hazard) is currently shown."""
        if self.propertyDataIsShown:
            self.showProperty()
        else:
            self.showHazard()

    def updateCurrentData(self):
        """Select the row at ``pageNum`` and update button visibility.

        The back button is hidden on the first row and the next button on
        the last row.
        """
        self.currentData = self.df.iloc[self.pageNum]
        if self.pageNum == 0:
            self.backPushButton.hide()
        else:
            self.backPushButton.show()

        if self.pageNum == self.df.shape[0] - 1:
            self.nextPushButton.hide()
        else:
            self.nextPushButton.show()

    def nextData(self):
        """Advance to the next row and redraw the current view."""
        self.pageNum = self.pageNum + 1
        self.updateCurrentData()
        self._refreshView()

    def lastData(self):
        """Go back to the previous row and redraw the current view."""
        self.pageNum = self.pageNum - 1
        self.updateCurrentData()
        self._refreshView()

    def showProperty(self):
        """Show the property view for the current row."""
        self.propertyDataIsShown = True

        self.buildingImageUrl = self.currentData['建物の写真']
        self.resizedBuildingPixmap = self._loadScaledPixmap(self.buildingPixmap, self.buildingImageUrl)
        self.buildingImageLabel.setPixmap(self.resizedBuildingPixmap)

        self.floorplanImageUrl = self.currentData['間取り図']
        self.resizedFloorplanPixmap = self._loadScaledPixmap(self.floorplanPixmap, self.floorplanImageUrl)
        self.floorplanImageLabel.setPixmap(self.resizedFloorplanPixmap)

        for propertyItem in self.propertyItems:
            self.propertyItemLineEdit[propertyItem].setText(str(self.currentData[propertyItem]))

        self.hazardWidget.hide()
        self.propertyWidget.show()

    def showHazard(self):
        """Show the hazard view for the current row.

        Builds a folium map centred on the property, with three base maps,
        three hazard overlays that start hidden, and markers for the property
        and its nearest shelter, then renders it in the embedded web view.
        """
        self.propertyDataIsShown = False
        for hazardItem in self.hazardItems:
            self.hazardItemLineEdit[hazardItem].setText(str(self.currentData[hazardItem]))

        # NOTE(review): folium expects [latitude, longitude], yet the list is
        # built from the '経度' (longitude) column first. This is presumably
        # correct only because the two CSV columns hold each other's values -
        # confirm before "fixing" the order here.
        self.coords = [self.currentData['経度'], self.currentData['緯度']]

        self.foliumMap = folium.Map(
            location = self.coords,
            tiles = None,
            zoom_start = 18
        )

        self.foliumBaseMaps = {
            '淡色地図': 'https://cyberjapandata.gsi.go.jp/xyz/pale/{z}/{x}/{y}.png',
            '標準地図': 'https://cyberjapandata.gsi.go.jp/xyz/std/{z}/{x}/{y}.png',
            '白地図': 'https://cyberjapandata.gsi.go.jp/xyz/blank/{z}/{x}/{y}.png'
        }

        for mapName, mapLink in self.foliumBaseMaps.items():
            folium.TileLayer(
                tiles = mapLink,
                name = mapName,
                attr = mapName
            ).add_to(self.foliumMap)

        self.foliumLayers = {
            '洪水浸水想定区域': 'https://disaportaldata.gsi.go.jp/raster/01_flood_l2_shinsuishin_data/{z}/{x}/{y}.png',
            '津波浸水想定': 'https://disaportaldata.gsi.go.jp/raster/04_tsunami_newlegend_data/{z}/{x}/{y}.png',
            '土砂災害警戒区域': 'https://disaportaldata.gsi.go.jp/raster/05_dosekiryukeikaikuiki/{z}/{x}/{y}.png'
        }

        for layerName, layerLink in self.foliumLayers.items():
            folium.raster_layers.TileLayer(
                tiles = layerLink,
                name = layerName,
                attr = layerName,
                overlay=True,
                control=True,
                show=False,
                opacity=0.9
            ).add_to(self.foliumMap)

        folium.LayerControl().add_to(self.foliumMap)

        self.houseMarker = CustomIcon(
            icon_image = './icon/home.png',
            icon_size = (30, 30),
            icon_anchor = (15, 15),
        )

        self.shelterMarker = CustomIcon(
            icon_image = './icon/shelter.png',
            icon_size = (30, 30),
            icon_anchor = (15, 15)
        )

        self.foliumHouseMarker = folium.Marker(
            location = self.coords,
            icon = self.houseMarker
        ).add_to(self.foliumMap)

        # The scraper writes this column as "<longitude> <latitude>". Parse
        # it in that order instead of using string max()/min(), which only
        # worked when the latitude text sorted after the longitude text.
        shelterLon, shelterLat = (
            float(value) for value in self.currentData['避難所座標'].split(' ')
        )
        self.shelterCoords = [shelterLat, shelterLon]

        self.foliumShelterMarker = folium.Marker(
            location = self.shelterCoords,
            icon = self.shelterMarker
        ).add_to(self.foliumMap)

        # Render the map to HTML in memory and load it into the web view.
        self.foliumData = io.BytesIO()
        self.foliumMap.save(self.foliumData, close_file = False)

        self.hazardFoliumWidget.setHtml(self.foliumData.getvalue().decode())
        self.hazardFoliumWidget.setMinimumHeight(200)

        self.propertyWidget.hide()
        self.hazardWidget.show()
        
if __name__ == '__main__':
    # Reuse an application object that already exists (as can happen when
    # the cell is re-run in the same process); otherwise create one.
    existing_app = QtCore.QCoreApplication.instance()
    app = QApplication(sys.argv) if existing_app is None else existing_app

    # Schedule the application object for deletion when it is about to quit.
    app.aboutToQuit.connect(app.deleteLater)

    mainWindow = showDatasWindow()
    mainWindow.show()

    # Run the event loop and exit with its return code.
    sys.exit(app.exec())

Path override failed for key base::DIR_APP_DICTIONARIES and path '/usr/bin/qtwebengine_dictionaries'
Path override failed for key base::DIR_APP_DICTIONARIES and path '/home/arcdev/school/future-engineering/archived/3rdYear/suumo-hazard-risk/.venv/lib/python3.9/site-packages/PyQt6/Qt6/libexec/qtwebengine_dictionaries'
Path override failed for key base::DIR_APP_DICTIONARIES and path '/home/arcdev/school/future-engineering/archived/3rdYear/suumo-hazard-risk/.venv/lib/python3.9/site-packages/PyQt6/Qt6/libexec/qtwebengine_dictionaries'
Release of profile requested but WebEnginePage still not deleted. Expect troubles !


SystemExit: 0

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
