<a href="https://colab.research.google.com/github/imabari/ImabariScraping/blob/master/mls_filter_ishikawa_geopandas.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import requests
from bs4 import BeautifulSoup

In [2]:
import pathlib
import re

In [3]:
import pandas as pd

In [4]:
# Browser-like User-Agent that fetch_soup passes to requests.get().
_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko"

headers = {"User-Agent": _USER_AGENT}

In [5]:
def fetch_soup(url, parser="html.parser", timeout=30):
    """Download a page and parse it with BeautifulSoup.

    Args:
        url: Address of the page to fetch.
        parser: Parser name handed to BeautifulSoup.
        timeout: Seconds passed to ``requests.get`` as its timeout. Without
            a timeout a stalled connection would block the notebook forever.

    Returns:
        The BeautifulSoup object built from the raw response bytes.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    r = requests.get(url, headers=headers, timeout=timeout)
    r.raise_for_status()

    # Hand over the raw bytes (not r.text) so BeautifulSoup works out the
    # character encoding on its own.
    return BeautifulSoup(r.content, parser)

In [6]:
def fetch_file(url, dir=".", timeout=30):
    """Download ``url`` into directory ``dir`` and return the saved path.

    Args:
        url: Address of the file to download.
        dir: Target directory; created (with parents) when missing.
        timeout: Seconds passed to ``requests.get`` as its timeout.

    Returns:
        pathlib.Path of the downloaded file.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    from urllib.parse import urlsplit

    # Take the file name from the URL *path* so that a query string or
    # fragment never leaks into it; fall back to the previous behaviour
    # when the path has no final component.
    name = pathlib.PurePosixPath(urlsplit(url).path).name or pathlib.PurePath(url).name

    p = pathlib.Path(dir, name)
    p.parent.mkdir(parents=True, exist_ok=True)

    # Write to a temporary sibling first so an interrupted download never
    # leaves a truncated file under the final name.
    tmp = p.with_name(p.name + ".part")

    # Stream the body in chunks instead of holding the whole file in memory.
    with requests.get(url, stream=True, timeout=timeout) as r:
        r.raise_for_status()

        with tmp.open(mode="wb") as fw:
            for chunk in r.iter_content(chunk_size=1024 * 1024):
                fw.write(chunk)

    tmp.replace(p)
    return p

# スクレイピング

In [7]:
# Downloads page that is searched below for the full cell export link.
url = "https://location.services.mozilla.com/downloads"

# Fetch the page and parse it (fetch_soup defaults to "html.parser").
soup = fetch_soup(url)

In [8]:
# Find the <a> element whose text is the full cell export file name.
# The pattern is a raw string so "\d" is not read as an (invalid) string
# escape, and the dots are escaped so they only match a literal ".".
tag = soup.find(
    "a",
    string=re.compile(r"^MLS-full-cell-export-\d{4}-\d{2}-\d{2}T000000\.csv\.gz"),
)

# Fail here with a clear message instead of an AttributeError on None later.
if tag is None:
    raise RuntimeError("MLS full cell export link not found on the downloads page")

In [9]:
# Download the linked export into the current directory (fetch_file's default)
# and keep the path of the saved file.
path_csv = fetch_file(tag.get("href"))

# 前処理

In [10]:
import pandas as pd

In [11]:
# Load the downloaded export into a DataFrame.
# NOTE(review): the file is gzip-compressed (.csv.gz); this relies on
# read_csv inferring the compression from the file extension — confirm.
df0 = pd.read_csv(path_csv)

## 抽出

MCC・MNC

https://ja.wikipedia.org/wiki/Mobile_Network_Code

In [12]:
# Keep LTE rows with MCC 440 or 441, a positive range, more than one sample
# and a non-missing "unit" value, then renumber the index from zero.
df1 = (
    df0.loc[
        lambda cells: cells["radio"].eq("LTE")
        & cells["mcc"].isin([440, 441])
        & cells["range"].gt(0)
        & cells["samples"].gt(1)
    ]
    .dropna(subset=["unit"])
    .copy()
    .reset_index(drop=True)
)

In [13]:
# Cast "unit" to integer; rows with a missing "unit" were already dropped
# when df1 was built, so the cast cannot hit NaN.
df1 = df1.astype({"unit": int})

In [14]:
# Interpret both timestamp columns as seconds since the Unix epoch.
for ts_col in ("created", "updated"):
    df1[ts_col] = pd.to_datetime(df1[ts_col], unit="s")

In [15]:
# Show the filtered frame (rendered as a truncated table).
df1

Unnamed: 0,radio,mcc,net,area,cell,unit,lon,lat,range,samples,changeable,created,updated,averageSignal
0,LTE,440,0,4115,4629250,191,138.612602,35.621624,3572,26,1,2021-03-05 23:42:54,2023-03-26 10:03:13,
1,LTE,440,0,4118,2102018,58,140.637758,35.900076,1469,34,1,2018-04-19 11:50:25,2023-11-25 04:02:40,
2,LTE,440,0,4120,2305538,23,139.897278,36.555091,551,16,1,2018-11-29 12:13:05,2023-12-16 12:48:27,
3,LTE,440,0,4120,2305793,153,139.606263,36.783805,1838,25,1,2022-07-30 15:17:02,2023-04-23 17:55:11,
4,LTE,440,0,4120,2324481,327,139.977128,36.904738,24058,457,1,2019-04-12 16:08:59,2023-05-15 06:31:17,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
419714,LTE,441,0,51970,236701226,0,130.426351,33.618492,54,3,1,2023-12-10 21:23:31,2023-12-10 21:23:31,
419715,LTE,441,0,51970,236726628,0,130.438245,33.611217,1377,30,1,2023-11-05 17:22:57,2023-11-05 17:22:57,
419716,LTE,441,0,51970,236736374,0,130.437587,33.611572,63,3,1,2023-12-10 21:24:02,2023-12-10 21:24:02,
419717,LTE,441,0,51970,236808556,0,130.475078,33.593123,38,2,1,2023-11-03 22:51:52,2023-11-03 22:51:52,


In [16]:
# Number of rows per "net" value, ordered by the value itself rather than by count.
# NOTE(review): "net" is presumably the MNC referred to in the markdown above — confirm.
df1["net"].value_counts().sort_index()

0        868
2         68
4          3
5        339
10     97486
11    120310
20     77161
50     15250
51     81216
52      2976
53     20256
54      3786
Name: net, dtype: int64

In [17]:
# (rows, columns) of the filtered frame.
df1.shape

(419719, 14)

# geopandas

国土数値情報　行政区域データ

https://nlftp.mlit.go.jp/ksj/gml/datalist/KsjTmplt-N03-v3_1.html

In [18]:
# Download the N03 archive (the "_17_" file) into the working directory;
# it is read with geopandas further down.
!wget https://nlftp.mlit.go.jp/ksj/gml/data/N03/N03-2023/N03-20230101_17_GML.zip

--2024-01-16 11:05:30--  https://nlftp.mlit.go.jp/ksj/gml/data/N03/N03-2023/N03-20230101_17_GML.zip
Resolving nlftp.mlit.go.jp (nlftp.mlit.go.jp)... 147.154.8.35, 147.154.3.128, 147.154.45.210
Connecting to nlftp.mlit.go.jp (nlftp.mlit.go.jp)|147.154.8.35|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 5006858 (4.8M) [application/zip]
Saving to: ‘N03-20230101_17_GML.zip’


2024-01-16 11:05:33 (4.95 MB/s) - ‘N03-20230101_17_GML.zip’ saved [5006858/5006858]



In [19]:
!pip install geopandas



In [20]:
import geopandas as gpd

In [21]:
# Polygons from the downloaded N03 archive (attributes decoded as cp932).
ishikawa = gpd.read_file("N03-20230101_17_GML.zip", encoding="cp932")

# One point per row of df1, built from its lon/lat columns.
# NOTE(review): the points are labelled EPSG:6668, presumably to match the
# CRS of the polygon layer for the spatial join — confirm.
cell_points = gpd.points_from_xy(df1["lon"], df1["lat"])
pt_df = gpd.GeoDataFrame(df1, geometry=cell_points, crs="EPSG:6668")

In [22]:
# Rename the N03 attribute columns to descriptive Japanese names.
column_labels = {
    "N03_001": "都道府県名",
    "N03_002": "支庁・振興局名",
    "N03_003": "郡・政令都市名",
    "N03_004": "市区町村名",
    "N03_007": "行政区域コード",
}
ishikawa = ishikawa.rename(columns=column_labels)

In [23]:
# Show the polygon layer with its renamed columns.
ishikawa

Unnamed: 0,OBJECTID,都道府県名,支庁・振興局名,郡・政令都市名,市区町村名,行政区域コード,Shape_Leng,Shape_Area,geometry
0,1.0,石川県,,,金沢市,17201,0.002292,1.089518e-07,"POLYGON ((136.59501 36.61629, 136.59429 36.615..."
1,2.0,石川県,,,金沢市,17201,0.002294,1.071961e-07,"POLYGON ((136.59537 36.61667, 136.59503 36.616..."
2,3.0,石川県,,,金沢市,17201,0.002589,1.224443e-07,"POLYGON ((136.59647 36.61781, 136.59578 36.617..."
3,4.0,石川県,,,金沢市,17201,0.002593,1.253285e-07,"POLYGON ((136.59688 36.61824, 136.59663 36.617..."
4,5.0,石川県,,,金沢市,17201,0.002594,1.239789e-07,"POLYGON ((136.59769 36.61908, 136.59748 36.618..."
...,...,...,...,...,...,...,...,...,...
1696,1697.0,石川県,,鳳珠郡,能登町,17463,0.001279,7.981337e-08,"POLYGON ((137.24268 37.37134, 137.24267 37.371..."
1697,1698.0,石川県,,鳳珠郡,能登町,17463,0.001787,1.533144e-07,"POLYGON ((137.24184 37.37155, 137.24184 37.371..."
1698,1699.0,石川県,,鳳珠郡,能登町,17463,0.000357,7.984804e-09,"POLYGON ((137.24217 37.37152, 137.24211 37.371..."
1699,1700.0,石川県,,鳳珠郡,能登町,17463,0.000799,2.411877e-08,"POLYGON ((137.24396 37.37156, 137.24396 37.371..."


In [24]:
# Keep only the points that fall on a polygon of the Ishikawa layer
# (inner spatial join), then order the matches for the de-duplication step.
joined = gpd.sjoin(pt_df, ishikawa, how="inner")
spj = joined.sort_values(by=["cell", "net", "行政区域コード", "samples"])
spj.shape

(3104, 24)

In [25]:
# Collapse rows that share the same cell and administrative code.
# NOTE(review): spj is sorted ascending, so the surviving row is the one with
# the lowest "net" and then the fewest "samples" — confirm this is intended.
df2 = (
    spj.drop_duplicates(subset=["cell", "行政区域コード"], keep="first")
    .copy()
    .reset_index(drop=True)
)

In [26]:
# Number of rows per "unit" value, most frequent first.
df2["unit"].value_counts()

1      16
233    14
234    13
162    13
0      13
       ..
415     1
442     1
86      1
445     1
450     1
Name: unit, Length: 465, dtype: int64

In [27]:
# Show the de-duplicated join result.
df2

Unnamed: 0,radio,mcc,net,area,cell,unit,lon,lat,range,samples,...,geometry,index_right,OBJECTID,都道府県名,支庁・振興局名,郡・政令都市名,市区町村名,行政区域コード,Shape_Leng,Shape_Area
0,LTE,440,0,16407,5128961,231,136.733376,36.744223,16355,68,...,POINT (136.73338 36.74422),1059,1060.0,石川県,,,かほく市,17209,0.538833,0.006502
1,LTE,440,20,16407,5128962,233,136.642738,36.617224,521,7,...,POINT (136.64274 36.61722),7,8.0,石川県,,,金沢市,17201,1.479955,0.047166
2,LTE,440,20,16407,5141506,409,136.654529,36.568320,202,16,...,POINT (136.65453 36.56832),7,8.0,石川県,,,金沢市,17201,1.479955,0.047166
3,LTE,440,20,16407,5141507,410,136.642740,36.570330,1104,22,...,POINT (136.64274 36.57033),7,8.0,石川県,,,金沢市,17201,1.479955,0.047166
4,LTE,440,53,49224,50338305,309,136.799322,37.008086,586,62,...,POINT (136.79932 37.00809),1562,1563.0,石川県,,羽咋郡,志賀町,17384,1.833395,0.024993
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2902,LTE,440,51,49220,267432460,24,136.460988,36.440223,709,46,...,POINT (136.46099 36.44022),1062,1063.0,石川県,,,能美市,17211,0.641024,0.008456
2903,LTE,440,50,49217,267436032,438,136.656854,36.561694,24,2,...,POINT (136.65685 36.56169),7,8.0,石川県,,,金沢市,17201,1.479955,0.047166
2904,LTE,440,50,49217,267436033,438,136.656735,36.561668,25,3,...,POINT (136.65673 36.56167),7,8.0,石川県,,,金沢市,17201,1.479955,0.047166
2905,LTE,440,50,49218,267457025,151,136.759591,36.593192,316,26,...,POINT (136.75959 36.59319),7,8.0,石川県,,,金沢市,17201,1.479955,0.047166


In [28]:
# Keep only the rows whose "updated" timestamp is later than the cutoff.
# NOTE(review): "updated" was parsed from epoch seconds without a timezone,
# so it is presumably UTC, while this cutoff may have been meant as local
# time (JST) — confirm.
updated_cutoff = pd.Timestamp("2024-01-01 16:11:00")
df_ishikawa = df2.loc[df2["updated"] > updated_cutoff].copy()

In [29]:
# Show the rows that passed the "updated" cutoff.
df_ishikawa

Unnamed: 0,radio,mcc,net,area,cell,unit,lon,lat,range,samples,...,geometry,index_right,OBJECTID,都道府県名,支庁・振興局名,郡・政令都市名,市区町村名,行政区域コード,Shape_Leng,Shape_Area
1,LTE,440,20,16407,5128962,233,136.642738,36.617224,521,7,...,POINT (136.64274 36.61722),7,8.0,石川県,,,金沢市,17201,1.479955,0.047166
2,LTE,440,20,16407,5141506,409,136.654529,36.568320,202,16,...,POINT (136.65453 36.56832),7,8.0,石川県,,,金沢市,17201,1.479955,0.047166
3,LTE,440,20,16407,5141507,410,136.642740,36.570330,1104,22,...,POINT (136.64274 36.57033),7,8.0,石川県,,,金沢市,17201,1.479955,0.047166
48,LTE,440,53,49287,50490881,105,136.284931,36.266432,4073,157,...,POINT (136.28493 36.26643),1034,1035.0,石川県,,,加賀市,17206,1.044371,0.030668
1057,LTE,440,20,16405,88096192,295,136.615676,36.498784,638,22,...,POINT (136.61568 36.49878),7,8.0,石川県,,,金沢市,17201,1.479955,0.047166
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2877,LTE,440,20,49157,177783143,234,136.628573,36.444329,698,34,...,POINT (136.62857 36.44433),1061,1062.0,石川県,,,白山市,17210,2.116369,0.075719
2878,LTE,440,20,49157,177783145,142,136.627785,36.445504,992,25,...,POINT (136.62779 36.44550),1061,1062.0,石川県,,,白山市,17210,2.116369,0.075719
2879,LTE,440,20,49157,177783148,96,136.619641,36.458359,183,7,...,POINT (136.61964 36.45836),1061,1062.0,石川県,,,白山市,17210,2.116369,0.075719
2880,LTE,440,20,49159,177783652,256,136.648352,36.578797,265,19,...,POINT (136.64835 36.57880),7,8.0,石川県,,,金沢市,17201,1.479955,0.047166


In [30]:
# Number of remaining rows per municipality name, most frequent first.
df_ishikawa["市区町村名"].value_counts()

金沢市     164
白山市      89
小松市      56
加賀市      39
野々市市     21
能美市      18
内灘町      12
川北町       2
Name: 市区町村名, dtype: int64