# Earthquake Data Cleaning and Preparation

### This notebook loads earthquake data from a CSV file, converts dates to datetime format, removes unnecessary columns, converts coordinates to decimal degrees, and renames the columns to English for use in GIS tools such as QGIS.

In [2]:
import pandas as pd

In [3]:
# Load the raw earthquake records and preview the first rows.
# NOTE: this is an absolute, machine-specific path.
raw_csv_path = "/Users/yuta-uebayashi/Desktop/Lede課題一覧/Lede個人プロジェクト/project3/Data/Earthquake Records Since 2000.csv"
df = pd.read_csv(raw_csv_path)
df.head()

Unnamed: 0,地震の発生日,地震の発生時刻,震央地名,緯度,経度,深さ,Ｍ,最大震度
0,2025/07/06,14:07:03.9,トカラ列島近海,29°21.8′N,129°30.7′E,23 km,5.5,震度５強
1,2025/07/06,14:01:21.3,トカラ列島近海,29°20.9′N,129°30.3′E,21 km,4.9,震度５強
2,2025/07/05,06:29:39.5,トカラ列島近海,29°20.1′N,129°28.2′E,19 km,5.4,震度５強
3,2025/07/03,16:13:01.3,トカラ列島近海,29°21.4′N,129°31.3′E,20 km,5.5,震度６弱
4,2024/08/08,16:42:55.5,日向灘,31°44.2′N,131°43.3′E,31 km,7.1,震度６弱


In [4]:
# Inspect the dtype pandas inferred for each column of the loaded data
df.dtypes

地震の発生日      object
地震の発生時刻     object
震央地名        object
緯度          object
経度          object
深さ          object
Ｍ          float64
最大震度        object
dtype: object

In [5]:
# Tidy the data: parse the date column and discard the columns that are not kept
unused_columns = ["地震の発生時刻", "深さ", "最大震度"]
df["地震の発生日"] = pd.to_datetime(df["地震の発生日"])
df = df.drop(columns=unused_columns)

In [6]:
# Convert latitude/longitude strings to decimal degrees
def dms_to_decimal(coord):
    """
    Convert a degrees-and-decimal-minutes coordinate string to signed decimal degrees.

    Accepts strings such as "29°21.8′N" or "129°30.7'E": whole degrees, a
    degree sign, minutes (optionally with a fractional part), an optional
    minute mark (prime or apostrophe), and a hemisphere letter (N, S, E, W).

    Parameters
    ----------
    coord : str
        Coordinate text. Leading and trailing whitespace is ignored.

    Returns
    -------
    float or None
        Decimal degrees, negative for the southern and western hemispheres.
        None when ``coord`` is not a string (for example a missing value
        such as NaN or None) or does not start with the expected pattern.
    """
    import re

    # Missing cells can reach this function as NaN/None when it is applied to
    # a pandas column; re.match would raise TypeError on them, so they are
    # treated like any other unparseable value.
    if not isinstance(coord, str):
        return None

    match = re.match(r"(\d+)°(\d+\.?\d*)[′']?([NSEW])", coord.strip())
    if match is None:
        return None

    degrees = float(match.group(1))
    minutes = float(match.group(2))
    direction = match.group(3)

    # One degree is 60 minutes.
    decimal = degrees + minutes / 60

    # South and west are negative in signed decimal degrees.
    if direction in ("S", "W"):
        decimal *= -1

    return decimal

In [7]:
# Count the missing values in each column
df.isna().sum()

地震の発生日    0
震央地名      0
緯度        0
経度        0
Ｍ         0
dtype: int64

In [8]:
# Build the decimal-degree columns from the original coordinate strings
for source_column, target_column in (("緯度", "lat"), ("経度", "lon")):
    df[target_column] = df[source_column].apply(dms_to_decimal)

In [9]:
# Remove the original coordinate columns now that lat/lon exist
source_coord_columns = ["緯度", "経度"]
df = df.drop(columns=source_coord_columns)

In [10]:
# Preview the first five rows of the cleaned table
df.head(n=5)

Unnamed: 0,地震の発生日,震央地名,Ｍ,lat,lon
0,2025-07-06,トカラ列島近海,5.5,29.363333,129.511667
1,2025-07-06,トカラ列島近海,4.9,29.348333,129.505
2,2025-07-05,トカラ列島近海,5.4,29.335,129.47
3,2025-07-03,トカラ列島近海,5.5,29.356667,129.521667
4,2024-08-08,日向灘,7.1,31.736667,131.721667


In [11]:
# Re-check the column dtypes after the cleaning steps
df.dtypes

地震の発生日    datetime64[ns]
震央地名              object
Ｍ                float64
lat              float64
lon              float64
dtype: object

In [12]:
# Rename the columns to English.
# The magnitude header in the loaded data is the full-width letter "Ｍ" (U+FF2D),
# not ASCII "M". An ASCII-only key would be skipped silently, because
# DataFrame.rename ignores keys that match no column by default.
df = df.rename(columns={
    "地震の発生日": "Date",
    "震央地名": "Place",
    "Ｍ": "Magnitude",  # full-width M, as it appears in the loaded data
    "M": "Magnitude",  # ASCII M, kept so a file with an ASCII header still works
})

In [13]:
# Write the cleaned table to CSV without the index column
output_csv_name = "earthquake_cleanning_data.csv"
df.to_csv(output_csv_name, encoding="utf-8", index=False)