<a href="https://colab.research.google.com/github/hyuk52044-beep/TW-earthquake/blob/main/earthquake.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# Step 1: Read the earthquake data file
# Specify the file path where the earthquake data is stored.
# Replace 'earthquake.csv' with the actual path if needed.
url = "https://raw.githubusercontent.com/hyuk52044-beep/TW-earthquake/refs/heads/main/earthquake.csv"
earthquake_data = pd.read_csv(url)
earthquake_data.head()


Unnamed: 0,time,latitude,longitude,depth,mag,nst,gap,dmin,rms,updated,place,type,horizontalError,depthError,magError,magNst
0,2024-12-31T18:52:07.239Z,24.3008,121.7217,25.648,3.9,48,55,0.172,0.73,2024-12-31T20:20:57.040Z,"37 km NNE of Hualien City, Taiwan",earthquake,1.63,4.704,0.062,25
1,2024-12-29T19:51:35.909Z,23.5333,120.6525,20.064,4.7,66,64,0.233,0.83,2024-12-30T06:53:03.127Z,"21 km ENE of Pizitou, Taiwan",earthquake,3.24,5.246,0.079,49
2,2024-12-29T15:22:08.172Z,23.398,121.8325,10.0,4.5,25,101,0.492,0.84,2024-12-29T20:47:00.096Z,"68 km SSE of Hualien City, Taiwan",earthquake,4.25,1.914,0.15,13
3,2024-12-26T08:08:50.808Z,23.8875,121.7688,25.893,4.7,58,70,0.326,0.67,2024-12-29T20:13:03.387Z,"19 km ESE of Hualien City, Taiwan",earthquake,3.24,4.898,0.062,25
4,2024-12-11T20:05:49.112Z,23.9428,121.7884,25.893,4.1,60,69,0.29,0.73,2024-12-11T21:02:32.040Z,"19 km ESE of Hualien City, Taiwan",earthquake,3.52,2.787,0.065,23


In [2]:
# Step 2: Inspect the data
# Display the first five rows of the dataset to understand its structure and content.
print("Displaying the first five rows of the earthquake dataset:")
print(earthquake_data.head())

Displaying the first five rows of the earthquake dataset:
                       time  latitude  longitude   depth  mag  nst  gap  \
0  2024-12-31T18:52:07.239Z   24.3008   121.7217  25.648  3.9   48   55   
1  2024-12-29T19:51:35.909Z   23.5333   120.6525  20.064  4.7   66   64   
2  2024-12-29T15:22:08.172Z   23.3980   121.8325  10.000  4.5   25  101   
3  2024-12-26T08:08:50.808Z   23.8875   121.7688  25.893  4.7   58   70   
4  2024-12-11T20:05:49.112Z   23.9428   121.7884  25.893  4.1   60   69   

    dmin   rms                   updated                              place  \
0  0.172  0.73  2024-12-31T20:20:57.040Z  37 km NNE of Hualien City, Taiwan   
1  0.233  0.83  2024-12-30T06:53:03.127Z       21 km ENE of Pizitou, Taiwan   
2  0.492  0.84  2024-12-29T20:47:00.096Z  68 km SSE of Hualien City, Taiwan   
3  0.326  0.67  2024-12-29T20:13:03.387Z  19 km ESE of Hualien City, Taiwan   
4  0.290  0.73  2024-12-11T21:02:32.040Z  19 km ESE of Hualien City, Taiwan   

         type  h

In [3]:
# Step 3: Check the data
# Check the summary of the dataset to understand column types and non-null counts.
print("\nDataset summary:")
print(earthquake_data.info())

# Generate descriptive statistics for numerical columns in the dataset.
print("\nDescriptive statistics for numerical columns:")
print(earthquake_data.describe())

# Check for missing values in the dataset to ensure data completeness.
print("\nChecking for missing values in each column:")
print(earthquake_data.isnull().sum())

# If missing values are detected, consider removing or imputing them.
# Uncomment the following line to drop rows with missing values.
# earthquake_data.dropna(inplace=True)


Dataset summary:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 513 entries, 0 to 512
Data columns (total 16 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   time             513 non-null    object 
 1   latitude         513 non-null    float64
 2   longitude        513 non-null    float64
 3   depth            513 non-null    float64
 4   mag              513 non-null    float64
 5   nst              513 non-null    int64  
 6   gap              513 non-null    int64  
 7   dmin             513 non-null    float64
 8   rms              513 non-null    float64
 9   updated          513 non-null    object 
 10  place            513 non-null    object 
 11  type             513 non-null    object 
 12  horizontalError  513 non-null    float64
 13  depthError       513 non-null    float64
 14  magError         513 non-null    float64
 15  magNst           513 non-null    int64  
dtypes: float64(9), int64(3), object(4)
memory us

In [4]:
# Step 4: Ensure the data types are correct
# Convert latitude, longitude, and magnitude columns to float if necessary.
earthquake_data['latitude'] = earthquake_data['latitude'].astype(float)
earthquake_data['longitude'] = earthquake_data['longitude'].astype(float)
earthquake_data['mag'] = earthquake_data['mag'].astype(float)

In [5]:
# Step 5: Extract geographic and magnitude information
# Latitude, longitude, and magnitude are key variables for visualization.
latitude = earthquake_data['latitude']
longitude = earthquake_data['longitude']
magnitude = earthquake_data['mag']

In [6]:
# Step 6: Create an interactive map to visualize earthquake data
# Use a scatter mapbox plot to plot earthquakes with size and color based on magnitude.
fig = px.scatter_mapbox(
    earthquake_data,
    lat="latitude",  # Specify the latitude column
    lon="longitude",  # Specify the longitude column
    size="mag",  # Use magnitude to define the size of the points
    color="mag",  # Color points based on magnitude
    color_continuous_scale=px.colors.sequential.YlOrRd,  # Use a yellow-red color scale
    size_max=10,  # Limit the maximum size of points
    zoom=6,  # Set an appropriate zoom level for Taiwan region
    center={"lat": 23.5, "lon": 121},  # Center the map on Taiwan
    title="Earthquake Data from 2023 to 2024 in Taiwan with OpenStreetMap"
)

# Update map style and layout settings
fig.update_layout(
    mapbox_style="open-street-map",  # Use OpenStreetMap for the map style
    title_font_size=20,  # Set the font size of the title
    margin={"r": 0, "t": 40, "l": 30, "b": 30},  # Adjust the map margins
    height=600,  # Set the height of the map
    width=800  # Set the width of the map
)

# Display the interactive map
fig.show()

In [7]:
# Step 7: Analyze the distribution of earthquake magnitudes
# Create a histogram to visualize the distribution of earthquake magnitudes.
fig = px.histogram(
    earthquake_data,
    x='mag',  # Magnitude on the x-axis
    nbins=30,  # Set the number of bins to 30
    title='Magnitude Distribution of Earthquakes',
    labels={'mag': 'Magnitude'},  # Label for the x-axis
    color_discrete_sequence=['#FFA07A']  # Set a custom color for the bars
)

# Display the histogram
fig.show()

In [8]:
# Step 8: Explore the relationship between depth and magnitude
# Use a scatter plot to analyze how depth relates to magnitude.
fig = px.scatter(
    earthquake_data,
    x='depth',  # Depth on the x-axis
    y='mag',  # Magnitude on the y-axis
    title='Relationship between Depth and Magnitude',
    labels={'depth': 'Depth (km)', 'mag': 'Magnitude'},
    color='mag',  # Color points based on magnitude
    color_continuous_scale=px.colors.sequential.Viridis  # Use a Viridis color scale
)

# Display the scatter plot
fig.show()

In [9]:
# Step 9: Analyze temporal trends in earthquake occurrence (Monthly Analysis)
# Convert the 'time' column to datetime format to enable time-based analysis.
earthquake_data['time'] = pd.to_datetime(earthquake_data['time'])

# Extract the month and year for grouping purposes.
earthquake_data['month'] = earthquake_data['time'].dt.to_period('M')

# Calculate the monthly count of earthquakes by grouping data by month.
monthly_counts = earthquake_data.groupby('month').size().reset_index(name='count')

# Convert the 'month' column back to a datetime format for plotting.
monthly_counts['month'] = monthly_counts['month'].dt.to_timestamp()

# Visualize the monthly earthquake counts using a line chart.
fig = px.line(
    monthly_counts,
    x='month',  # Month on the x-axis
    y='count',  # Earthquake count on the y-axis
    title='Monthly Earthquake Counts from 2023 to 2024',
    labels={'month': 'Month', 'count': 'Number of Earthquakes'}  # Axis labels
)

# Display the line chart
fig.show()


Converting to PeriodArray/Index representation will drop timezone information.

