In [6]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import os
from dotenv import load_dotenv
import pandas as pd


In [7]:
# Read the earthquake CSV from the sibling Resources directory into `df`,
# then show the first five rows as this cell's output.
df = pd.read_csv(filepath_or_buffer='../Resources/earthquake_data.csv')
df.head(n=5)

Unnamed: 0,id,time,place,longitude,latitude,depth,magnitude,felt,cdi,mmi,...,t_axis_plunge,percent_double_couple,scalar_moment,tensor_mpp,tensor_mrp,tensor_mrr,tensor_mrt,tensor_mtp,tensor_mtt,soil_density
0,nc30092964,1995-12-28 18:28:01.230000+00:00,"9 km WNW of Topaz Lake, Nevada",-119.6545,38.7145,-1.011,4.8,,,6.1,...,8.496,0.94,1.749e+16,1.717e+16,-2593000000000000.0,-256300000000000.0,-1434000000000000.0,2590000000000000.0,-1.691e+16,1.25
1,nc30092581,1995-12-23 05:39:56.650000+00:00,"8 km WNW of Topaz Lake, Nevada",-119.633,38.7305,-1.081,4.7,,,,...,8.968,0.83,1.175e+16,1.082e+16,1325000000000000.0,-249600000000000.0,4455000000000000.0,1729000000000000.0,-1.057e+16,1.25
2,nc30092506,1995-12-22 09:00:34.560000+00:00,California-Nevada border region,-119.635,38.7215,3.659,4.86,,,,...,3.308,0.53,2.435e+16,2.737e+16,-2897000000000000.0,-1.909e+16,-4715000000000000.0,-1774000000000000.0,-8282000000000000.0,1.25
3,nc30091857,1995-12-13 06:25:54.110000+00:00,"9 km ESE of Gilroy, California",-121.470333,36.982167,4.234,3.8,,,,...,15.899,0.81,742000000000000.0,431700000000000.0,278100000000000.0,-47760000000000.0,-39050000000000.0,554500000000000.0,-384000000000000.0,1.89375
4,nc30094697,1995-12-13 05:45:12.760000+00:00,"9 km ESE of Gilroy, California",-121.47,36.976667,6.204,3.9,,,,...,9.016,0.49,952100000000000.0,395100000000000.0,24890000000000.0,280200000000000.0,133000000000000.0,774300000000000.0,-675300000000000.0,1.89375


In [8]:
# Sanity check: print summary statistics for the coordinate and magnitude
# columns so out-of-range values can be spotted before plotting.
print(
    df.loc[:, ['latitude', 'longitude', 'magnitude']]
      .describe()
)

          latitude    longitude    magnitude
count  1428.000000  1428.000000  1428.000000
mean     37.649464  -120.811440     3.870301
std       1.932531     2.313600     0.513061
min      31.523667  -124.664000     3.000000
25%      36.576500  -122.743375     3.500000
50%      37.678333  -121.252333     3.790000
75%      38.826375  -118.867625     4.110000
max      41.867700  -111.094000     7.100000


In [13]:

# Keep only events with a positive magnitude. `.copy()` makes the filtered
# frame independent of the one it was sliced from, so adding a column below
# cannot trigger pandas' SettingWithCopyWarning.
df = df[df['magnitude'] > 0].copy()

# Rescale magnitude linearly onto [1, 6] for marker sizing.
# When every magnitude is identical (or the frame is empty) the span is 0/NaN
# and the min-max formula would yield NaN sizes, so fall back to a constant.
mag_min = df['magnitude'].min()
mag_span = df['magnitude'].max() - mag_min
if mag_span > 0:
    df['magnitude_size'] = (df['magnitude'] - mag_min) / mag_span * 5 + 1
else:
    df['magnitude_size'] = 1.0

# Scatter every event on a map: colour encodes magnitude, marker size encodes
# the rescaled magnitude.
# NOTE(review): the title says "Global", but the describe() output earlier in
# this notebook shows only latitudes ~31-42 and longitudes ~-125 to -111 --
# confirm whether the title should be narrowed.
fig = px.scatter_mapbox(df, lat='latitude', lon='longitude',
                        color='magnitude', size='magnitude_size',
                        color_continuous_scale=px.colors.sequential.Turbo,
                        size_max=8,  # upper bound on the marker size
                        zoom=4,  # initial zoom; set once here instead of being overridden in the layout
                        title="Global Earthquake Data Visualization",
                        height=1000)

# Apply the base-map style and position the colour bar.
fig.update_layout(
    mapbox_style="open-street-map",
    coloraxis_colorbar={
        'thicknessmode': 'pixels',
        'thickness': 15,
        'lenmode': 'fraction',
        'len': 0.4,
        'x': -0.15,  # negative x places the colour bar to the left of the plot area
        'y': 0.5,    # vertically centred
        'xanchor': 'left',    # x refers to the bar's left edge
        'yanchor': 'middle',  # y refers to the bar's vertical midpoint
        'title': 'Magnitude',
    }
)

# Render the figure in the notebook
fig.show()

In [10]:
# Print the first five rows of the coordinate and magnitude columns.
print(df.loc[:, ['latitude', 'longitude', 'magnitude']].head(5))

    latitude   longitude  magnitude
0  38.714500 -119.654500       4.80
1  38.730500 -119.633000       4.70
2  38.721500 -119.635000       4.86
3  36.982167 -121.470333       3.80
4  36.976667 -121.470000       3.90


In [11]:
# Keep only the rows whose coordinates fall inside a fixed latitude/longitude
# bounding box (both bounds inclusive), which this cell reports as the US region.
us_df = df[
    df['latitude'].between(25.8400, 49.3800)
    & df['longitude'].between(-124.670, -66.9500)
]

# Report how many rows survived the filter and preview them.
print(f"Number of data points in the US region: {len(us_df)}")
print(us_df.loc[:, ['latitude', 'longitude', 'magnitude']].head())

Number of data points in the US region: 1428
    latitude   longitude  magnitude
0  38.714500 -119.654500       4.80
1  38.730500 -119.633000       4.70
2  38.721500 -119.635000       4.86
3  36.982167 -121.470333       3.80
4  36.976667 -121.470000       3.90
