# Import Libraries

In [None]:
import pandas as pd
import lightningchart as lc
import time
from datetime import datetime

# Register the LightningChart license key; in this file the call precedes every chart construction.
# NOTE(review): "LICENSE_KEY" looks like a placeholder value — presumably it must be replaced with a real key; confirm.
lc.set_license("LICENSE_KEY")

# Data Loading and Preprocessing

In [None]:
# Relative paths of the two source CSV files (one per year range named in the file name)
file_path_japan_2001_2018, file_path_japan_2000_2023 = (
    'datasets/Japan_2001-2018.csv',
    'datasets/Japan_2000_2023.csv',
)

# Read each CSV into its own pandas DataFrame, in the same order as the paths above
data_japan_2001_2018, data_japan_2000_2023 = (
    pd.read_csv(csv_path)
    for csv_path in (file_path_japan_2001_2018, file_path_japan_2000_2023)
)

In [None]:
# Parse the 'time' column of both frames in place as timezone-aware (UTC) datetimes
for frame in (data_japan_2001_2018, data_japan_2000_2023):
    frame['time'] = pd.to_datetime(frame['time'], utc=True)

# Keep rows before 2019 from the first file and rows from 2000 onward from the second file
data_japan_old_filtered = data_japan_2001_2018.loc[data_japan_2001_2018['time'] < '2019-01-01']
data_japan_new_filtered = data_japan_2000_2023.loc[data_japan_2000_2023['time'] >= '2000-01-01']

# Stack the two selections; rows from the first file come first
combined_data = pd.concat([data_japan_old_filtered, data_japan_new_filtered])

# For every distinct 'time' keep only the first row encountered.
# .copy() yields an independent frame so later column assignments do not raise SettingWithCopyWarning.
cleaned_combined_data = combined_data.drop_duplicates(subset='time', keep='first').copy()

# Sanity check: number of rows whose 'time' still repeats an earlier row (expected to be 0)
remaining_duplicates = cleaned_combined_data.duplicated(subset='time').sum()
remaining_duplicates

# Exploratory Data Analysis (EDA)

## Geographic Distribution

In [None]:
# Longitude goes on the x-axis and latitude on the y-axis, both as plain Python lists
x_values = cleaned_combined_data['longitude'].to_numpy().tolist()
y_values = cleaned_combined_data['latitude'].to_numpy().tolist()

# Dark-themed scatter chart with circular markers and labelled axes
chart = lc.ScatterChart(
    title='Geographic Distribution of Seismic Events in Japan (2000 - 2023)',
    xlabel='Longitude',
    ylabel='Latitude',
    point_shape='circle',
    individual_colors=False,  # per-point colors are switched off; one color is set below
    theme=lc.Themes.Dark,
)

# Feed the raw coordinates to the chart's series
series = chart.series.append_samples(x_values=x_values, y_values=y_values)

# Paint the whole series in one color: pure blue, RGB (0, 0, 255)
series.set_point_color(lc.Color(0, 0, 255))

# Show the chart in an 800x800 view
chart.open(width=800, height=800)

### 3D Visualization of Geographic Distribution

In [None]:
# Build a 3D point chart of the events: x = longitude, y = latitude, z = depth.
# Each point is colored by its normalized magnitude and drawn larger when the magnitude exceeds 6.0.

# Extract longitude, latitude, depth, and magnitude values from the dataset
x_values = cleaned_combined_data['longitude'].tolist()
y_values = cleaned_combined_data['latitude'].tolist()
z_values = cleaned_combined_data['depth'].tolist()
magnitude_values = cleaned_combined_data['mag'].tolist()  # Magnitude values for coloring

# Normalize the magnitude values to a range between 0 and 1 for color mapping
min_mag = min(magnitude_values)
max_mag = max(magnitude_values)
# If every magnitude is identical the range is 0; fall back to 1.0 so the division
# is defined (all lookup values become 0.0) instead of raising ZeroDivisionError.
mag_range = (max_mag - min_mag) or 1.0
lookup_values = [(m - min_mag) / mag_range for m in magnitude_values]

# Create a 3D chart with a dark theme
chart = lc.Chart3D(
    theme=lc.Themes.Dark,
    title='3D Visualization of Seismic Events (Longitude, Latitude, Depth)'
)

# Set titles for the axes
chart.get_default_x_axis().set_title("Longitude")
chart.get_default_y_axis().set_title("Latitude")
chart.get_default_z_axis().set_title("Depth (km)")

# Create a point series with the per-point lookup value and per-point size options switched on
series = chart.add_point_series(
    render_2d=False,
    individual_lookup_values_enabled=True,
    individual_point_size_axis_enabled=True,
    individual_point_size_enabled=True
)

# Draw each point as a sphere
series.set_point_shape('sphere')

# Color palette keyed on each point's 'value' (the normalized magnitude computed above)
series.set_palette_point_colors(
    steps=[
        {'value': 0.0, 'color': lc.Color(0, 0, 255)},     # Blue for the lowest magnitudes
        {'value': 0.25, 'color': lc.Color(0, 255, 0)},    # Green for lower-mid magnitudes
        {'value': 0.5, 'color': lc.Color(255, 255, 0)},   # Yellow for medium magnitudes
        {'value': 0.75, 'color': lc.Color(255, 165, 0)},  # Orange for higher-mid magnitudes
        {'value': 1.0, 'color': lc.Color(255, 0, 0)}      # Red for the highest magnitudes
    ],
    look_up_property='value',
    interpolate=True,
    percentage_values=True
)

# One record per event; the five lists are parallel, so iterate them in lockstep
data = [
    {
        'x': longitude,
        'y': latitude,
        'z': depth,
        'size': 7 if magnitude > 6.0 else 4,  # Larger marker for magnitudes above 6.0
        'value': lookup,  # Normalized magnitude used for the color lookup
    }
    for longitude, latitude, depth, magnitude, lookup in zip(
        x_values, y_values, z_values, magnitude_values, lookup_values
    )
]

# Add the prepared data to the point series
series.add(data)

# Open the chart in a web browser window
chart.open(method="browser")

## Events Over Time

In [None]:
# Derive a 'year' column from the parsed 'time' column
cleaned_combined_data['year'] = cleaned_combined_data['time'].dt.year

# Events per year, ordered by year
event_counts_per_year = cleaned_combined_data['year'].value_counts().sort_index()

# x-axis: January 1st of each year expressed as milliseconds since the Unix epoch.
# NOTE(review): time.mktime interprets the naive datetime in the machine's local time zone,
# while 'time' was parsed as UTC — confirm the resulting offset is intended.
xValues = [
    int(1000 * time.mktime(datetime(year, 1, 1).timetuple()))
    for year in event_counts_per_year.index.tolist()
]

# y-axis: the matching yearly event counts
yValues = event_counts_per_year.to_numpy().tolist()

# Dark-themed XY chart
chart = lc.ChartXY(
    title='Seismic Events Over Time',
    theme=lc.Themes.Dark,
)

# Swap the default x-axis for a high-precision linear axis with date-time ticks
chart.get_default_x_axis().dispose()
x_axis = chart.add_x_axis(axis_type='linear-highPrecision')
x_axis.set_tick_strategy('DateTime')
x_axis.set_scroll_strategy('progressive')

# Span the axis from the earliest to the latest x value (both in milliseconds)
x_axis.set_interval(
    start=min(xValues),
    end=max(xValues),
    stop_axis_after=False,
)

# Axis captions
x_axis.set_title("Year")
chart.get_default_y_axis().set_title("Events")

# Plot the yearly counts as a point-line series
series = chart.add_point_line_series().append_samples(
    x_values=xValues,
    y_values=yValues,
)

# Red points (RGB 255, 0, 0) joined by a line of thickness 2
series.set_point_color(lc.Color(255, 0, 0))
series.set_line_thickness(2)

chart.open()

## Depth vs. Magnitude

In [None]:
# x-axis: depth of each event; y-axis: its magnitude
xValues = cleaned_combined_data['depth'].tolist()
yValues = cleaned_combined_data['mag'].tolist()

# The magnitude is also used as the per-point lookup value that drives the coloring
lookupValues = cleaned_combined_data['mag'].tolist()

# Dark-themed scatter chart with triangular markers, per-point colors and lookup values enabled
chart = lc.ScatterChart(
    title='Depth vs. Magnitude',
    xlabel='Depth (km)',
    ylabel='Magnitude',
    point_shape='triangle',
    individual_colors=True,
    individual_lookup_values=True,
    theme=lc.Themes.Dark,
)

# Hand the samples to the series together with their lookup values
series = chart.series.append_samples(
    x_values=xValues,
    y_values=yValues,
    lookup_values=lookupValues,
)

# Palette anchors: smallest, midpoint and largest magnitude in the data
lowest_magnitude = min(lookupValues)
highest_magnitude = max(lookupValues)
middle_magnitude = (lowest_magnitude + highest_magnitude) / 2

# Color steps are given as absolute magnitude values (percentage_values=False)
series.set_palette_colors(
    steps=[
        {'value': lowest_magnitude, 'color': lc.Color(255, 255, 0)},   # Yellow at the low end
        {'value': middle_magnitude, 'color': lc.Color(0, 255, 255)},   # Cyan at the midpoint
        {'value': highest_magnitude, 'color': lc.Color(255, 0, 0)},    # Red at the high end
    ],
    look_up_property='value',
    percentage_values=False,
)

# Legend titled 'Magnitude', attached to this chart
legend = chart.add_legend(data=chart).set_title('Magnitude')

chart.open()

## Monthly Frequency

In [None]:
import calendar

# Bar chart of how many events fall in each calendar month (all years pooled),
# plus a printed count of the events recorded in March 2011.

# Extract the month number from the 'time' column into a new 'month' column
cleaned_combined_data['month'] = cleaned_combined_data['time'].dt.month

# Number of events per month number
monthly_event_counts = cleaned_combined_data.groupby('month').size()

# One {'category': <full month name>, 'value': <event count>} record per month present in the data
data = [
    {'category': calendar.month_name[month_number], 'value': event_count}
    for month_number, event_count in monthly_event_counts.items()
]

# Select the events of March 2011. The year is read straight from 'time' so this cell
# does not depend on a 'year' column that is only created in a different cell
# (which would raise KeyError if that cell had not been run).
event_times = cleaned_combined_data['time']
march_2011_data = cleaned_combined_data[
    (event_times.dt.year == 2011) & (event_times.dt.month == 3)
]

# Count the number of seismic events that occurred in March 2011
march_2011_event_count = march_2011_data.shape[0]

# Print the number of seismic events in March 2011
print(f"Number of seismic events in March 2011: {march_2011_event_count}")

# Create a vertical bar chart with a dark theme
chart = lc.BarChart(
    vertical=True,
    theme=lc.Themes.Dark,
    title='Monthly Frequency of Events in Japan'
)

# Disable sorting so the bars keep the order of `data` (ascending month number)
chart.set_sorting('disabled')

# Hand the month/count records to the chart
chart.set_data(data)

chart.open()

## Monthly Average Magnitude

In [None]:
# Mean magnitude of the events in each month number
monthly_avg_magnitude = cleaned_combined_data.groupby('month')['mag'].mean()

# One {'category': <full month name>, 'value': <mean magnitude>} record per month present in the data
data = [
    {'category': calendar.month_name[month_number], 'value': average_magnitude}
    for month_number, average_magnitude in monthly_avg_magnitude.items()
]

# Dark-themed vertical bar chart
chart = lc.BarChart(
    title='Average Magnitude of Each Month',
    theme=lc.Themes.Dark,
    vertical=True,
)

# Sorting is disabled so the bars keep the order of `data` (ascending month number)
chart.set_sorting('disabled')

# Hand the month/mean records to the chart
chart.set_data(data)

chart.open()

## Events by Region

In [None]:
# Latitude/longitude bounding boxes of the named regions.
# Boxes may overlap; the insertion order below decides which region wins (see assign_region).
regions = {
    'Hokkaidō': dict(lat_min=41, lat_max=45.5, lon_min=139, lon_max=146),
    'Tōhoku': dict(lat_min=36.5, lat_max=41.5, lon_min=139, lon_max=142),
    'Kantō': dict(lat_min=34, lat_max=37, lon_min=138, lon_max=141),
    'Chūbu': dict(lat_min=34, lat_max=38.5, lon_min=136, lon_max=139),
    'Kansai': dict(lat_min=33.5, lat_max=36, lon_min=134, lon_max=137),
    'Chūgoku': dict(lat_min=33.5, lat_max=36.5, lon_min=130.5, lon_max=134),
    'Shikoku': dict(lat_min=32.5, lat_max=34.5, lon_min=132, lon_max=135),
    'Kyūshū & Okinawa': dict(lat_min=23.5, lat_max=34, lon_min=123.5, lon_max=132),
    'North East Shore': dict(lat_min=42, lat_max=50, lon_min=145.5, lon_max=155.5),
    'North West Shore': dict(lat_min=37.5, lat_max=43, lon_min=130, lon_max=139),
    'East Shore': dict(lat_min=35, lat_max=42, lon_min=141, lon_max=150),
    'South East Shore': dict(lat_min=20, lat_max=35, lon_min=135, lon_max=150),
}


def assign_region(row):
    """Return the name of the first region whose bounding box contains the row.

    Args:
        row: A mapping-like object indexable by 'latitude' and 'longitude'
            (for example a DataFrame row).

    Returns:
        The key of the first entry in ``regions`` (in insertion order) whose
        inclusive latitude and longitude bounds both contain the row's
        coordinates, or 'Other' when no entry matches.
    """
    matching_names = (
        name
        for name, box in regions.items()
        if box['lat_min'] <= row['latitude'] <= box['lat_max']
        and box['lon_min'] <= row['longitude'] <= box['lon_max']
    )
    # next() stops at the first hit, so earlier entries take precedence on overlaps
    return next(matching_names, 'Other')

# Label every row with a region name by passing each row (axis='columns') to assign_region
cleaned_combined_data['region'] = cleaned_combined_data.apply(assign_region, axis='columns')

### Distribution by Region

In [None]:
# Number of events carrying each value of the 'region' column
region_event_counts = cleaned_combined_data.groupby('region').size()

# One {'category': <region name>, 'value': <event count>} record per region
data = [
    {'category': region_name, 'value': event_count}
    for region_name, event_count in region_event_counts.items()
]

# Dark-themed vertical bar chart
chart = lc.BarChart(
    title='Distribution of Seismic Events by Region',
    theme=lc.Themes.Dark,
    vertical=True,
)

# Ask the chart to sort the bars in descending order
chart.set_sorting('descending')

# Hand the region/count records to the chart
chart.set_data(data)

chart.open()

### Events Over Time by Region 

In [None]:
# Line chart of yearly event counts with one line series per region.

# Extract the year from the 'time' column and create a new 'year' column in the DataFrame
cleaned_combined_data['year'] = cleaned_combined_data['time'].dt.year

# Count events per (year, region) pair, then pivot so each region becomes a column;
# year/region combinations with no events are filled with 0
year_region_event_counts = cleaned_combined_data.groupby(['year', 'region']).size().unstack(fill_value=0)

# Convert every year to milliseconds since the Unix epoch once. The list is the same
# for every region, so it is computed here instead of being rebuilt inside the loop.
x_values = [
    int(time.mktime(datetime(year, 1, 1).timetuple()) * 1000)
    for year in year_region_event_counts.index.tolist()
]

# Create an XY chart with a dark theme, titled "Seismic Events Over Time by Region"
chart = lc.ChartXY(
    theme=lc.Themes.Dark,
    title='Seismic Events Over Time by Region'
)

# Remove the default x-axis to configure a custom high-precision linear x-axis
chart.get_default_x_axis().dispose()
x_axis = chart.add_x_axis(axis_type='linear-highPrecision')

# Use date-time ticks and the 'progressive' scroll strategy on the new axis
x_axis.set_tick_strategy('DateTime')
x_axis.set_scroll_strategy('progressive')

# Overall time range of the data
min_year = year_region_event_counts.index.min()
max_year = year_region_event_counts.index.max()

# Axis bounds in milliseconds: the converted values of the earliest and latest year
min_x_value = min(x_values)
max_x_value = max(x_values)

# Set the interval for the x-axis based on the converted minimum and maximum years
x_axis.set_interval(start=min_x_value, end=max_x_value, stop_axis_after=False)

# Set the title for the x-axis
x_axis.set_title("Year")

# Set the title for the y-axis
chart.get_default_y_axis().set_title("Events")

# One line series per region, all sharing the same x values
for region in year_region_event_counts.columns:
    # Yearly event counts of this region
    y_values = year_region_event_counts[region].tolist()

    # Add a line series to the chart for the current region
    series = chart.add_line_series().append_samples(
        x_values=x_values,
        y_values=y_values
    )

    # Set the line thickness to 2
    series.set_line_thickness(2)

    # Name the series after the region
    series.set_name(region)

# Add a legend and attach the chart to it
legend = chart.add_legend()
legend.add(chart)

chart.open()