# Import Libraries

In [None]:
import os
from datetime import datetime

import lightningchart as lc
import numpy as np
import pandas as pd


# Set license key.
# Prefer the LIGHTNINGCHART_LICENSE_KEY environment variable so the real key never
# has to be saved inside the notebook; the "LICENSE_KEY" placeholder remains the
# fallback so replacing it by hand keeps working.
lc.set_license(os.environ.get("LIGHTNINGCHART_LICENSE_KEY", "LICENSE_KEY"))

# Data Loading and Preprocessing

In [None]:
# Read the raw fuel-economy records from disk
dataset = 'dataset/fuel.csv'
vehicle_fuel = pd.read_csv(dataset)

# Derive the metric columns from their imperial counterparts.
# A combined MPG of 0 is turned into NaN first, so the division yields NaN rather than inf.
combined_mpg = vehicle_fuel['combined_mpg_ft1'].replace(0, float('nan'))
vehicle_fuel['fuel_efficiency_l_per_100km'] = 235.215 / combined_mpg
vehicle_fuel['co2_emissions_g_per_km'] = vehicle_fuel['tailpipe_co2_in_grams_mile_ft1'] / 1.60934

# The imperial source columns are not used again, so leave them out of the working frame
vehicle_fuel = vehicle_fuel.drop(['combined_mpg_ft1', 'tailpipe_co2_in_grams_mile_ft1'], axis=1)

# Summarise the updated frame, then show the number of missing values per column
vehicle_fuel.info()
vehicle_fuel.isna().sum()

In [None]:
# Keep only the rows that have both a model year and a consumption value
has_year_and_consumption = vehicle_fuel[['year', 'fuel_efficiency_l_per_100km']].notna().all(axis=1)
vehicle_fuel_clean = vehicle_fuel[has_year_and_consumption]

# Discard rows whose consumption is not strictly positive
vehicle_fuel_clean = vehicle_fuel_clean.loc[vehicle_fuel_clean['fuel_efficiency_l_per_100km'].gt(0)]

# Mean liters per 100 km for each year, with 'year' kept as a regular column
fuel_efficiency_trend = (
    vehicle_fuel_clean
    .groupby('year', as_index=False)['fuel_efficiency_l_per_100km']
    .mean()
)

# Preview the aggregated result
fuel_efficiency_trend.head()

In [None]:
def convert_year_to_unix_timestamps(year_series):
    """Convert calendar years to Unix timestamps in milliseconds.

    Args:
        year_series: Years parseable with the ``'%Y'`` format (a pandas Series,
            an Index or a plain list, e.g. ``[1984, 1985]``).

    Returns:
        list[int]: One timestamp per input year, in milliseconds since
        1970-01-01, in the same order as the input.
    """
    parsed = pd.to_datetime(year_series, format='%Y')
    # Divide the distance from the epoch by one millisecond instead of casting the
    # datetimes to int64: the raw integer depends on the storage resolution
    # (ns/us/ms/s), so a fixed `// 10**6` is only correct for nanoseconds.
    elapsed = parsed - pd.Timestamp('1970-01-01')
    return (elapsed // pd.Timedelta(milliseconds=1)).tolist()

# Exploratory Data Analysis (EDA)

## Line & Area Chart

In [None]:
# Convert years to millisecond Unix timestamps, as used with the DateTime tick strategy
x_values = convert_year_to_unix_timestamps(fuel_efficiency_trend['year'])

# Yearly average consumption, expressed in liters per 100 km
y_values = fuel_efficiency_trend['fuel_efficiency_l_per_100km'].tolist()

# Create the chart (the plotted values are L/100 km, so the labels must say so)
chart = lc.ChartXY(
    theme=lc.Themes.TurquoiseHexagon,
    title='Fuel Consumption Trends (L/100 km) Over Time'
)

# Customize the chart axes labels
x_axis = chart.get_default_x_axis()
x_axis.set_tick_strategy('DateTime')
x_axis.set_title('Year')
chart.get_default_y_axis().set_title('L/100 km')

# Add line series to the chart
series = chart.add_line_series(data_pattern="ProgressiveX").append_samples(
    x_values=x_values,
    y_values=y_values
)

# Open the chart
chart.open()

In [None]:
# Clean the data and group by year, calculating the average fuel consumption (L/100 km).
# The same cleaning as for the line chart is applied (no missing values, strictly
# positive consumption) so both charts of this metric are built from the same rows.
vehicle_fuel_clean = vehicle_fuel.dropna(subset=['year', 'fuel_efficiency_l_per_100km'])
vehicle_fuel_clean = vehicle_fuel_clean[vehicle_fuel_clean['fuel_efficiency_l_per_100km'] > 0]
fuel_efficiency_trend = vehicle_fuel_clean.groupby('year')['fuel_efficiency_l_per_100km'].mean().reset_index()

# Convert years to Unix timestamps (milliseconds). The 'year' column itself is left
# untouched so fuel_efficiency_trend still holds calendar years afterwards.
x_values = convert_year_to_unix_timestamps(fuel_efficiency_trend['year'])

# Extract y_values (already in L/100 km, no further unit conversion is needed)
y_values = fuel_efficiency_trend['fuel_efficiency_l_per_100km'].tolist()

# Create the Positive Area Series chart
chart = lc.ChartXY(
    theme=lc.Themes.TurquoiseHexagon,
    title='Fuel Efficiency Over Time (L/100 km)'
)

# Add the area series
series = chart.add_positive_area_series().add(x_values, y_values)

# Set axis titles
x_axis = chart.get_default_x_axis()
x_axis.set_tick_strategy('DateTime')  # Set the X-axis as DateTime
x_axis.set_title('Year')
chart.get_default_y_axis().set_title('Fuel Efficiency (L/100 km)')

# Open the chart
chart.open()

In [None]:
# Rows need a year, a consumption value and a vehicle class to take part in this chart
vehicle_fuel_clean = vehicle_fuel.dropna(subset=['year', 'fuel_efficiency_l_per_100km', 'class'])

# Restrict the analysis to the most common vehicle classes
common_vehicle_classes = [
    'Compact Cars',
    'Midsize Cars',
    'Large Cars',
    'Minivan - 2WD',
    'Sport Utility Vehicle - 2WD',
    'Sport Utility Vehicle - 4WD',
    'Standard Pickup Trucks',
]
is_common_class = vehicle_fuel_clean['class'].isin(common_vehicle_classes)
vehicle_fuel_filtered = vehicle_fuel_clean[is_common_class]

# Year x class table of the average consumption (L/100 km)
fuel_efficiency_class_trend = (
    vehicle_fuel_filtered
    .groupby(['year', 'class'])['fuel_efficiency_l_per_100km']
    .mean()
    .unstack()
)

# The table index holds the years; convert them to Unix timestamps for the X axis
x_values = convert_year_to_unix_timestamps(fuel_efficiency_class_trend.index)

# One list of yearly values per class; year/class combinations without data count as 0
class_data = []
for col in fuel_efficiency_class_trend.columns:
    class_data.append(fuel_efficiency_class_trend[col].fillna(0).tolist())

# Each layer is the running total of its own values and all layers before it
stacked_data = []
for layer in class_data:
    if stacked_data:
        layer = [below + value for below, value in zip(stacked_data[-1], layer)]
    stacked_data.append(layer)

# Stacked area chart
chart = lc.ChartXY(
    title="Fuel Efficiency by Common Vehicle Class Over Time (L/100 km)",
    theme=lc.Themes.TurquoiseHexagon,
)

# One named area series per vehicle class, using the cumulative values
for class_name, layer in zip(fuel_efficiency_class_trend.columns, stacked_data):
    series = chart.add_area_series()
    series.set_name(class_name)
    series.add(x_values, layer)

# Axis titles; the X axis shows the timestamps as dates
x_axis = chart.get_default_x_axis()
x_axis.set_title("Year")
x_axis.set_tick_strategy('DateTime')

chart.get_default_y_axis().set_title("Fuel Efficiency (L/100 km)")

# Legend listing the series of this chart
chart.add_legend(data=chart)

# Open the chart
chart.open()

In [None]:
# Rows used here need both a vehicle class and a consumption value
has_class_and_consumption = vehicle_fuel[['class', 'fuel_efficiency_l_per_100km']].notna().all(axis=1)
vehicle_fuel_clean = vehicle_fuel[has_class_and_consumption]

# Keep strictly positive consumption values only
vehicle_fuel_clean = vehicle_fuel_clean.loc[vehicle_fuel_clean['fuel_efficiency_l_per_100km'].gt(0)]

# Average liters per 100 km for every vehicle class, with 'class' as a regular column
fuel_by_class = (
    vehicle_fuel_clean
    .groupby('class', as_index=False)['fuel_efficiency_l_per_100km']
    .mean()
)

In [None]:
# Rows used here need both a year and a CO2 emission value
has_year_and_co2 = vehicle_fuel[['year', 'co2_emissions_g_per_km']].notna().all(axis=1)
vehicle_fuel_clean = vehicle_fuel[has_year_and_co2]

# Keep strictly positive emission values only
vehicle_fuel_clean = vehicle_fuel_clean.loc[vehicle_fuel_clean['co2_emissions_g_per_km'].gt(0)]

# Average CO2 emissions (g/km) for every year, with 'year' as a regular column
co2_emissions_trend = (
    vehicle_fuel_clean
    .groupby('year', as_index=False)['co2_emissions_g_per_km']
    .mean()
)

In [None]:
# X: years of the CO2 trend as Unix timestamps; Y: average emissions in g/km
x_values = convert_year_to_unix_timestamps(co2_emissions_trend['year'])
y_values = co2_emissions_trend['co2_emissions_g_per_km'].tolist()

chart = lc.ChartXY(
    title='CO2 Emissions Over Time (Grams per Kilometer)',
    theme=lc.Themes.TurquoiseHexagon,
)

# Plot the trend as a single line series
line = chart.add_line_series(data_pattern="ProgressiveX")
series = line.append_samples(x_values=x_values, y_values=y_values)

# Show the X values as dates and label both axes
x_axis = chart.get_default_x_axis()
x_axis.set_tick_strategy('DateTime')
x_axis.set_title('Year')
y_axis_default = chart.get_default_y_axis()
y_axis_default.set_title('CO2 Emissions (g/km)')

# Open the chart
chart.open()

In [None]:
# Clean and filter data, removing rows with missing or very low fuel efficiency values
vehicle_fuel_clean = vehicle_fuel.dropna(subset=['year', 'make', 'fuel_efficiency_l_per_100km'])
vehicle_fuel_clean = vehicle_fuel_clean[vehicle_fuel_clean['fuel_efficiency_l_per_100km'] > 0.01]

# Find the top 10 most common car makes to limit the lines in the chart
top_makes = vehicle_fuel_clean['make'].value_counts().nlargest(10).index
filtered_data = vehicle_fuel_clean[vehicle_fuel_clean['make'].isin(top_makes)]

# Group data by year and make, calculating average fuel efficiency.
# Year/make combinations without data are filled with 0.
fuel_efficiency_data = filtered_data.pivot_table(
    index='year', columns='make', values='fuel_efficiency_l_per_100km', aggfunc='mean'
).fillna(0)

# Derive the X values from this table's own year index. Reusing the x_values left
# over from an earlier cell would pair the samples with years from a different
# filter and breaks as soon as the two year lists differ.
x_values = convert_year_to_unix_timestamps(fuel_efficiency_data.index)

# Define a custom color palette
vibrant_colors = [
    lc.Color('white'), lc.Color('salmon'), lc.Color('orange'), lc.Color('limegreen'),
    lc.Color('cyan'), lc.Color('dodgerblue'), lc.Color('purple'), lc.Color('deeppink'),
    lc.Color('yellow'), lc.Color('greenyellow')
]

# Initialize the chart
chart = lc.ChartXY(
    theme=lc.Themes.TurquoiseHexagon,
    title='Fuel Efficiency Over Time by Make (Top 10, L/100km)'
)

# Plot a line for each top car make with colors from the custom palette
for idx, car_make in enumerate(fuel_efficiency_data.columns):
    y_values = fuel_efficiency_data[car_make].tolist()
    series = chart.add_line_series()
    series.set_name(car_make)
    series.append_samples(x_values=x_values, y_values=y_values)

    # Set a color from the custom palette (wraps around if there are more lines than colors)
    series.set_line_color(vibrant_colors[idx % len(vibrant_colors)])

# Configure the X-axis for DateTime display
x_axis = chart.get_default_x_axis()
y_axis = chart.get_default_y_axis()
x_axis.set_title('Year')
y_axis.set_title('Fuel Efficiency (L/100km)')
x_axis.set_tick_strategy('DateTime')

# Add a legend to display each car make
chart.add_legend(data=chart)
chart.set_cursor_mode('show-nearest') # Set cursor mode

# Open the chart
chart.open()

## Bar Chart

In [None]:
# One {'category', 'value'} record per vehicle class, built from fuel_by_class
data = [
    {'category': vehicle_class, 'value': consumption}
    for vehicle_class, consumption in zip(
        fuel_by_class['class'], fuel_by_class['fuel_efficiency_l_per_100km']
    )
]

# Vertical bar chart
chart = lc.BarChart(
    title='Fuel Consumption by Vehicle Class (Liters per 100 Kilometers)',
    theme=lc.Themes.TurquoiseHexagon,
    vertical=True,
)

# Keep the bars in the order of the data instead of letting the chart sort them
chart.set_sorting('disabled')

# Hand the records to the chart
chart.set_data(data)

# Rotate the category labels for better readability
chart.set_label_rotation(-90)

# Open the chart
chart.open()

## Heatmap Chart

In [None]:
# Heatmap of the average fuel consumption (L/100 km) for every year/make combination.
# Clean and filter data, removing rows with missing values or a fuel efficiency value of 0.01 or less
vehicle_fuel_clean = vehicle_fuel.dropna(subset=['year', 'make', 'fuel_efficiency_l_per_100km'])
vehicle_fuel_clean = vehicle_fuel_clean[vehicle_fuel_clean['fuel_efficiency_l_per_100km'] > 0.01]

# Extract unique years and sort them to create a list for timestamps
year_labels = sorted(vehicle_fuel_clean['year'].unique())
year_labels_unix = convert_year_to_unix_timestamps(year_labels)

# Group data by year and make, calculating average fuel efficiency in liters per 100 kilometers
# NOTE(review): year/make combinations without data are filled with 0, so they cannot be
# told apart from a real 0 and they take part in the palette statistics computed below.
fuel_efficiency_data = vehicle_fuel_clean.pivot_table(
    index='year', columns='make', values='fuel_efficiency_l_per_100km', aggfunc='mean'
).fillna(0)

# Convert data to numpy array (one row per year, one column per make) and get make labels
heatmap_data = fuel_efficiency_data.to_numpy()
make_labels = fuel_efficiency_data.columns.tolist()

# Define grid dimensions: X counts the years, Y counts the makes
grid_size_x, grid_size_y = int(heatmap_data.shape[0]), int(heatmap_data.shape[1])

# Create the chart
chart = lc.ChartXY(title='Fuel Efficiency by Year and Make (L/100 km)', theme=lc.Themes.TurquoiseHexagon)

# Configure the heatmap with start and end values, steps, and intensity values
# NOTE(review): the X start/end are millisecond timestamps, yet the step is set to 1
# after set_end — confirm against the LightningChart API that this step does not
# override the X extent implied by set_start/set_end.
heatmap_series = chart.add_heatmap_grid_series(columns=grid_size_x, rows=grid_size_y)
heatmap_series.set_start(x=year_labels_unix[0], y=0)
heatmap_series.set_end(x=year_labels_unix[-1], y=grid_size_y)
heatmap_series.set_step(x=1, y=1)
heatmap_series.set_intensity_interpolation(True)
heatmap_series.invalidate_intensity_values(heatmap_data.tolist())
heatmap_series.hide_wireframe()

# Define and apply a custom color palette for intensity visualization:
# five color stops at the minimum, 25th percentile, median, 75th percentile and maximum
# of all heatmap values (including the zeros inserted for missing combinations)
custom_palette = [
    {"value": float(np.nanmin(heatmap_data)), "color": lc.Color('blue')},
    {"value": float(np.percentile(heatmap_data, 25)), "color": lc.Color('cyan')},
    {"value": float(np.median(heatmap_data)), "color": lc.Color('green')},
    {"value": float(np.percentile(heatmap_data, 75)), "color": lc.Color('yellow')},
    {"value": float(np.nanmax(heatmap_data)), "color": lc.Color('red')}
]
heatmap_series.set_palette_colors(steps=custom_palette, look_up_property='value', interpolate=True)

# Configure axis titles and set the X-axis to use DateTime strategy
x_axis = chart.get_default_x_axis()
y_axis = chart.get_default_y_axis()
x_axis.set_title('Year')
y_axis.set_title('Make')
x_axis.set_tick_strategy('DateTime')

# Add custom ticks for every 10th make on the Y-axis with index numbers
for i in range(0, len(make_labels), 10):  # Step by 10 to select every 10th make
    make = make_labels[i]
    custom_tick = y_axis.add_custom_tick()
    custom_tick.set_value(i)
    custom_tick.set_text(f"{i} - {make}")  # Display index and make name
    custom_tick.set_marker_font(12, family='Segoe UI, sans-serif', weight='bold')

# Hide default Y-axis labels to keep only every 10th make label
y_axis.set_tick_strategy('Empty')

# Add a legend and open the chart
chart.add_legend(data=heatmap_series, title='Fuel Efficiency')
chart.open()

## Pyramid Chart

In [None]:
# Define the emissions color palette using named colors.
# The slices are sorted from the highest to the lowest emissions below, so the
# palette runs from red (highest) to green (lowest).
emissions_colors = [
    lc.Color('darkred'),      # Dark red for very high emissions
    lc.Color('firebrick'),    # Firebrick red
    lc.Color('chocolate'),    # Dark orange for high emissions
    lc.Color('orange'),       # Orange for moderately high emissions
    lc.Color('gold'),         # Gold for mid-range emissions
    lc.Color('greenyellow'),  # Yellow-green for mid to low emissions
    lc.Color('chartreuse'),   # Chartreuse green for lower emissions
    lc.Color('limegreen'),    # Lime green for low emissions
    lc.Color('forestgreen'),  # Forest green for very low emissions
    lc.Color('darkgreen')     # Dark green for the lowest emissions
]

# Filter to only include common vehicle classes
common_classes = [
    'Compact Cars', 'Large Cars', 'Midsize Cars',
    'Minivan - 2WD', 'Minivan - 4WD',
    'Sport Utility Vehicle - 2WD', 'Sport Utility Vehicle - 4WD',
    'Standard Pickup Trucks', 'Standard Pickup Trucks 2WD', 'Standard Pickup Trucks 4WD'
]

# Build the input directly from vehicle_fuel rather than from whatever
# vehicle_fuel_clean an earlier cell happened to leave behind, and drop missing or
# non-positive CO2 values the same way the yearly CO2 trend does.
co2_by_class_source = vehicle_fuel.dropna(subset=['class', 'co2_emissions_g_per_km'])
co2_by_class_source = co2_by_class_source[co2_by_class_source['co2_emissions_g_per_km'] > 0]
filtered_data = co2_by_class_source[co2_by_class_source['class'].isin(common_classes)]

# Calculate the average CO2 emissions for these common vehicle classes in grams per kilometer
vehicle_co2_emissions = filtered_data.groupby('class')['co2_emissions_g_per_km'].mean().reset_index()

# Prepare the data for the Pyramid Chart with metric units
pyramid_data = [
    {'name': row['class'], 'value': row['co2_emissions_g_per_km']}
    for _, row in vehicle_co2_emissions.iterrows()
]

# Sort data in descending order to have the largest CO2 emissions at the top
pyramid_data = sorted(pyramid_data, key=lambda x: x['value'], reverse=True)

# Create the Pyramid Chart
chart = lc.PyramidChart(
    slice_mode='height',
    theme=lc.Themes.TurquoiseHexagon,
    title='Common Vehicle Class Breakdown by CO2 Emissions (g/km)'
)

# Add the data slices to the pyramid chart
chart.add_slices(pyramid_data)

# Apply the custom color palette to the slices
chart.set_slice_colors(emissions_colors)

# Add a legend to explain the chart's purpose
chart.add_legend().add(chart).set_title('CO2 Emissions (g/km)')

# Open the chart
chart.open()

## Donut Chart

In [None]:
# Number of records per fuel type as a two-column frame: 'fuel_type' and 'count'
fuel_type_breakdown = (
    vehicle_fuel['fuel_type']
    .value_counts()
    .rename_axis('fuel_type')
    .reset_index(name='count')
)

# One {'name', 'value'} slice per fuel type
donut_data = [
    {'name': fuel_type, 'value': count}
    for fuel_type, count in zip(fuel_type_breakdown['fuel_type'], fuel_type_breakdown['count'])
]

# A pie chart with an inner radius is rendered as a donut
chart = lc.PieChart(
    labels_inside_slices=False,  # Keep the labels outside of the slices
    title='Fuel Type Breakdown',
    theme=lc.Themes.TurquoiseHexagon
)

# Add the data slices to the chart
chart.add_slices(donut_data)

# Label content and donut hole size
chart.set_label_formatter('NamePlusValue')
chart.set_inner_radius(50)

# Legend for the slices, titled 'Fuel Types'
legend = chart.add_legend(data=chart)
legend = legend.set_title('Fuel Types')

# Open the chart
chart.open()

## Treemap Chart

In [None]:
# Mean L/100 km for every available vehicle class, classes without a value removed,
# ordered from the lowest to the highest consumption
average_l_per_100km_by_class = (
    vehicle_fuel
    .groupby('class')['fuel_efficiency_l_per_100km']
    .mean()
    .dropna()
    .sort_values()
)

# One leaf node per vehicle class, carrying its average L/100 km
data = []
for vehicle_class, l_per_100km in average_l_per_100km_by_class.items():
    data.append({'name': vehicle_class, 'value': l_per_100km})

# Treemap chart
chart = lc.TreeMapChart(
    title="Average Fuel Efficiency by Vehicle Class (L/100 km)",
    theme=lc.Themes.TurquoiseHexagon,
)

# Color the nodes by their L/100 km value:
# red at the maximum (less efficient), yellow at the mean, green at the minimum (more efficient)
coloring_steps = [
    {'value': average_l_per_100km_by_class.max(), 'color': lc.Color('red')},
    {'value': average_l_per_100km_by_class.mean(), 'color': lc.Color('yellow')},
    {'value': average_l_per_100km_by_class.min(), 'color': lc.Color('green')},
]
chart.set_node_coloring(steps=coloring_steps)

# All classes are children of a single root node
chart.set_data([{'name': 'Vehicle Classes', 'children': data}])

# Open the chart
chart.open()