In [14]:
import os

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
from plotly.subplots import make_subplots

In [15]:
# Use the dark theme as the default template for every figure in this notebook
pio.templates.default = "plotly_dark"

# Folder that receives the exported PNGs. Later cells build their output paths
# as `base_dir + filename`, so the trailing slash must stay.
base_dir = 'star_plots/'

# Make sure the folder exists before any cell tries to save into it, so that
# a fresh checkout survives "Restart Kernel -> Run All".
os.makedirs(base_dir, exist_ok=True)

In [16]:
# Read the star dataset from the CSV in the working directory into a DataFrame
star_df = pd.read_csv(filepath_or_buffer='cleaned_star_data.csv')

# Uncomment to preview the first rows of the frame
# star_df.head()

In [17]:
# star_df.info()

In [18]:
# Lookup table: numeric 'Star type' code -> category name.
# The position of each name in the list below is its numeric code (0..5).
star_type_mapping = dict(enumerate([
    'Brown Dwarf',
    'Red Dwarf',
    'White Dwarf',
    'Main Sequence',
    'Supergiants',
    'Hypergiants',
]))

In [19]:
# Translate numeric star-type codes into category names. A column that is
# already non-numeric (for example when this cell is re-run) is left untouched.
star_type_is_coded = pd.api.types.is_numeric_dtype(star_df['Star type'])
if star_type_is_coded:
    star_df['Star type'] = star_df['Star type'].map(star_type_mapping)

In [20]:
# Render the DataFrame as this cell's output (last-expression display)
star_df

Unnamed: 0,Temperature (K),Luminosity(L/Lo),Radius(R/Ro),Absolute magnitude(Mv),Star type,Star color,Spectral Class
0,3068,0.002400,0.1700,16.12,Brown Dwarf,Red,M
1,3042,0.000500,0.1542,16.60,Brown Dwarf,Red,M
2,2600,0.000300,0.1020,18.70,Brown Dwarf,Red,M
3,2800,0.000200,0.1600,16.65,Brown Dwarf,Red,M
4,1939,0.000138,0.1030,20.06,Brown Dwarf,Red,M
...,...,...,...,...,...,...,...
235,38940,374830.000000,1356.0000,-9.93,Hypergiants,Blue,O
236,30839,834042.000000,1194.0000,-10.63,Hypergiants,Blue,O
237,8829,537493.000000,1423.0000,-10.73,Hypergiants,White,A
238,9235,404940.000000,1112.0000,-11.23,Hypergiants,White,A


In [21]:
# 1. Bar chart to visualize the count of stars with respective type
star_type_counts = star_df['Star type'].value_counts().reset_index()
star_type_counts.columns = ['Star type', 'Count']

# value_counts() orders by frequency; re-sort into the fixed star-type order
star_type_order = {
    'Brown Dwarf': 0,
    'Red Dwarf': 1,
    'White Dwarf': 2,
    'Main Sequence': 3,
    'Supergiants': 4,
    'Hypergiants': 5,
}
star_type_counts['Order'] = star_type_counts['Star type'].map(star_type_order)
star_type_counts = star_type_counts.sort_values('Order')

# Pin every star type to its own color. A positional color sequence would
# shift all following colors if one star type were absent from the data;
# an explicit map keeps each type's color stable.
star_type_bar_colors = {
    'Brown Dwarf': 'brown',
    'Red Dwarf': 'red',
    'White Dwarf': 'white',
    'Main Sequence': 'yellow',
    'Supergiants': 'lightskyblue',
    'Hypergiants': 'orange',
}

# Create the customized bar chart
fig_bar = px.bar(
    star_type_counts,
    x='Star type',
    y='Count',
    title='Visualize star count per star type',
    color='Star type',
    color_discrete_map=star_type_bar_colors,
)

# Customize further
fig_bar.update_layout(
    title_font=dict(color='royalblue', size=20, family='Arial', weight='bold'),
    title_x=0.5,
    plot_bgcolor='rgba(0,0,0,0.8)',
    paper_bgcolor='rgba(0,0,0,0.8)',
    yaxis_title="# of Stars",
    xaxis_title="",
    xaxis=dict(tickangle=45, tickfont=dict(color='lime')),
    yaxis=dict(tickfont=dict(color='pink')),
)

# Add the count as a label just above each bar
for star_type, count in zip(star_type_counts['Star type'], star_type_counts['Count']):
    fig_bar.add_annotation(
        x=star_type,
        y=count,
        text=str(count),
        font=dict(color='red', size=14, weight='bold'),
        showarrow=False,
        yshift=10,  # pixels above the bar top
    )

fig_bar.show()

# Save the figure
fig_bar.write_image(base_dir + "plotly_barplot_star_count.png")


In [22]:
# 2. Visualizing Star Color data
# Two-column frame: one row per star color with its number of occurrences
star_color_counts = (
    star_df['Star color']
    .value_counts()
    .rename_axis('Star color')
    .reset_index(name='Count')
)

fig_color = px.bar(
    star_color_counts,
    x='Star color',
    y='Count',
    color='Star color',
    title='Visualizing the count of Star Colors',
)

# Styling: centered title, dark backgrounds, colored tick labels
fig_color.update_layout(
    title_font=dict(color='royalblue', size=20, family='Arial', weight='bold'),
    title_x=0.5,
    plot_bgcolor='rgba(0,0,0,0.8)',
    paper_bgcolor='rgba(0,0,0,0.8)',
    xaxis=dict(
        title=dict(text="Star color"),
        tickangle=15,
        tickfont=dict(color='orange', size=11),
    ),
    yaxis=dict(
        title=dict(text="Count"),
        tickfont=dict(color='pink'),
    ),
)

# Put the count as a label just above each bar
count_label_font = dict(color='red', size=14, weight='bold')
for color_name, n_stars in zip(star_color_counts['Star color'], star_color_counts['Count']):
    fig_color.add_annotation(
        x=color_name,
        y=n_stars,
        text=str(n_stars),
        font=count_label_font,
        showarrow=False,
        yshift=10,
    )

fig_color.show()

# Save the figure
fig_color.write_image(base_dir + "plotly_star_colors_viz.png")

In [23]:
# 3. Visualize Outliers with Box Plots
# One subplot per numeric feature, one box per star type inside each subplot.
numeric_features = ['Temperature (K)', 'Luminosity(L/Lo)', 'Radius(R/Ro)', 'Absolute magnitude(Mv)']

fig_box = make_subplots(
    rows=1,
    cols=len(numeric_features),
    subplot_titles=numeric_features,
    horizontal_spacing=0.05
)

# Fill color per star type (named distinctly so it is not confused with the
# list called `colors` that the line-plot cell defines).
star_type_colors = {
    'Brown Dwarf': 'brown',
    'Red Dwarf': 'red',
    'White Dwarf': 'white',
    'Main Sequence': 'yellow',
    'Supergiants': 'lightskyblue',
    'Hypergiants': 'orange'
}

# This figure uses a white background, so boxes get a dark outline;
# otherwise the white 'White Dwarf' box would be invisible.
box_outline_color = 'dimgray'

# Split the frame once. With sort=False the groups come in order of first
# appearance, and rows whose star type is missing are left out.
star_type_groups = list(star_df.groupby('Star type', sort=False))

# Add one box per (feature, star type) pair
for col_idx, feature in enumerate(numeric_features, start=1):
    for star_type, type_rows in star_type_groups:
        # Fall back to gray for a star type without an assigned color
        fill_color = star_type_colors.get(star_type, 'gray')

        fig_box.add_trace(
            go.Box(
                y=type_rows[feature],
                name=star_type,
                fillcolor=fill_color,
                line=dict(color=box_outline_color, width=1.5),
                marker=dict(
                    color=fill_color,
                    line=dict(color=box_outline_color, width=1)
                ),
                showlegend=col_idx == 1  # Only show legend entries once
            ),
            row=1,
            col=col_idx
        )

# Update layout for better visualization
fig_box.update_layout(
    # The notebook-wide default template is dark (light text). On this white
    # figure that text would be unreadable, so use a light template here.
    template='plotly_white',
    title_text='Visualizing the outliers in Numeric features of Star Type',
    title_font=dict(color='black', size=15, weight='bold'),
    title_x=0.5,
    height=600,
    width=1200,
    boxmode='group',
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=-0.3,
        xanchor="center",
        x=0.5
    ),
    plot_bgcolor='white',
    paper_bgcolor='white',
)

# Per-subplot cosmetics: no y-axis title, hidden x-axis, red subplot title.
# The first len(numeric_features) layout annotations are the subplot titles.
for col_idx in range(1, len(numeric_features) + 1):
    fig_box.update_yaxes(title_text='', row=1, col=col_idx)
    fig_box.update_xaxes(visible=False, row=1, col=col_idx)  # Hide x-axis labels
    fig_box.layout.annotations[col_idx - 1].update(font=dict(color='red'))

fig_box.show()

# Save the figure
fig_box.write_image(base_dir + "plotly_boxplot_star_type.png")

In [24]:
# 4. Create Line plots for numeric features
# Four stacked panels, one per numeric feature, each titled with the feature name
fig_line = make_subplots(
    rows=4,
    cols=1,
    vertical_spacing=0.1,
    subplot_titles=numeric_features,
)

# Line color per panel, in the same order as numeric_features
colors = ['royalblue', 'gold', 'lime', 'magenta']

# Draw each feature's values in ascending order as a single line
for position, feature in enumerate(numeric_features):
    ascending_values = star_df[feature].sort_values()
    feature_trace = go.Scatter(
        y=ascending_values,
        mode='lines',
        name=feature,
        line=dict(color=colors[position], width=2),
    )
    fig_line.add_trace(feature_trace, row=position + 1, col=1)

# Figure-level styling
fig_line.update_layout(
    title_text='Visualizing the distribution of Numeric Features',
    title_font=dict(color='hotpink', size=15, weight='bold'),
    title_x=0.5,
    width=900,
    height=800,
    showlegend=False,
    paper_bgcolor='rgba(0,0,0,0.8)',
    plot_bgcolor='rgba(0,0,0,0.8)',
)

# Recolor the four subplot titles (stored as the first layout annotations)
for subplot_title in fig_line.layout.annotations[:4]:
    subplot_title.update(font=dict(color='red'))

fig_line.show()

# Save the figure
fig_line.write_image(base_dir + "plotly_line_subplot.png")

In [25]:


# 5. Create a pairplot equivalent using Plotly
# Plotly doesn't have a direct pairplot function like seaborn
# but we can create a similar visualization using a grid of scatter plots:
# histograms on the diagonal, scatter plots everywhere else, colored by
# spectral class. (plotly.express is already imported as `px` in the
# notebook's import cell, so it is not re-imported here.)

# Keep only the numeric columns plus the Spectral Class used for coloring
pair_data = star_df[numeric_features + ['Spectral Class']]
n_features = len(numeric_features)

# Create the figure with subplots
# NOTE(review): shared_yaxes=True also ties each diagonal histogram's count
# axis to the y-axis of the scatter plots in the same row, so the two kinds of
# panel share one scale. Confirm this is the intended look.
fig_pair = make_subplots(
    rows=n_features,
    cols=n_features,
    shared_xaxes=True,
    shared_yaxes=True,
    horizontal_spacing=0.02,
    vertical_spacing=0.02
)

# Get unique spectral classes
spectral_classes = pair_data['Spectral Class'].unique()

# Assign one qualitative color per class, cycling if there are more classes
# than palette entries
colorscale = px.colors.qualitative.Plotly
spectral_colors = {
    spec_class: colorscale[i % len(colorscale)]
    for i, spec_class in enumerate(spectral_classes)
}

# Split the rows by class once instead of re-filtering inside every panel
class_subsets = {
    spec_class: pair_data[pair_data['Spectral Class'] == spec_class]
    for spec_class in spectral_classes
}

# Fill the grid: row i plots feat_y, column j plots feat_x
for i, feat_y in enumerate(numeric_features):
    for j, feat_x in enumerate(numeric_features):
        for spectral, subset in class_subsets.items():
            if subset.empty:  # e.g. a missing class label matches no rows
                continue

            if i == j:
                # Diagonal: per-class histogram of the feature
                trace = go.Histogram(
                    x=subset[feat_x],
                    name=spectral,
                    # Same group on every panel, so one legend click toggles
                    # the class across the whole grid
                    legendgroup=str(spectral),
                    marker_color=spectral_colors[spectral],
                    opacity=0.7,
                    # Legend entries come from the top-left panel only
                    showlegend=(i == 0)
                )
            else:
                # Off-diagonal: feat_x against feat_y
                trace = go.Scatter(
                    x=subset[feat_x],
                    y=subset[feat_y],
                    mode='markers',
                    marker=dict(
                        color=spectral_colors[spectral],
                        size=5,
                        opacity=0.7
                    ),
                    name=spectral,
                    legendgroup=str(spectral),
                    showlegend=False
                )

            fig_pair.add_trace(trace, row=i+1, col=j+1)

# Update layout
fig_pair.update_layout(
    title_text='Pairplot of Star Features by Spectral Class',
    title_font=dict(size=16),
    title_x=0.5,
    height=900,
    width=900,
    plot_bgcolor='rgba(0,0,0,0.8)',
    paper_bgcolor='rgba(0,0,0,0.8)',
    legend=dict(
        title='Spectral Class',
        orientation="h",
        yanchor="bottom",
        y=-0.1,
        xanchor="center",
        x=0.5
    )
)

# Only show axis titles on the edge plots: y titles down the first column,
# x titles along the last row
for idx, feature in enumerate(numeric_features, start=1):
    fig_pair.update_yaxes(title_text=feature, row=idx, col=1, title_standoff=0)
    fig_pair.update_xaxes(title_text=feature, row=n_features, col=idx, title_standoff=0)

fig_pair.show()

# Save the figure
fig_pair.write_image(base_dir + "plotly_pairplot.png")



In [26]:
# Hertzsprung-Russell diagram: temperature against absolute magnitude,
# one scatter trace per star type plus a marker for the Sun.

# Display properties per star type, keyed by the numeric star-type code.
# 'size' is divided by 3 below before being used as the marker size.
star_types = {
    0: {'label': 'Brown Dwarf', 'color': 'brown', 'size': 30},
    1: {'label': 'Red Dwarf', 'color': 'red', 'size': 35},
    2: {'label': 'White Dwarf', 'color': 'white', 'size': 40},
    3: {'label': 'Main Sequence', 'color': 'cyan', 'size': 30},
    4: {'label': 'Supergiants', 'color': 'orange', 'size': 100},
    5: {'label': 'Hypergiants', 'color': 'maroon', 'size': 150}
}

# Create the HR diagram
fig = go.Figure()

# Give every row a text label, whether 'Star type' still holds numeric codes
# or has already been mapped to names by an earlier cell.
if pd.api.types.is_numeric_dtype(star_df['Star type']):
    code_to_label = {code: props['label'] for code, props in star_types.items()}
    star_df['Star Type Label'] = star_df['Star type'].map(code_to_label)
else:
    star_df['Star Type Label'] = star_df['Star type']

# Add one trace per star type, selecting rows by the label column so the same
# filter works for both the numeric and the already-mapped case
for properties in star_types.values():
    star_subset = star_df[star_df['Star Type Label'] == properties['label']]

    # Skip if no stars of this type
    if star_subset.empty:
        continue

    fig.add_trace(go.Scatter(
        x=star_subset['Temperature (K)'],
        y=star_subset['Absolute magnitude(Mv)'],
        mode='markers',
        name=properties['label'],
        marker=dict(
            size=properties['size'] / 3,  # Adjust size for plotly
            color=properties['color'],
            line=dict(width=1, color='black')
        )
    ))

# Add the Sun as a single reference point (x on the temperature axis,
# y on the absolute-magnitude axis)
fig.add_trace(go.Scatter(
    x=[5778],
    y=[4.83],
    mode='markers',
    name='Sun',
    marker=dict(
        size=25,  # Adjust size for plotly
        color='yellow',
        line=dict(width=1, color='black')
    )
))

# Update layout for better visualization
fig.update_layout(
    title=dict(
        text="Hertzsprung-Russell Diagram",
        font=dict(size=20, color='royalblue'),
        x=0.5
    ),
    xaxis=dict(
        title=dict(
            text="Temperature (K)",
            font=dict(color='hotpink', size=16)
        ),
        autorange='reversed'  # Invert x-axis
    ),
    yaxis=dict(
        title=dict(
            text="Absolute Magnitude (Mv)",
            font=dict(color='hotpink', size=16)
        ),
        autorange='reversed'  # Invert y-axis
    ),
    plot_bgcolor='rgba(0,0,0,0.8)',
    paper_bgcolor='rgba(0,0,0,0.8)',
    legend=dict(
        itemsizing='constant',
        bgcolor='rgba(50,50,50,0.7)',
        bordercolor='white',
        borderwidth=1
    ),
    width=900,
    height=600
)

# Make the grid lines lighter
fig.update_xaxes(gridcolor='rgba(128,128,128,0.2)', zerolinecolor='rgba(128,128,128,0.5)')
fig.update_yaxes(gridcolor='rgba(128,128,128,0.2)', zerolinecolor='rgba(128,128,128,0.5)')

# Add annotations to help understand the diagram
fig.add_annotation(
    x=3000,
    y=-5,
    text="Giants",
    showarrow=False,
    font=dict(size=16, color='orange')
)

fig.add_annotation(
    x=15000,
    y=0,
    text="Main Sequence",
    showarrow=False,
    font=dict(size=16, color='cyan')
)

fig.show()

# Save the figure alongside the other exported plots
fig.write_image(base_dir + "plotly_hr_diagram.png")
