In [21]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.io as pio

In [22]:
# Plotly theme name that the chart-building functions pass to update_layout
template = 'plotly_white'

# Directory prefix for star plot files
# NOTE(review): not referenced by any cell shown here — presumably for saved figures; confirm usage
base_dir = "star_plots/"

In [23]:
# Read the cleaned star table from CSV (path is relative to the working directory)
star_df = pd.read_csv("cleaned_star_data.csv")

# Uncomment to preview the first rows of the frame
# star_df.head()

In [24]:
# star_df.info()

In [25]:
# Names for the integer 'Star type' codes; list position == code (0 through 5)
star_type_mapping = dict(enumerate([
    'Brown Dwarf',    # 0
    'Red Dwarf',      # 1
    'White Dwarf',    # 2
    'Main Sequence',  # 3
    'Supergiants',    # 4
    'Hypergiants',    # 5
]))

In [26]:
# Swap integer star-type codes for their names. The dtype guard means a re-run
# is a no-op once the column no longer holds numbers. Codes missing from
# star_type_mapping come out of Series.map as NaN.
if pd.api.types.is_numeric_dtype(star_df['Star type'].dtype):
    star_df['Star type'] = star_df['Star type'].map(star_type_mapping)

In [27]:
# 1. Bar chart for star types - Minimalistic version
def create_star_type_bar_chart(star_df):
    """Build a bar chart of how many rows fall under each star type.

    Args:
        star_df: DataFrame with a 'Star type' column.

    Returns:
        The Plotly Express bar figure, styled with the module-level ``template``
        and a centred title.
    """
    # Two-column frame: the distinct star types and how often each occurs
    counts = (
        star_df['Star type']
        .value_counts()
        .reset_index()
        .set_axis(['Star type', 'Count'], axis=1)
    )

    # Numeric type codes are shown in ascending order; labels keep count order
    if pd.api.types.is_numeric_dtype(counts['Star type']):
        counts = counts.sort_values('Star type')

    return px.bar(
        counts,
        x='Star type',
        y='Count',
        title='Star Types Count',
    ).update_layout(template=template, title_x=0.5)

In [28]:
# 2. Bar chart for star colors - Minimalistic version
def create_star_color_bar_chart(star_df):
    """Build a bar chart of how many rows carry each star color.

    Args:
        star_df: DataFrame with a 'Star color' column.

    Returns:
        The Plotly Express bar figure, styled with the module-level ``template``
        and a centred title.
    """
    # Two-column frame: the distinct colors and how often each occurs
    counts = (
        star_df['Star color']
        .value_counts()
        .reset_index()
        .set_axis(['Star color', 'Count'], axis=1)
    )

    chart = px.bar(counts, x='Star color', y='Count', title='Star Colors Count')
    chart.update_layout(template=template, title_x=0.5)
    return chart

In [29]:
# 3. Box plots for numeric features - with star type grouping
def create_boxplots(star_df):
    """Draw a 2x2 grid of box plots, one panel per numeric feature, split by star type.

    Each panel holds one box trace per distinct value of 'Star type'. Legend
    entries are emitted only for the first panel, and every trace of a given
    star type shares a ``legendgroup`` so that clicking a legend entry toggles
    that star type in all four panels rather than only the first.

    Args:
        star_df: DataFrame with a 'Star type' column and the four feature
            columns named in ``numeric_features``.

    Returns:
        The Plotly figure containing all box traces.
    """
    numeric_features = ['Temperature (K)', 'Luminosity(L/Lo)', 'Radius(R/Ro)', 'Absolute magnitude(Mv)']

    # 2 rows x 2 columns, one panel per feature
    fig = make_subplots(
        rows=2,
        cols=2,
        subplot_titles=numeric_features,
        vertical_spacing=0.15,
        horizontal_spacing=0.1
    )

    # Fixed color per star type so a type looks the same in every panel
    colors = {
        'Brown Dwarf': 'brown',
        'Red Dwarf': 'red',
        'White Dwarf': 'lightskyblue',
        'Main Sequence': 'yellow',
        'Supergiants': 'blue',
        'Hypergiants': 'orange'
    }

    # (row, col) of the panel for each entry of numeric_features
    positions = [(1, 1), (1, 2), (2, 1), (2, 2)]

    # Slice the frame once per star type; the result is reused for every feature
    # instead of re-filtering inside the feature loop.
    type_column = star_df['Star type']
    subsets_by_type = []
    for star_type in type_column.unique():
        subset = star_df[type_column == star_type]
        # A NaN label never compares equal, so its subset is empty and is skipped
        if len(subset) == 0:
            continue
        subsets_by_type.append((star_type, subset))

    for i, (feature, (row, col)) in enumerate(zip(numeric_features, positions)):
        for star_type, subset in subsets_by_type:
            # Types missing from the color table fall back to gray
            color = colors.get(star_type, 'gray')

            fig.add_trace(
                go.Box(
                    y=subset[feature],
                    name=star_type,
                    legendgroup=str(star_type),  # one legend click toggles this type in all panels
                    marker_color=color,
                    boxmean=True,
                    boxpoints='outliers',  # Only show outlier points
                    jitter=0,  # No horizontal jitter on the points
                    pointpos=0,  # Points centred on the box
                    line_width=2,
                    fillcolor=color,
                    opacity=0.7,
                    showlegend=i == 0,  # Legend entries come from the first panel only
                    quartilemethod="linear",
                    width=0.4  # Explicit box width
                ),
                row=row,
                col=col
            )

        # Label the y-axis of this panel with the feature it shows
        fig.update_yaxes(title_text=feature, row=row, col=col)

    # NOTE(review): Plotly documents boxgap/boxgroupgap as having no effect on
    # traces that set an explicit width (as done above) — confirm whether these
    # settings are still wanted.
    fig.update_layout(
        template=template,
        title='Boxplots of Numeric Features by Star Type',
        title_x=0.5,
        height=800,
        width=1000,
        boxmode='group',
        boxgroupgap=0.2,
        boxgap=0.1,
        # Horizontal legend centred underneath the grid
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=-0.15,
            xanchor="center",
            x=0.5,
            font=dict(size=10)
        )
    )

    return fig

In [30]:
# 4. Line plots for numeric features - Minimalistic version
def create_line_plots(star_df):
    """Stack four line plots, one per numeric feature, each drawn in row order.

    No x values are supplied, so each line is plotted against the position of
    the value within its column.

    Args:
        star_df: DataFrame containing the four columns in ``numeric_features``.

    Returns:
        The Plotly figure with one line trace per subplot row and no legend.
    """
    numeric_features = ['Temperature (K)', 'Luminosity(L/Lo)', 'Radius(R/Ro)', 'Absolute magnitude(Mv)']

    # One full-width row per feature
    fig = make_subplots(
        rows=4,
        cols=1,
        subplot_titles=numeric_features,
        vertical_spacing=0.1,
    )

    # Subplot rows are 1-based, hence start=1
    for row_number, column_name in enumerate(numeric_features, start=1):
        line = go.Scatter(y=star_df[column_name], mode='lines', name=column_name)
        fig.add_trace(line, row=row_number, col=1)

    layout_settings = dict(
        template=template,
        title='Distribution of Numeric Features',
        title_x=0.5,
        showlegend=False,
        height=800,
        width=800,
    )
    fig.update_layout(**layout_settings)

    return fig

In [31]:
# 5. Scatter plots matrix (improved pairplot)
def create_scatter_matrix(star_df):
    """Build a pairwise scatter matrix of the numeric features, colored by spectral class.

    Args:
        star_df: DataFrame containing the four columns in ``numeric_features``
            and a 'Spectral Class' column.

    Returns:
        The Plotly Express scatter-matrix figure after layout, marker, axis
        title and grid styling have been applied.
    """
    numeric_features = ['Temperature (K)', 'Luminosity(L/Lo)', 'Radius(R/Ro)', 'Absolute magnitude(Mv)']

    # Axis labels: keep only the text before the first '(' of each column name
    short_labels = {}
    for column_name in numeric_features:
        short_labels[column_name] = column_name.split('(')[0].strip()

    fig = px.scatter_matrix(
        star_df,
        dimensions=numeric_features,
        color='Spectral Class',
        title='Scatter Matrix of Star Features',
        opacity=0.7,
        height=900,
        width=1000,
        color_discrete_sequence=px.colors.qualitative.Bold,
        labels=short_labels,
    )

    fig.update_layout(
        template=template,
        title_x=0.5,
        title_font=dict(size=20),
        font=dict(size=12),
        dragmode='select',  # box-select is the default drag tool
        margin=dict(l=50, r=50, t=80, b=50),
    )

    # Uniform circular markers with a thin outline; diagonal panels stay visible
    marker_style = dict(size=6, line=dict(width=0.5), symbol='circle')
    fig.update_traces(marker=marker_style, diagonal_visible=True)

    # Set the title font size on every x/y axis entry exposed by the layout
    for layout_key in fig.layout:
        if layout_key.startswith(('xaxis', 'yaxis')):
            fig.layout[layout_key].title.font.size = 14

    # Faint grey grid lines on both axis directions
    grid_style = dict(showgrid=True, gridwidth=0.5, gridcolor='rgba(128,128,128,0.2)')
    fig.update_xaxes(**grid_style)
    fig.update_yaxes(**grid_style)

    return fig

In [32]:
# # 6. Hertzsprung-Russell Diagram - Minimalistic version
# def create_hr_diagram(star_df):
#     fig = px.scatter(
#         star_df,
#         x='Temperature (K)',
#         y='Absolute magnitude(Mv)',
#         color='Star type',
#         title='Hertzsprung-Russell Diagram',
#         size_max=10
#     )
    
#     # Add the Sun
#     fig.add_trace(go.Scatter(
#         x=[5778],
#         y=[4.83],
#         mode='markers',
#         name='Sun',
#         marker=dict(size=10, color='gold')
#     ))
    
#     fig.update_layout(
#         template=template,
#         title_x=0.5,
#         xaxis=dict(autorange='reversed'),
#         yaxis=dict(autorange='reversed')
#     )
    
#     return fig

def create_hr_diagram(star_df):
    """Build a dark-themed Hertzsprung-Russell diagram (temperature vs. absolute magnitude).

    Points are colored by 'Star type'. Both axes are reversed, so hotter stars
    sit on the left and brighter (more negative magnitude) stars at the top.
    A labelled Sun marker, three region labels and a dotted straight-line
    main-sequence guide are overlaid.

    Optional columns are used only when present in ``star_df``:
    'Radius (R_sun)' for marker size, 'Star name' for the hover title, and
    'Distance (ly)' / 'Luminosity (L_sun)' as extra hover fields (each one is
    included on its own if it exists).

    Args:
        star_df: DataFrame with 'Temperature (K)', 'Absolute magnitude(Mv)'
            and 'Star type' columns.

    Returns:
        The Plotly figure.
    """
    # NOTE(review): the other plotting cells in this notebook use the column names
    # 'Radius(R/Ro)' and 'Luminosity(L/Lo)'; the optional names probed below
    # look like a different schema, so with that data they resolve to None —
    # confirm which names are intended.
    available = star_df.columns
    size_column = 'Radius (R_sun)' if 'Radius (R_sun)' in available else None
    hover_name_column = 'Star name' if 'Star name' in available else None
    # Keep whichever extra hover fields exist instead of requiring all of them
    hover_columns = [
        name for name in ('Distance (ly)', 'Luminosity (L_sun)') if name in available
    ] or None

    # Base scatter plot
    fig = px.scatter(
        star_df,
        x='Temperature (K)',
        y='Absolute magnitude(Mv)',
        color='Star type',
        size=size_column,
        hover_name=hover_name_column,
        hover_data=hover_columns,
        title='Hertzsprung-Russell Diagram',
        color_discrete_sequence=px.colors.qualitative.Bold,
        opacity=0.8,  # Slight transparency so overlapping points stay readable
        size_max=15
    )

    # Reference marker for the Sun (5778 K, absolute magnitude 4.83)
    fig.add_trace(go.Scatter(
        x=[5778],
        y=[4.83],
        mode='markers+text',
        name='Sun',
        text=['Sun'],
        textposition='top center',
        marker=dict(
            size=12,
            color='gold',
            line=dict(width=2, color='orange'),
            symbol='star'
        )
    ))

    # Dark theme, reversed axes, framed legend, fixed size
    fig.update_layout(
        template='plotly_dark',
        title={
            'text': 'Hertzsprung-Russell Diagram',
            'font': {'size': 24, 'family': 'Arial, sans-serif'}
        },
        title_x=0.5,
        xaxis=dict(
            title='Temperature (K)',
            title_font={'size': 18},
            autorange='reversed',
            gridcolor='rgba(128, 128, 128, 0.2)',
            minor_showgrid=True
        ),
        yaxis=dict(
            title='Absolute Magnitude (Mv)',
            title_font={'size': 18},
            autorange='reversed',
            gridcolor='rgba(128, 128, 128, 0.2)',
            minor_showgrid=True
        ),
        legend=dict(
            title='Star Type',
            bordercolor='rgba(255, 255, 255, 0.3)',
            borderwidth=1,
            font=dict(size=12)
        ),
        plot_bgcolor='rgba(15, 15, 35, 1)',
        paper_bgcolor='rgba(10, 10, 30, 1)',
        margin=dict(l=80, r=80, t=100, b=80),
        height=700,
        width=900
    )

    # Region labels as (x, y, text, font color).
    # "Red Giants" is placed at (4000, -3): cool AND bright. It previously sat
    # at magnitude 15, which on the reversed axis is the faint bottom of the
    # plot rather than the giant region.
    region_labels = [
        (30000, 0, "Hot Blue Stars", "lightblue"),
        (4000, -3, "Red Giants", "lightcoral"),
        (6000, 0, "Main Sequence", "white"),
    ]
    for label_x, label_y, label_text, label_color in region_labels:
        fig.add_annotation(
            x=label_x, y=label_y,
            text=label_text,
            showarrow=False,
            font=dict(size=14, color=label_color)
        )

    # Dotted straight line as a rough main-sequence guide; hidden from the legend
    temps = [3000, 30000]
    mags = [10, -5]
    fig.add_trace(go.Scatter(
        x=temps,
        y=mags,
        mode='lines',
        line=dict(dash='dot', width=1, color='rgba(255, 255, 255, 0.3)'),
        name='Main Sequence Trend',
        showlegend=False
    ))

    return fig

In [33]:
# Build and display the bar chart of star-type counts
fig1 = create_star_type_bar_chart(star_df)

fig1.show()

In [34]:
# Build and display the bar chart of star-color counts
fig2 = create_star_color_bar_chart(star_df)

fig2.show()

In [35]:
# Build and display the per-feature box plots grouped by star type
fig3 = create_boxplots(star_df)

fig3.show()

In [36]:
# Build and display the stacked line plots of the numeric features
fig4 = create_line_plots(star_df)

fig4.show()

In [37]:
# Build and display the scatter matrix colored by spectral class
fig5 = create_scatter_matrix(star_df)

fig5.show()

In [38]:
# Build and display the Hertzsprung-Russell diagram
fig6 = create_hr_diagram(star_df)

fig6.show()

In [41]:
def create_hr_diagram_improved(star_df):
    """Build a dark-themed Hertzsprung-Russell diagram with reference stars overlaid.

    On top of the data points (colored by 'Star type') the figure carries a Sun
    marker, four named reference stars, a solid piecewise main-sequence line
    and four region labels. Both axes are reversed.

    Args:
        star_df: DataFrame with 'Temperature (K)', 'Absolute magnitude(Mv)'
            and 'Star type' columns.

    Returns:
        The Plotly figure.
    """
    def labelled_star(label, temperature, magnitude, text_position, marker):
        # Single-point trace that shows both a marker and its name as text
        return go.Scatter(
            x=[temperature],
            y=[magnitude],
            mode='markers+text',
            name=label,
            text=[label],
            textposition=text_position,
            marker=marker,
        )

    # Data points
    fig = px.scatter(
        star_df,
        x='Temperature (K)',
        y='Absolute magnitude(Mv)',
        color='Star type',
        title='Hertzsprung-Russell Diagram',
        opacity=0.8,
        color_discrete_sequence=px.colors.qualitative.Vivid,
    )

    # The Sun: larger gold star with an orange outline
    sun_marker = dict(
        size=12,
        color='gold',
        line=dict(width=2, color='orange'),
        symbol='star',
    )
    fig.add_trace(labelled_star('Sun', 5778, 4.83, 'top center', sun_marker))

    # Reference stars as (name, temperature, absolute magnitude, marker color)
    reference_stars = [
        ('Sirius', 9940, 1.42, 'rgb(170, 170, 255)'),
        ('Betelgeuse', 3600, -5.85, 'rgb(255, 100, 50)'),
        ('Vega', 9602, 0.58, 'rgb(200, 200, 255)'),
        ('Proxima Centauri', 3042, 15.6, 'rgb(255, 120, 100)'),
    ]
    for star_name, star_temp, star_mag, star_color in reference_stars:
        star_marker = dict(size=10, color=star_color, symbol='star')
        fig.add_trace(labelled_star(star_name, star_temp, star_mag, 'top right', star_marker))

    # Dark theme, reversed axes, framed legend, fixed size
    faint_grid = 'rgba(128, 128, 128, 0.2)'
    fig.update_layout(
        template='plotly_dark',
        title={
            'text': 'Interactive Hertzsprung-Russell Diagram',
            'font': {'size': 24},
        },
        title_x=0.5,
        xaxis=dict(title='Temperature (K)', autorange='reversed', gridcolor=faint_grid),
        yaxis=dict(title='Absolute Magnitude (Mv)', autorange='reversed', gridcolor=faint_grid),
        legend=dict(
            title='Star Type',
            bordercolor='rgba(255, 255, 255, 0.3)',
            borderwidth=1,
        ),
        plot_bgcolor='rgb(10, 10, 35)',
        paper_bgcolor='rgb(5, 5, 25)',
        height=700,
        width=900,
    )

    # Simplified main sequence: (temperature, magnitude) vertices joined by a solid white line
    main_sequence_points = [
        (30000, -5),
        (20000, -2.5),
        (10000, 0),
        (7500, 2),
        (6000, 4),
        (5000, 6),
        (3500, 9),
    ]
    fig.add_trace(go.Scatter(
        x=[temperature for temperature, _ in main_sequence_points],
        y=[magnitude for _, magnitude in main_sequence_points],
        mode='lines',
        line=dict(dash='solid', width=2, color='white'),
        name='Main Sequence',
    ))

    # Region labels as (x, y, text, font color), drawn without arrows
    region_labels = [
        (25000, -3, "Blue Giants", "lightblue"),
        (4000, -3, "Red Giants", "lightcoral"),
        (15000, 2, "Main Sequence", "white"),
        (10000, 12, "White Dwarfs", "white"),
    ]
    for label_x, label_y, label_text, label_color in region_labels:
        fig.add_annotation(dict(
            x=label_x,
            y=label_y,
            text=label_text,
            showarrow=False,
            font=dict(color=label_color),
        ))

    return fig

In [42]:
# Build and display the Hertzsprung-Russell diagram with reference stars
fig7 = create_hr_diagram_improved(star_df)

fig7.show()