# In [2]:
import pandas as pd
import plotly.graph_objects as go

# correct df 
sorted_merged_df_cleaned = pd.read_csv('../cleaned_data/sorted_merged_df_cleaned.csv')

# normalize enrollment 
from sklearn.preprocessing import MinMaxScaler

# Define the columns to normalize
columns_to_normalize = ['2010 (Gross enrolment ratio, primary and secondary, female (%))',
                        '2011 (Gross enrolment ratio, primary and secondary, female (%))',
                        '2012 (Gross enrolment ratio, primary and secondary, female (%))',
                        '2013 (Gross enrolment ratio, primary and secondary, female (%))',
                        '2014 (Gross enrolment ratio, primary and secondary, female (%))']

# Initialize the MinMaxScaler
scaler = MinMaxScaler()

# Normalize the specified columns
df_normalized = sorted_merged_df_cleaned.copy()  # Create a copy of the DataFrame to avoid modifying the original
df_normalized[columns_to_normalize] = scaler.fit_transform(df_normalized[columns_to_normalize])

# Define dimensions for the parallel coordinates plot
dimensions = ['2014 (GDP per capita (current US$))', 
              '2014 (Gross enrolment ratio, primary and secondary, female (%))',
              '2014 (GNI per capita, Atlas method (current US$))']

# Define your custom color scale
hex_colors = ["#3d6469", "#ffa205", "#ff4500", "#d40637"]

# Define custom labels for the color scale
color_labels = ['Low income', 'Lower middle income', 'Upper middle income', 'High income']

# Calculate the intervals for each color in the custom scale
color_intervals = [i / (len(hex_colors) - 1) for i in range(len(hex_colors))]

# Define custom labels for the dimensions
custom_labels = ['GDP per capita', 'Primary and secondary School', 'GNI per capita']

# Create the parallel coordinates plot using plotly.graph_objects
fig = go.Figure()

# Add parallel coordinates trace
fig.add_trace(go.Parcoords(
    line=dict(color=df_normalized['numeric_income'], 
              colorscale=hex_colors,  # Use your custom color scale
              showscale=True,  # Set showscale to True to display the color scale
              colorbar=dict(  # Define the color bar properties
                  title='Income Group',  # Set the title of the color scale
                  tickvals=[0, 1, 2, 3],  # Specify the tick values
                  ticktext=color_labels,  # Specify the tick labels
                  tickmode='array'  # Use array mode for custom tick labels
              )
             ),
    dimensions=[
        dict(range=[df_normalized[dim].min(), df_normalized[dim].max()],
             label=label,  # Use custom label
             tickformat='.2f' if dim == dimensions[1] else '',  # Set tick format to display percentages
             values=df_normalized[dim])  # Keep values unchanged
        for dim, label in zip(dimensions, custom_labels)
    ],
))

# Update layout to add title
fig.update_layout(
    title="2014 Female Enrollment vs. Economic Measures in US$",  # Set the title of the plot
    title_x=0.5  # Set the x position of the title
)
