In [1]:
import pandas as pd
import plotly.express as px
from pandas import Series
from plotly.subplots import make_subplots
import plotly.graph_objects as go

In [4]:
# The source workbook was lightly hand-edited beforehand (empty rows removed).
sng_students = pd.read_excel('data_2024-10-08_edited.xlsx')

# Reshape wide -> long: one row per (country, year) pair.
# The year columns are addressed by their string labels '1991' ... '2023'.
sng_students_long = (
    pd.melt(
        sng_students,
        id_vars=['Страна'],
        value_vars=[str(year) for year in range(1991, 2024)],
    )
    .rename(columns={'Страна': 'Country', 'variable': 'year', 'value': 'students_per_cap'})
)

In [5]:
# Russian -> English country names, applied to the 'Country' column below.
translation_dict = {
    'Азербайджан':  'Azerbaijan',
    'Армения':      'Armenia',
    'Беларусь':     'Belarus',
    'Казахстан':    'Kazakhstan',
    'Кыргызстан':   'Kyrgyzstan',
    'Молдова':      'Moldova',
    'Россия':       'Russia',
    'Таджикистан':  'Tajikistan',
    'Туркменистан': 'Turkmenistan',
    'Узбекистан':   'Uzbekistan',
    'Украина':      'Ukraine',
}

# .replace leaves any name that is not a key of the dictionary unchanged.
sng_students_long = sng_students_long.assign(
    Country=sng_students_long['Country'].replace(to_replace=translation_dict)
)

<p>A line chart is the first choice to show the comparative dynamics of higher education students in different countries.</p>

In [64]:
# Base palette, cycled over the countries in order of first appearance.
color_palette = ['#D3D3D3', '#A9A9A9', '#ffe119', '#4363d8', '#f58231', '#2F4F4F', '#46f0f0', '#f032e6', '#bcf60c', '#fabebe',
                 '#008080', '#e6beff']

# Explicit country -> colour mapping with Uzbekistan forced to red.
# The mapping is keyed by country name (one entry per distinct country), so the
# highlight does not depend on how the rows of the long table are ordered.
country_colors = {
    country: ('#FF0000' if country == 'Uzbekistan' else color_palette[i % len(color_palette)])
    for i, country in enumerate(sng_students_long['Country'].unique())
}

fig = px.line(
    sng_students_long,
    x='year',
    y='students_per_cap',
    color='Country',
    color_discrete_map=country_colors,
    line_shape='spline',  # Smooth the lines between yearly points
    title='Number of students in higher vocational educational institutions (per 10,000 population), persons',
)

# Credit / data-source footnote, placed in paper coordinates below the plot area.
fig.add_annotation(
    text="@Conspect Labs. Data Source: Interstate Statistical Committee of the Commonwealth of Independent States",
    xref="paper",
    yref="paper",
    x=0,              # Left edge of the plotting area
    y=-0.2,           # Below the plotting area (hence the enlarged bottom margin)
    showarrow=False,
    font=dict(size=12),
)

fig.update_layout(
    margin=dict(b=100),  # Extra bottom margin so the footnote is not clipped
    plot_bgcolor='white',
    xaxis_title='',      # No x-axis title
    yaxis_title='',      # No y-axis title
    # Keep tick labels and draw thin light-grey grid lines on both axes.
    xaxis=dict(showticklabels=True,
               gridcolor='#cdcdcd',
               gridwidth=0.1),
    yaxis=dict(showticklabels=True,
               gridcolor='#cdcdcd',
               gridwidth=0.1,
               ticklabelposition="outside",  # Tick labels outside the plot area
               title_standoff=0,
               ticks='outside',
               ticklabelstandoff=20),
    font_color="black",
    height=600,
    width=1000,
)
fig.write_image('Number of students Line Chart.png')
fig.show()

<p>It is the first choice, but definitely not the best one, because of the 'spaghetti' effect of many overlapping lines. The second choice is a faceted (small-multiples) chart in which each country gets its own subplot with a single line.</p>

In [65]:
# Number of facet columns; this value drives the wrapping of the facet grid.
num_columns = 5

# One facet per country. The figure size is set once, in update_layout below.
fig = px.line(
    sng_students_long, x='year', y='students_per_cap',
    facet_col='Country', facet_col_wrap=num_columns,
    facet_col_spacing=0.1, facet_row_spacing=0.3,
    line_shape='spline',
    title='Number of students in higher vocational educational institutions (per 10,000 population), persons',
)

# Identical styling for every x- and y-axis of every facet:
# no axis title, visible tick labels placed outside, thin light-grey grid.
axis_style = dict(
    title_text='',
    showticklabels=True,
    gridcolor='#cdcdcd',
    gridwidth=0.1,
    ticklabelposition="outside",
    title_standoff=0,
    ticks='outside',
    ticklabelstandoff=10,
)
fig.update_xaxes(**axis_style)
fig.update_yaxes(**axis_style)

fig.update_layout(
    margin=dict(b=100),  # Extra bottom margin so the footnote is not clipped
    plot_bgcolor='white',
    font_color="black",
    height=600,
    width=1000,
)

# Credit / data-source footnote, placed in paper coordinates below the plot area.
fig.add_annotation(
    text="@Conspect Labs. Data Source: Interstate Statistical Committee of the Commonwealth of Independent States",
    xref="paper",
    yref="paper",
    x=0,              # Left edge of the plotting area
    y=-0.2,           # Below the plotting area
    showarrow=False,
    font=dict(size=12),
)

fig.write_image('Number of students Faceted Line Chart.png')
fig.show()

<p>The chart is quite clean, but it does not effectively show each country in comparison with the others. Therefore, the third variant of the multiseries chart draws every country in each subplot, with one country highlighted by colour and line width and the rest shown in grey.</p>

In [67]:
countries = sng_students_long['Country'].unique()
num_countries = len(countries)
num_cols = 4  # Width of the subplot grid; adjust as needed
num_rows = (num_countries + num_cols - 1) // num_cols  # Ceiling division: rows needed

# One subplot per country, titled with the country name.
fig = make_subplots(rows=num_rows, cols=num_cols, subplot_titles=list(countries))

# Slice the long table once per country instead of re-filtering it inside the nested loop.
data_by_country = {
    country: sng_students_long[sng_students_long['Country'] == country]
    for country in countries
}

for i, country in enumerate(countries):
    # Grid position (1-based) of this country's subplot, filled row by row.
    row = i // num_cols + 1
    col = i % num_cols + 1

    # Context countries first, the highlighted country last, so that the
    # highlight is drawn on top. Each country is drawn exactly once per subplot.
    draw_order = [c for c in countries if c != country] + [country]

    for c in draw_order:
        highlighted = c == country
        c_data = data_by_country[c]
        fig.add_trace(
            go.Scatter(
                x=c_data['year'],
                y=c_data['students_per_cap'],
                mode='lines',
                name=c,
                # Highlight: black and thicker; context lines: thin grey.
                line=dict(color='black' if highlighted else 'grey',
                          width=2 if highlighted else 1),
                line_shape='spline',
            ),
            row=row,
            col=col,
        )

# Figure-level layout.
fig.update_layout(
    title='Number of students in higher education institutions (per 10,000 population), persons',
    height=600,
    width=1200,
    showlegend=False,
    plot_bgcolor='white',
)

# Same axis styling for every subplot: no titles, small tick font, light-grey grid.
fig.update_xaxes(title_text='', tickfont=dict(size=10),
                 gridcolor='#cdcdcd', gridwidth=0.05)
fig.update_yaxes(title_text='', tickfont=dict(size=10),
                 gridcolor='#cdcdcd', gridwidth=0.05)

# Credit / data-source footnote, placed in paper coordinates below the plot area.
fig.add_annotation(
    text="@Conspect Labs. Data Source: Interstate Statistical Committee of the Commonwealth of Independent States",
    xref="paper",
    yref="paper",
    x=0,              # Left edge of the plotting area
    y=-0.15,          # Below the plotting area
    showarrow=False,
    font=dict(size=12),
)
fig.write_image('Number_of_students Subplots Line Chart.png')
fig.show()