In [15]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import glob
import re
import matplotlib.patches as mpatches

from utils import make_unique
from utils import get_region_and_field


# Collect every per-region / per-field institutions CSV.
# The pattern is built with os.path.join so it works on any OS and avoids the
# invalid "\d" / "\I" escape sequences of a hand-written backslash path.
# glob returns matches in arbitrary order, so sort for a reproducible run.
all_files = sorted(glob.glob(os.path.join('..', 'detailed', 'Institutions_*_*.csv')))

# Load each file and tag its rows with the region and field that
# get_region_and_field returns for the file path.
df_list = []
for file in all_files:
    region, field = get_region_and_field(file)
    # To restrict the analysis to particular regions (e.g. 'ASI' or 'AUS'),
    # filter on `region` here.
    df = pd.read_csv(file).assign(region=region, field=field)
    df_list.append(df)

# xy_distribution

In [None]:
# Draw a log-log scatter plot (publication count vs. faculty count) for each file
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import plotly.express as px
import pandas as pd
import numpy as np
import glob
from utils import get_region_and_field

# Folder that receives one SVG per loaded file.
xy_output_dir = '../images/single_inter/xy_distribution'
# savefig does not create missing folders, so make sure the target exists.
os.makedirs(xy_output_dir, exist_ok=True)

for df in df_list:
    if df.empty:
        # No rows: there is no region/field value to read and nothing to plot.
        continue

    # Every row of a frame carries the same region/field tag, so read the first.
    region = df['region'].iloc[0]
    field = df['field'].iloc[0]

    # Create a scatter plot with one colour per university
    fig, ax = plt.subplots(figsize=(10, 8))
    sns.scatterplot(x='Publication Count', y='Faculty Count', hue='University',
                    s=100, data=df, palette='Set2', ax=ax)

    # Both counts are shown on logarithmic axes
    ax.set_xscale('log')
    ax.set_yscale('log')

    # Axis labels and title
    ax.set_xlabel('Publication Count')
    ax.set_ylabel('Faculty Count')
    ax.set_title('Faculty Count and Publication Count in field {} in region {}'.format(field, region))

    # The legend is anchored to the right of the axes, outside the plot area
    ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0., ncol=2)

    # bbox_inches='tight' grows the saved canvas to include the outside legend,
    # which would otherwise be cut off at the figure edge.
    fig.savefig('{}/unis_in_{}_in_{}.svg'.format(xy_output_dir, region, field),
                dpi=300, bbox_inches='tight')

    # Release the figure so memory does not grow with the number of files
    plt.close(fig)

# bubble chart

In [None]:
# Draw an interactive bubble chart of the universities in each file
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import plotly.express as px
import pandas as pd
import numpy as np
import glob
from utils import get_region_and_field

# Folder that receives one HTML file per loaded file.
bubble_output_dir = '../images/single_inter/bubble_chart'
# write_html does not create missing folders, so make sure the target exists.
os.makedirs(bubble_output_dir, exist_ok=True)

for df in df_list:
    if df.empty:
        # No rows: there is no region/field value to read and nothing to plot.
        continue

    # Every row of a frame carries the same region/field tag, so read the first.
    region = df['region'].iloc[0]
    field = df['field'].iloc[0]

    # Keep only the plotted columns, under the short names used in the chart.
    # The index is reset so the frame has a plain 0..n-1 index.
    df_data = (
        df[['University', 'Faculty Count', 'Publication Count']]
        .rename(columns={'Faculty Count': 'FacCount', 'Publication Count': 'PubCount'})
        .reset_index(drop=True)
    )

    # One marker per university: marker size encodes publication count and
    # marker colour encodes faculty count.
    # NOTE(review): no y column is passed, so plotly express picks the vertical
    # axis itself - confirm this is the intended layout.
    fig = px.scatter(df_data, x='University', color='FacCount', size='PubCount',
                     title='Universities of Field {} in the Region {}'.format(field, region),
                     labels={'University': 'University', 'FacCount': 'FacCount', 'PubCount': 'PubCount'},
                     color_continuous_scale='Viridis', template='plotly_dark')

    # Explain the two encodings just above the plot area
    fig.add_annotation(
        text='Note: The size reflects publication count. The color reflects faculty count.',
        xref='paper', yref='paper',
        x=0, y=1.05,
        showarrow=False,
        font=dict(size=12, color='white')
    )

    # Show the plot inline, then save a standalone interactive copy
    fig.show()
    fig.write_html('{}/unis_in_{}_in_{}.html'.format(bubble_output_dir, region, field))