# Institutional Analysis

In [None]:
def extract_institutions(inst_str):
    """Split a semicolon-separated affiliation value into institution names.

    Args:
        inst_str: Raw cell value holding names separated by ';'. Values that
            are not strings are converted with ``str()`` before splitting.

    Returns:
        The non-empty, whitespace-trimmed names in their original order
        (duplicates kept), or an empty list when the value is missing
        according to ``pd.isna`` or is the empty string.
    """
    if pd.isna(inst_str) or inst_str == '':
        return []

    names = []
    for piece in str(inst_str).split(';'):
        name = piece.strip()
        # Skip the empty fragments left by doubled or trailing semicolons.
        if name:
            names.append(name)
    return names


# Flatten every non-missing 'institutions' cell into a single list of names.
# Duplicates are kept: the list holds one entry per occurrence.
all_institutions = [
    name
    for cell in df['institutions'].dropna()
    for name in extract_institutions(cell)
]

## The top 20 institutions contributing to research on generative AI

In [None]:
# Tally occurrences of each institution name and keep the 20 most frequent
# as (institution, count) pairs, most frequent first.
name_tally = Counter(all_institutions)
institution_counts = name_tally.most_common(20)

# One row per institution, in the same order as the ranking above.
inst_df = pd.DataFrame(
    data=institution_counts,
    columns=['institution', 'count'],
)

In [None]:
# Horizontal bar chart of the ranked institutions: one bar per row of inst_df,
# bar length given by the 'count' column.
bar_fig = plt.figure(figsize=(14, 10))
bar_ax = sns.barplot(data=inst_df, x='count', y='institution', palette='magma')

bar_ax.set_title('Top 20 Institutions en recherches sur IA Générative', fontsize=16)
bar_ax.set_xlabel("Nombre d'articles", fontsize=14)
bar_ax.set_ylabel('Institution', fontsize=14)

# Tighten the layout before saving so the long y-axis labels are not clipped.
bar_fig.tight_layout()
bar_fig.savefig('figures/top_institutions.png')
plt.show()

## Cluster distribution per institution

In [None]:
# Build one (institution, cluster) record per institution listed on each row
# that has both an 'institutions' value and a 'kmeans_cluster' value.
clustered_rows = df.dropna(subset=['institutions', 'kmeans_cluster'])

# Walk the two needed columns in lockstep instead of using iterrows(), which
# constructs a full Series for every row just to read two fields.
inst_cluster_data = [
    {'institution': inst, 'cluster': cluster}
    for raw_institutions, cluster in zip(
        clustered_rows['institutions'], clustered_rows['kmeans_cluster']
    )
    for inst in extract_institutions(raw_institutions)
]

# Name the columns explicitly so the frame still exposes 'institution' and
# 'cluster' when no record was produced; a frame built from an empty list
# would otherwise have no columns and break later column lookups.
inst_cluster_df = pd.DataFrame(inst_cluster_data, columns=['institution', 'cluster'])

## Top 10 leading institutions

In [None]:
# Names of the 10 most frequent institutions, most frequent first.
top10_institutions = [
    name for name, _ in Counter(all_institutions).most_common(10)
]

# Keep only the (institution, cluster) records that belong to those names.
is_top10 = inst_cluster_df['institution'].isin(top10_institutions)
top10_inst_cluster = inst_cluster_df[is_top10]

## Heatmap: institutions vs. clusters

In [None]:
heat_fig = plt.figure(figsize=(14, 10))

# Count records per (institution, cluster) pair: rows are institutions,
# columns are clusters.
inst_cluster_pivot = pd.crosstab(
    index=top10_inst_cluster['institution'],
    columns=top10_inst_cluster['cluster'],
)

# annot=True with fmt='d' prints each count as an integer inside its cell.
heat_ax = sns.heatmap(
    inst_cluster_pivot,
    annot=True,
    fmt='d',
    cmap='YlGnBu',
    linewidths=.5,
)
heat_ax.set_title('Les tendances de recherches des tops 10 institutions', fontsize=16)
heat_ax.set_xlabel('Cluster', fontsize=14)
heat_ax.set_ylabel('Institution', fontsize=14)

heat_fig.tight_layout()
heat_fig.savefig('figures/institution_cluster_heatmap.png')
plt.show()

## Word cloud of institutions

In [None]:
# Weight each institution by the number of times it occurs. Passing the names
# as space-joined text would make WordCloud tokenize them into single words
# (so generic words shared by many names dominate) instead of drawing whole
# institution names.
institution_freq = Counter(all_institutions)

# No mask is supplied, so no contour options are set: WordCloud only draws a
# contour around a mask.
wordcloud = WordCloud(
    width=800,
    height=400,
    background_color='white',
    max_words=100,
).generate_from_frequencies(institution_freq)

plt.figure(figsize=(16, 8))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')
plt.title('WordCloud des institutions de recherches', fontsize=20)
plt.tight_layout()
plt.savefig('figures/institution_wordcloud.png')
plt.show()