In this notebook, I produce the visuals for the Fields of Study section.

In [1]:
import pandas as pd 
import sys
import numpy as np
import itertools
from collections import Counter
from datetime import datetime
import altair as alt
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Read the two processed CSV inputs used by this notebook.
VISPUBDATA_PLUS = pd.read_csv(
    filepath_or_buffer='../data/processed/vispubdata_plus.csv',
)
df = pd.read_csv(
    filepath_or_buffer='../data/processed/openalex_concept_df.csv',
)

### Stacked area chart

This chart shows how the ten most frequent Level 1 concepts (plus an aggregated 'Other' category) of VIS publications evolved over the past 32 years.

In [3]:
# Keep only the rows whose `Level` column equals the level of interest.
LEVEL = 1
is_target_level = df['Level'].eq(LEVEL)
level1_df = df.loc[is_target_level]
level1_df.head()

Unnamed: 0,Year,DOI,Title,Number of Concepts,Index of Concept,Concept,Concept ID,Wikidata,Level,Score
2,2011,10.1109/TVCG.2011.185,D³ Data-Driven Documents,9.0,3.0,Information retrieval,https://openalex.org/C23123220,https://www.wikidata.org/wiki/Q816826,1.0,0.391768
3,2011,10.1109/TVCG.2011.185,D³ Data-Driven Documents,9.0,4.0,Data science,https://openalex.org/C2522767166,https://www.wikidata.org/wiki/Q2374463,1.0,0.390022
8,2011,10.1109/TVCG.2011.185,D³ Data-Driven Documents,9.0,9.0,Data mining,https://openalex.org/C124101348,https://www.wikidata.org/wiki/Q172491,1.0,0.322426
18,1991,10.1109/VISUAL.1991.175815,Tree-maps: a space-filling approach to the vis...,17.0,10.0,Artificial intelligence,https://openalex.org/C154945302,https://www.wikidata.org/wiki/Q11660,1.0,0.36513
23,1991,10.1109/VISUAL.1991.175815,Tree-maps: a space-filling approach to the vis...,17.0,15.0,Data mining,https://openalex.org/C124101348,https://www.wikidata.org/wiki/Q172491,1.0,0.338191


In [4]:
# Ten most frequent concepts among the level-filtered rows, most common first.
concept_counts = Counter(level1_df['Concept'])
dic = {name: n_rows for name, n_rows in concept_counts.most_common(10)}
top_ten = [*dic]
top_ten

['Artificial intelligence',
 'Computer vision',
 'Computer graphics (images)',
 'Human–computer interaction',
 'Data science',
 'Algorithm',
 'Data mining',
 'Information retrieval',
 'Machine learning',
 'Multimedia']

In [5]:
# Time series of row counts per (year, concept), with every concept that is
# not in `top_ten` collapsed into a single "Other" bucket.
#
# The relabelled column is computed once, in a vectorised way, and attached to
# a *new* frame via `assign`. This avoids writing a column onto the per-year
# sub-frames yielded by `groupby` (chained assignment, which can trigger
# SettingWithCopyWarning) and leaves `level1_df` itself unmodified.
concept_new = level1_df['Concept'].where(
    level1_df['Concept'].isin(top_ten), 'Other'
)

# One two-key groupby replaces the nested year -> concept loops; groupby sorts
# its keys by default, so the records come out ordered by year, then concept.
yearly_counts = (
    level1_df
    .assign(**{'Concept New': concept_new})
    .groupby(['Year', 'Concept New'])
    .size()
)

# (year, concept, count) records consumed by the chart-preparation cell.
tuples = [
    (year, concept, int(count))
    for (year, concept), count in yearly_counts.items()
]

In [6]:
# Prepare the long-format frame for the stacked area chart.

# Short display labels for the legend; concepts not listed keep their name.
CONCEPT_LABELS = {
    'Artificial intelligence': 'AI',
    'Human–computer interaction': 'HCI',
    'Computer graphics (images)': 'Computer graphics',  # was misspelled 'grafics'
}

# Built in one chain without in-place mutation so the cell is idempotent on
# re-run. The relabelling is restricted to the `concept` column instead of
# scanning every column of the frame.
dff = (
    pd.DataFrame(tuples, columns=['year', 'concept', 'count'])
    .assign(
        # Datetime version of the year, used for the temporal x axis.
        Year=lambda frame: pd.to_datetime(frame['year'], format='%Y'),
        concept=lambda frame: frame['concept'].replace(CONCEPT_LABELS),
    )
)
dff.head()

Unnamed: 0,year,concept,count,Year
0,1990,Algorithm,5,1990-01-01
1,1990,AI,24,1990-01-01
2,1990,Computer grafics,19,1990-01-01
3,1990,Computer vision,12,1990-01-01
4,1990,Data mining,2,1990-01-01


In [13]:
# Area chart of yearly counts, coloured by concept.
# Encoding channels are named first so the chart assembly below stays short.
x_year = alt.X('Year:T')
y_count = alt.Y('count:Q', title='Number of publications')
color_concept = alt.Color('concept:N', title='Fields of Study (L1)')

area_chart = (
    alt.Chart(dff)
    .mark_area()
    .encode(x=x_year, y=y_count, color=color_concept)
)

# Global font sizes, categorical colour scheme and figure width.
(
    area_chart
    .configure_axis(labelFontSize=15, titleFontSize=20)
    .configure_legend(titleFontSize=16, labelFontSize=16)
    .configure_range(category={'scheme': 'paired'})
    .properties(width=400)
)