In [1]:
import os
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import neattext.functions as nfx
import requests
import time
from tqdm import tqdm

current_directory = os.getcwd()
root_directory = os.path.abspath(os.path.join(current_directory, '..', '..')) 
if root_directory not in sys.path:
   sys.path.append(root_directory)

import logging
logging.basicConfig(level=logging.CRITICAL)

In [2]:
climat_topics_str=['SUSTAINABILITY', 'DURABILITY', 'RESILIENCE', 'HABITABILITY']
climat_topics_str2=['"'+str(x).lower()+'"' for x in climat_topics_str ]

In [3]:
climat_topics_OR_str2=(' OR ').join(climat_topics_str2)

In [4]:
climat_topics_OR_str2

'"sustainability" OR "durability" OR "resilience" OR "habitability"'

In [5]:
climat_keywords_str=['"'+str(x).capitalize()+'"' for x in climat_topics_str ]

In [6]:
climat_keywords_str2=(' OR ').join(climat_keywords_str)
climat_keywords_str2

'"Sustainability" OR "Durability" OR "Resilience" OR "Habitability"'

In [7]:
url=f"https://api.openalex.org/works?filter=has_doi:true,publication_year:{2022},keyword.search:({climat_keywords_str2}),title_and_abstract.search:({climat_topics_OR_str2})&group-by=institutions.country_code"
response = requests.get(url)
data = response.json()

In [8]:
data

{'meta': {'count': 13405,
  'db_response_time_ms': 254,
  'page': 1,
  'per_page': 200,
  'groups_count': 166},
 'group_by': [{'key': 'https://openalex.org/countries/US',
   'key_display_name': 'United States of America',
   'count': 2223},
  {'key': 'https://openalex.org/countries/CN',
   'key_display_name': 'China',
   'count': 1212},
  {'key': 'https://openalex.org/countries/GB',
   'key_display_name': 'United Kingdom of Great Britain and Northern Ireland',
   'count': 975},
  {'key': 'https://openalex.org/countries/ID',
   'key_display_name': 'Indonesia',
   'count': 594},
  {'key': 'https://openalex.org/countries/IT',
   'key_display_name': 'Italy',
   'count': 520},
  {'key': 'https://openalex.org/countries/DE',
   'key_display_name': 'Germany',
   'count': 497},
  {'key': 'https://openalex.org/countries/AU',
   'key_display_name': 'Australia',
   'count': 463},
  {'key': 'https://openalex.org/countries/IN',
   'key_display_name': 'India',
   'count': 445},
  {'key': 'https://ope

In [9]:
dict_countries=[]

for year in range(2013,2024,1):
    url=f"https://api.openalex.org/works?filter=has_doi:true,publication_year:{year},keyword.search:({climat_keywords_str2}),title_and_abstract.search:({climat_topics_OR_str2})&group-by=institutions.country_code"
    response = requests.get(url)
    data = response.json()
    for i in range(0,30,1):
        dict_countries.append({"year":str(year),'country':data['group_by'][i]['key'].replace('https://openalex.org/countries/',''),'count':data['group_by'][i]['count'],'total':data['meta']['count'], 'rank': i+1})

In [10]:
df=pd.DataFrame(dict_countries)
df['percentage']=df.apply(lambda row: round(row['count']*100/row['total'],1),axis=1)

In [11]:
df[df.year=='2022']

Unnamed: 0,year,country,count,total,rank,percentage
270,2022,US,2223,13405,1,16.6
271,2022,CN,1212,13405,2,9.0
272,2022,GB,975,13405,3,7.3
273,2022,ID,594,13405,4,4.4
274,2022,IT,520,13405,5,3.9
275,2022,DE,497,13405,6,3.7
276,2022,AU,463,13405,7,3.5
277,2022,IN,445,13405,8,3.3
278,2022,CA,407,13405,9,3.0
279,2022,NL,310,13405,10,2.3


with Highcharts

In [12]:
# pip install virtualenv
# virtualenv -p python3.10 myenv
# myenv\Scripts\activate
# pip install highcharts_core

In [13]:
from highcharts_core.chart import Chart
from highcharts_core.options import HighchartsOptions
from highcharts_core.options.plot_options import PlotOptions
from highcharts_core.options.axes.x_axis import XAxis
from highcharts_core.options.axes.y_axis import YAxis,YAxisTitle
from highcharts_core.options.axes.accessibility import AxisAccessibility
from highcharts_core.options.axes.title import AxisTitle
from highcharts_core.options.title import Title
from highcharts_core.options.subtitle import Subtitle
from highcharts_core.options.legend import Legend
from highcharts_core.options.plot_options.series import SeriesOptions
from highcharts_core.options.series.area import LineSeries
from highcharts_core.options.series.labels import SeriesLabel
from highcharts_core.options.responsive import Responsive, ResponsiveRules, Condition
from highcharts_core.constants import EnforcedNull
from highcharts_core.options.credits import Credits

In [14]:
chart_options = HighchartsOptions(
    chart={
        'width': 800,  
        'height': 600  
    },
    title = Title(text = 'Part of publications in OpenAlex for 10 countries by year', #Part of publications in OpenAlex for 10 countries by year
                  align = 'left',                                                    #Rank for 10 countries by year in OpenAlex publications
                  style={'fontSize': '40px'}),                                                 
    subtitle = Subtitle(text = 'Source: <a href="https://irecusa.org/programs/solar-jobs-census/" target="_blank">OpenAlex</a>',
                        align = 'left'),
    y_axis = YAxis(title = YAxisTitle(text = 'Part', style={'fontSize': '20px'}), style={'fontSize': '30px'}, min=1, labels={'style': {'fontSize': '20px'}}), #reversed=True
    x_axis = XAxis(title = AxisTitle(text = 'Year', style={'fontSize': '20px'}),
                   labels={'style': {'fontSize': '20px'}},
        accessibility = AxisAccessibility(range_description = 'Range: 2013 to 2023')),
    legend = Legend(layout = 'vertical',
                    align = 'right',
                    vertical_align = 'middle'),
    plot_options = PlotOptions(series = SeriesOptions(point_start = 2013,
                                                      label = SeriesLabel(connector_allowed = False,
                                                                          style={'fontSize': '20px'}))),
    credits = Credits(enabled=False),
    exporting={
        'enabled': False  
    }
)

In [15]:
override_options = HighchartsOptions(legend = Legend(layout = 'horizontal',
                                                     align = 'center',
                                                     vertical_align = 'bottom'))
responsive_config = Responsive(
    rules = [
        ResponsiveRules(chart_options = override_options,
                        condition = Condition(max_width = 500))
    ]
)
chart_options.responsive = responsive_config

In [16]:
','.join(list(df[df.year=='2022'].country))

'US,CN,GB,ID,IT,DE,AU,IN,CA,NL,IR,FR,ES,JP,BR,MY,SE,ZA,TR,PT,KR,CH,NO,GR,PL,RU,AT,DK,BE,HK'

In [17]:
df[df.year=='2022']

Unnamed: 0,year,country,count,total,rank,percentage
270,2022,US,2223,13405,1,16.6
271,2022,CN,1212,13405,2,9.0
272,2022,GB,975,13405,3,7.3
273,2022,ID,594,13405,4,4.4
274,2022,IT,520,13405,5,3.9
275,2022,DE,497,13405,6,3.7
276,2022,AU,463,13405,7,3.5
277,2022,IN,445,13405,8,3.3
278,2022,CA,407,13405,9,3.0
279,2022,NL,310,13405,10,2.3


In [18]:
series1 = LineSeries(name = 'US',
                     data = list(df[df.country=='US']['percentage']))
series2 = LineSeries(name = 'CN',
                     data = list(df[df.country=='CN']['percentage']),
                    color = 'green')
series3 = LineSeries(name = 'IN',
                     data = list(df[df.country=='IN']['percentage']),
                     color = 'red')
series4 = LineSeries(name = 'GB',
                     data = list(df[df.country=='GB']['percentage']),
                     color= 'purple')
series5 = LineSeries(name = 'ID',
                     data = list(df[df.country=='ID']['percentage']),
                     color = 'blue')
series6 = LineSeries(name = 'DE',
                     data = list(df[df.country=='DE']['percentage']),
                     color = 'orange')
series9 = LineSeries(name = 'IT',
                     data = list(df[df.country=='IT']['percentage']),
                     color = 'green')
series7 = LineSeries(name = 'AU',
                     data = list(df[df.country=='AU']['percentage']),
                     color = 'grey')
series8 = LineSeries(name = 'ES',
                     data = list(df[df.country=='ES']['percentage']),
                     color = 'brown')
series9 = LineSeries(name = 'CA',
                     data = list(df[df.country=='CA']['percentage']),
                     color = 'green')
series10 = LineSeries(name = 'FR',
                     data = list(df[df.country=='FR']['percentage']),
                     color = 'yellow')

chart_options.add_series(series1, series2, series3, series4, series5, series6, series9, series7,series8,series9,series10)

In [19]:
chart = Chart.from_options(chart_options)
chart.display()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [21]:
chart_options = HighchartsOptions(
    chart={
        'width': 800,  
        'height': 600  
    },
    title = Title(text = 'Rank for 10 countries by year in OpenAlex publications', #Part of publications in OpenAlex for 10 countries by year
                  align = 'left',                                                    #Rank for 10 countries by year in OpenAlex publications
                  style={'fontSize': '40px'}),                                                 
    subtitle = Subtitle(text = 'Source: <a href="https://irecusa.org/programs/solar-jobs-census/" target="_blank">OpenAlex</a>',
                        align = 'left'),
    y_axis = YAxis(title = YAxisTitle(text = 'Rank', style={'fontSize': '20px'}), reversed=True, style={'fontSize': '30px'}, min=1, labels={'style': {'fontSize': '20px'}}), #reversed=True
    x_axis = XAxis(title = AxisTitle(text = 'Year', style={'fontSize': '20px'}),
                   labels={'style': {'fontSize': '20px'}},
        accessibility = AxisAccessibility(range_description = 'Range: 2013 to 2023')),
    legend = Legend(layout = 'vertical',
                    align = 'right',
                    vertical_align = 'middle'),
    plot_options = PlotOptions(series = SeriesOptions(point_start = 2013,
                                                      label = SeriesLabel(connector_allowed = False,
                                                                          style={'fontSize': '20px'}))),
    credits = Credits(enabled=False),
    exporting={
        'enabled': False  
    }
)
override_options = HighchartsOptions(legend = Legend(layout = 'horizontal',
                                                     align = 'center',
                                                     vertical_align = 'bottom'))
responsive_config = Responsive(
    rules = [
        ResponsiveRules(chart_options = override_options,
                        condition = Condition(max_width = 500))
    ]
)
chart_options.responsive = responsive_config

series1 = LineSeries(name = 'US',
                     data = list(df[df.country=='US']['rank']))
series2 = LineSeries(name = 'CN',
                     data = list(df[df.country=='CN']['rank']),
                    color = 'green')
series3 = LineSeries(name = 'IN',
                     data = list(df[df.country=='IN']['rank']),
                     color = 'red')
series4 = LineSeries(name = 'GB',
                     data = list(df[df.country=='GB']['rank']),
                     color= 'purple')
series5 = LineSeries(name = 'ID',
                     data = list(df[df.country=='ID']['rank']),
                     color = 'blue')
series6 = LineSeries(name = 'DE',
                     data = list(df[df.country=='DE']['rank']),
                     color = 'orange')
series9 = LineSeries(name = 'IT',
                     data = list(df[df.country=='IT']['rank']),
                     color = 'green')
series7 = LineSeries(name = 'AU',
                     data = list(df[df.country=='AU']['rank']),
                     color = 'grey')
series8 = LineSeries(name = 'ES',
                     data = list(df[df.country=='ES']['rank']),
                     color = 'brown')
series9 = LineSeries(name = 'CA',
                     data = list(df[df.country=='CA']['rank']),
                     color = 'green')
series10 = LineSeries(name = 'FR',
                     data = list(df[df.country=='FR']['rank']),
                     color = 'yellow')

chart_options.add_series(series1, series2, series3, series4, series5, series6, series9, series7,series8,series9,series10)

chart = Chart.from_options(chart_options)
chart.display()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [52]:
url=f"https://api.openalex.org/works?filter=has_doi:true,publication_year:{2019},keyword.search:({climat_keywords_str2}),title_and_abstract.search:({climat_topics_OR_str2})&group-by=topics.field.id"
response = requests.get(url)
data = response.json()

In [73]:
dict_countries=[]
max=0

for year in range(2019,2024,1):
    url=f"https://api.openalex.org/works?filter=has_doi:true,publication_year:{year},keyword.search:({climat_keywords_str2}),title_and_abstract.search:({climat_topics_OR_str2}),institutions.country_code:'FR'&group-by=topics.field.id"
    response = requests.get(url)
    data = response.json()
    for i in range(len(data['group_by'])):
        dict_countries.append({"year":str(year),'country':data['group_by'][i]['key_display_name'],'count':data['group_by'][i]['count'],'total':data['meta']['count'], 'rank': i+1})

In [76]:
data

{'meta': {'count': 0,
  'db_response_time_ms': 115,
  'page': 1,
  'per_page': 200,
  'groups_count': 0},
 'group_by': [{'key': 'https://openalex.org/fields/27',
   'key_display_name': 'Medicine',
   'count': 0},
  {'key': 'https://openalex.org/fields/33',
   'key_display_name': 'Social Sciences',
   'count': 0},
  {'key': 'https://openalex.org/fields/22',
   'key_display_name': 'Engineering',
   'count': 0},
  {'key': 'https://openalex.org/fields/13',
   'key_display_name': 'Biochemistry, Genetics and Molecular Biology',
   'count': 0},
  {'key': 'https://openalex.org/fields/17',
   'key_display_name': 'Computer Science',
   'count': 0},
  {'key': 'https://openalex.org/fields/12',
   'key_display_name': 'Arts and Humanities',
   'count': 0},
  {'key': 'https://openalex.org/fields/23',
   'key_display_name': 'Environmental Science',
   'count': 0},
  {'key': 'https://openalex.org/fields/11',
   'key_display_name': 'Agricultural and Biological Sciences',
   'count': 0},
  {'key': 'https

In [74]:
df=pd.DataFrame(dict_countries)
df['percentage']=df.apply(lambda row: round(row['count']*100/row['total'],1),axis=1)

ZeroDivisionError: division by zero

In [75]:
df[df.year=='2021']

Unnamed: 0,year,country,count,total,rank
52,2021,Medicine,0,0,1
53,2021,Social Sciences,0,0,2
54,2021,Engineering,0,0,3
55,2021,"Biochemistry, Genetics and Molecular Biology",0,0,4
56,2021,Computer Science,0,0,5
57,2021,Arts and Humanities,0,0,6
58,2021,Environmental Science,0,0,7
59,2021,Agricultural and Biological Sciences,0,0,8
60,2021,Materials Science,0,0,9
61,2021,Physics and Astronomy,0,0,10


In [69]:
categories=list(df[df.year=='2022'].country)[:15]
part_2022=[float(df[(df.year=='2022')&(df.country==i)].percentage) for i in categories]
part_2019=[float(df[(df.year=='2019')&(df.country==i)].percentage) for i in categories]

  part_2022=[float(df[(df.year=='2022')&(df.country==i)].percentage) for i in categories]
  part_2019=[float(df[(df.year=='2019')&(df.country==i)].percentage) for i in categories]


In [70]:
categories = list(df[df.year == '2022'].country)[:20]
part_2022 = [float(df[(df.year == '2022') & (df.country == i)].percentage) for i in categories]
part_2019 = [float(df[(df.year == '2019') & (df.country == i)].percentage) for i in categories]

options_as_str = f"""
{{
    chart: {{
        type: 'column'
    }},
    title: {{
        text: 'Part of publication about durability in 2019 and 2022'
    }},
    subtitle: {{
        text: 'Source: OpenAlex.com'
    }},
    xAxis: {{
        categories: {categories},
        crosshair: true
    }},
    yAxis: {{
        min: 0,
        title: {{
            text: 'part'
        }}
    }},
    tooltip: {{
        headerFormat: '<span style="font-size:10px">{{point.key}}</span><table>',
        pointFormat: '<tr><td style="color:{{series.color}};padding:0">{{series.name}}: </td><td style="padding:0"><b>{{point.y:.1f}} %</b></td></tr>',
        footerFormat: '</table>',
        shared: true,
        useHTML: true
    }},
    plotOptions: {{
        column: {{
            pointPadding: 0.2,
            borderWidth: 0
        }}
    }},
    series: [{{
        name: '2019',
        data: {part_2019}
    }}, {{
        name: '2022',
        data: {part_2022}
    }}]
}}
"""

options = HighchartsOptions.from_js_literal(options_as_str)


  part_2022 = [float(df[(df.year == '2022') & (df.country == i)].percentage) for i in categories]
  part_2019 = [float(df[(df.year == '2019') & (df.country == i)].percentage) for i in categories]


In [71]:
chart = Chart.from_options(options)
chart.display()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>