In [10]:
import os
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import neattext.functions as nfx
import requests
import time
from tqdm import tqdm

current_directory = os.getcwd()
root_directory = os.path.abspath(os.path.join(current_directory, '..', '..')) 
if root_directory not in sys.path:
   sys.path.append(root_directory)

import logging
logging.basicConfig(level=logging.CRITICAL)

In [11]:
climat_topics_str=['SUSTAINABILITY', 'DURABILITY', 'RESILIENCE', 'HABITABILITY']
climat_topics_str=['"'+str(x).lower()+'"' for x in climat_topics_str ]

In [12]:
climat_topics_OR_str=(' OR ').join(climat_topics_str)

In [13]:
climat_topics_OR_str

'"sustainability" OR "durability" OR "resilience" OR "habitability"'

In [14]:
url=f"https://api.openalex.org/works?filter=has_doi:true,publication_year:{2022},locations.source.display_name:{climat_topics_OR_str},title_and_abstract.search:({climat_topics_OR_str})&group-by=institutions.country_code"
response = requests.get(url)
data = response.json()

In [15]:
data

{'error': 'Invalid query parameters error.',
 'message': 'locations.source.display_name is not a valid field. Valid fields are underscore or hyphenated versions of: abstract.search, abstract.search.no_stem, apc_list.currency, apc_list.provenance, apc_list.value, apc_list.value_usd, apc_paid.currency, apc_paid.provenance, apc_paid.value, apc_paid.value_usd, author.id, author.orcid, authors_count, authorships.affiliations.institution_ids, authorships.author.id, authorships.author.orcid, authorships.countries, authorships.institutions.continent, authorships.institutions.country_code, authorships.institutions.id, authorships.institutions.is_global_south, authorships.institutions.lineage, authorships.institutions.ror, authorships.institutions.type, authorships.is_corresponding, best_oa_location.is_accepted, best_oa_location.is_oa, best_oa_location.is_published, best_oa_location.landing_page_url, best_oa_location.license, best_oa_location.license_id, best_oa_location.source.host_organization

In [49]:
dict_countries=[]

for year in range(2013,2024,1):
    url=f"https://api.openalex.org/works?filter=has_doi:true,publication_year:{year},title_and_abstract.search:({climat_topics_OR_str})&group-by=institutions.country_code"
    response = requests.get(url)
    data = response.json()
    for i in range(0,30,1):
        dict_countries.append({"year":str(year),'country':data['group_by'][i]['key'].replace('https://openalex.org/countries/',''),'count':data['group_by'][i]['count'],'total':data['meta']['count'], 'rank': i+1})

In [50]:
df=pd.DataFrame(dict_countries)
df['percentage']=df.apply(lambda row: round(row['count']*100/row['total'],1),axis=1)

In [51]:
df[df.year=='2022']

Unnamed: 0,year,country,count,total,rank,percentage
270,2022,US,36146,246142,1,14.7
271,2022,CN,29638,246142,2,12.0
272,2022,IN,15534,246142,3,6.3
273,2022,GB,15358,246142,4,6.2
274,2022,ID,9570,246142,5,3.9
275,2022,DE,9238,246142,6,3.8
276,2022,IT,8957,246142,7,3.6
277,2022,AU,7985,246142,8,3.2
278,2022,ES,6800,246142,9,2.8
279,2022,CA,6759,246142,10,2.7


with Highcharts

In [19]:
# pip install virtualenv
# virtualenv -p python3.10 myenv
# myenv\Scripts\activate
# pip install highcharts_core

In [20]:
from highcharts_core.chart import Chart
from highcharts_core.options import HighchartsOptions
from highcharts_core.options.plot_options import PlotOptions
from highcharts_core.options.axes.x_axis import XAxis
from highcharts_core.options.axes.y_axis import YAxis,YAxisTitle
from highcharts_core.options.axes.accessibility import AxisAccessibility
from highcharts_core.options.axes.title import AxisTitle
from highcharts_core.options.title import Title
from highcharts_core.options.subtitle import Subtitle
from highcharts_core.options.legend import Legend
from highcharts_core.options.plot_options.series import SeriesOptions
from highcharts_core.options.series.area import LineSeries
from highcharts_core.options.series.labels import SeriesLabel
from highcharts_core.options.responsive import Responsive, ResponsiveRules, Condition
from highcharts_core.constants import EnforcedNull
from highcharts_core.options.credits import Credits

In [29]:
chart_options = HighchartsOptions(
    chart={
        'width': 800,  
        'height': 600  
    },
    title = Title(text = 'Part of publications in OpenAlex for 10 countries by year', #Part of publications in OpenAlex for 10 countries by year
                  align = 'left',                                                    #Rank for 10 countries by year in OpenAlex publications
                  style={'fontSize': '40px'}),                                                 
    subtitle = Subtitle(text = 'Source: <a href="https://irecusa.org/programs/solar-jobs-census/" target="_blank">OpenAlex</a>',
                        align = 'left'),
    y_axis = YAxis(title = YAxisTitle(text = 'Part', style={'fontSize': '20px'}), style={'fontSize': '30px'}, min=1, labels={'style': {'fontSize': '20px'}}), #reversed=True
    x_axis = XAxis(title = AxisTitle(text = 'Year', style={'fontSize': '20px'}),
                   labels={'style': {'fontSize': '20px'}},
        accessibility = AxisAccessibility(range_description = 'Range: 2013 to 2023')),
    legend = Legend(layout = 'vertical',
                    align = 'right',
                    vertical_align = 'middle'),
    plot_options = PlotOptions(series = SeriesOptions(point_start = 2013,
                                                      label = SeriesLabel(connector_allowed = False,
                                                                          style={'fontSize': '20px'}))),
    credits = Credits(enabled=False),
    exporting={
        'enabled': False  
    }
)

In [30]:
override_options = HighchartsOptions(legend = Legend(layout = 'horizontal',
                                                     align = 'center',
                                                     vertical_align = 'bottom'))
responsive_config = Responsive(
    rules = [
        ResponsiveRules(chart_options = override_options,
                        condition = Condition(max_width = 500))
    ]
)
chart_options.responsive = responsive_config

In [31]:
','.join(list(df[df.year=='2022'].country))

'US,CN,IN,GB,ID,DE,IT,AU,ES,CA,FR,NL,MY,JP,BR,RU,KR,TR,ZA,PK'

In [32]:
df[df.year=='2022']

Unnamed: 0,year,country,count,total,rank,percentage
180,2022,US,36146,246142,1,14.7
181,2022,CN,29638,246142,2,12.0
182,2022,IN,15534,246142,3,6.3
183,2022,GB,15358,246142,4,6.2
184,2022,ID,9570,246142,5,3.9
185,2022,DE,9238,246142,6,3.8
186,2022,IT,8957,246142,7,3.6
187,2022,AU,7985,246142,8,3.2
188,2022,ES,6800,246142,9,2.8
189,2022,CA,6759,246142,10,2.7


In [33]:
series1 = LineSeries(name = 'US',
                     data = list(df[df.country=='US']['percentage']))
series2 = LineSeries(name = 'CN',
                     data = list(df[df.country=='CN']['percentage']),
                    color = 'green')
series3 = LineSeries(name = 'IN',
                     data = list(df[df.country=='IN']['percentage']),
                     color = 'red')
series4 = LineSeries(name = 'GB',
                     data = list(df[df.country=='GB']['percentage']),
                     color= 'purple')
series5 = LineSeries(name = 'ID',
                     data = list(df[df.country=='ID']['percentage']),
                     color = 'blue')
series6 = LineSeries(name = 'DE',
                     data = list(df[df.country=='DE']['percentage']),
                     color = 'orange')
series9 = LineSeries(name = 'IT',
                     data = list(df[df.country=='IT']['percentage']),
                     color = 'green')
series7 = LineSeries(name = 'AU',
                     data = list(df[df.country=='AU']['percentage']),
                     color = 'grey')
series8 = LineSeries(name = 'ES',
                     data = list(df[df.country=='ES']['percentage']),
                     color = 'brown')
series9 = LineSeries(name = 'CA',
                     data = list(df[df.country=='CA']['percentage']),
                     color = 'green')
series10 = LineSeries(name = 'FR',
                     data = list(df[df.country=='FR']['percentage']),
                     color = 'yellow')

chart_options.add_series(series1, series2, series3, series4, series5, series6, series9, series7,series8,series9,series10)

In [34]:
chart = Chart.from_options(chart_options)
chart.display()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [53]:
categories=list(df[df.year=='2022'].country)[:15]
part_2022=[float(df[(df.year=='2022')&(df.country==i)].percentage) for i in categories]
part_2019=[float(df[(df.year=='2019')&(df.country==i)].percentage) for i in categories]

  part_2022=[float(df[(df.year=='2022')&(df.country==i)].percentage) for i in categories]
  part_2019=[float(df[(df.year=='2019')&(df.country==i)].percentage) for i in categories]


In [61]:
categories = list(df[df.year == '2022'].country)[:20]
part_2022 = [float(df[(df.year == '2022') & (df.country == i)].percentage) for i in categories]
part_2019 = [float(df[(df.year == '2019') & (df.country == i)].percentage) for i in categories]

options_as_str = f"""
{{
    chart: {{
        type: 'column'
    }},
    title: {{
        text: 'Part of publication about durability in 2019 and 2022'
    }},
    subtitle: {{
        text: 'Source: OpenAlex.com'
    }},
    xAxis: {{
        categories: {categories},
        crosshair: true
    }},
    yAxis: {{
        min: 0,
        title: {{
            text: 'part'
        }}
    }},
    tooltip: {{
        headerFormat: '<span style="font-size:10px">{{point.key}}</span><table>',
        pointFormat: '<tr><td style="color:{{series.color}};padding:0">{{series.name}}: </td><td style="padding:0"><b>{{point.y:.1f}} %</b></td></tr>',
        footerFormat: '</table>',
        shared: true,
        useHTML: true
    }},
    plotOptions: {{
        column: {{
            pointPadding: 0.2,
            borderWidth: 0
        }}
    }},
    series: [{{
        name: '2019',
        data: {part_2019}
    }}, {{
        name: '2022',
        data: {part_2022}
    }}]
}}
"""

options = HighchartsOptions.from_js_literal(options_as_str)


  part_2022 = [float(df[(df.year == '2022') & (df.country == i)].percentage) for i in categories]
  part_2019 = [float(df[(df.year == '2019') & (df.country == i)].percentage) for i in categories]


In [62]:
chart = Chart.from_options(options)
chart.display()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>