In [None]:
import os
import sys
import fasttext
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import neattext.functions as nfx
import requests
import time

# Put the directory two levels above the current working directory on the
# import path (presumably so the project-local `code_utils` import resolves).
current_directory = os.getcwd()
root_directory = os.path.abspath(
    os.path.join(current_directory, os.pardir, os.pardir)
)
if sys.path.count(root_directory) == 0:
    sys.path.append(root_directory)

import logging
logging.basicConfig(level=logging.CRITICAL)

from code_utils.utils import aplatir 

In [None]:
# OpenAlex topic identifiers; they are OR-joined and used as the `topics.id`
# filter of the API queries in this notebook.
climat_topics = [
    'T10753', 'T11988', 'T10122', 'T10139', 'T10577', 'T11862', 'T13377', 'T10898',
    'T12981', 'T10174', 'T11405', 'T13396', 'T12213', 'T12639', 'T11056', 'T11880',
    'T11753', 'T10439', 'T10766', 'T10438', 'T12806', 'T11244', 'T11186', 'T10029',
    'T11588', 'T10075', 'T10017', 'T10644', 'T10895', 'T10266', 'T10230', 'T11320',
    'T10341', 'T10005', 'T10347', 'T10765', 'T10032', 'T11333', 'T10226', 'T10466',
    'T10647', 'T11483', 'T10330', 'T10471', 'T10199', 'T10555', 'T11259', 'T11594',
    'T10643', 'T10319', 'T10487', 'T10435', 'T10659', 'T10089', 'T10255', 'T11088',
    'T10930', 'T12073', 'T11459', 'T11061', 'T10302', 'T10190', 'T10779', 'T10535',
    'T12617', 'T12414', 'T10166', 'T10004', 'T10889', 'T11913', 'T10398', 'T11760',
]


In [None]:
# Keywords/phrases for the `title_and_abstract.search` filter.
# Fixes relative to the previous one-line literal:
#   * a missing comma fused "green energy" and "floodplain management" into the
#     single term "green energyfloodplain management";
#   * 'arctic sea ice' was listed twice (dict.fromkeys also guards against any
#     future duplicate while keeping first-seen order).
# NOTE(review): 'paleoclimat' is kept as written — confirm the spelling is intended.
_search_terms = list(dict.fromkeys([
    'climate change', 'marine ecosystems', 'ecology', 'ecological', 'environmental',
    'methane emission', 'ocean acidification', 'arctic sea ice', 'environmental impact',
    'climate ethics', 'hydrological cycle', 'energy transition', 'influence of climate',
    'urban heat islands', 'mitigation strategies', 'impact on climate',
    'environmental policies', 'carbon dioxide capture', 'carbon dioxide storage',
    'soil carbon dynamics', 'sustainable development', 'environmental governance',
    'atmospheric aerosols', 'marine biogeochemistry', 'biodiversity', 'global flood risk',
    'ocean surface waves and wind interaction', 'aeolian geomorphology',
    'wind erosion dynamics', 'coastal protection', 'water resource management',
    'air pollution', 'deforestation', 'energy efficiency', 'global drought',
    'landslide hazards', 'paleoclimat', 'climate resilience', 'climate adaptation',
    'carbon footprint', 'carbon neutrality', 'climate mitigation', 'climate models',
    'climate scenarios', 'zero emissions', 'renewable energy', 'sustainable agriculture',
    'ecosystem services', 'climate vulnerability', 'climate impacts', 'climate policy',
    'global warming', 'low-carbon', 'climate change adaptation strategies',
    'ecosystem-based adaptation', 'climate finance', 'fossil fuel phase-out',
    'carbon trading', 'climate-induced displacement', 'environmental resilience',
    'greenhouse gas emissions', 'land-use change', 'climate risk assessment',
    'climate-induced migration', 'carbon sequestration', 'bioenergy',
    'energy transition pathways', 'climate justice', 'climate-sensitive diseases',
    'adaptation planning', 'geoengineering', 'flood management', 'ecosystem restoration',
    'heat stress', 'mitigation', 'carbon pricing', 'sustainable forestry', 'blue carbon',
    'adaptation and resilience building', 'green infrastructure', 'climate',
    'urban adaptation strategies', 'climate-smart agriculture', 'drought mitigation',
    'weather extremes', 'ocean conservation', 'green energy', 'floodplain management',
    'temperature rise mitigation',
]))

# Multi-word phrases are wrapped in double quotes and listed first; terms
# without a space follow unquoted (same ordering rule as before).
climat_topics_str = (
    [f'"{term}"' for term in _search_terms if ' ' in term]
    + [term for term in _search_terms if ' ' not in term]
)

In [None]:
# Topic ids are separated by '|' for the `topics.id` filter value.
climat_topics_OR = '|'.join(climat_topics)
# Search terms are separated by ' OR ' for the `title_and_abstract.search` value.
climat_topics_OR_str = ' OR '.join(climat_topics_str)

In [None]:
# Show the assembled ' OR '-joined search expression for a visual check.
climat_topics_OR_str

In [None]:
# Show the assembled '|'-joined topic-id filter value for a visual check.
climat_topics_OR

In [None]:
# For each publication year, ask OpenAlex for the matching works grouped by
# institution country and keep the first groups of the response.
# `dict_countries` is a list of records (one dict per year/country pair).
dict_countries = []
TOP_N_COUNTRIES = 15  # at most this many groups are kept per year

for year in range(2013, 2024):
    url=f"https://api.openalex.org/works?filter=has_doi:true,publication_year:{year},topics.id:{climat_topics_OR},title_and_abstract.search:({climat_topics_OR_str})&group-by=institutions.country_code"
    response = requests.get(url, timeout=60)
    # Fail loudly on an HTTP error instead of hitting a KeyError on the payload.
    response.raise_for_status()
    data = response.json()
    total = data['meta']['count']
    # Slicing (instead of indexing 0..14) tolerates responses with fewer groups.
    # `rank` is the 1-based position in the response; this assumes the API
    # returns groups sorted by descending count — TODO confirm.
    for rank, group in enumerate(data['group_by'][:TOP_N_COUNTRIES], start=1):
        dict_countries.append({
            "year": str(year),
            'country': group['key'].replace('https://openalex.org/countries/', ''),
            'count': group['count'],
            'total': total,
            'rank': rank,
        })

In [None]:
# Tabulate the per-year/per-country records and add each country's share of
# the yearly total, in percent rounded to one decimal.
df = pd.DataFrame(dict_countries).assign(
    percentage=lambda frame: frame.apply(
        lambda row: round(row['count'] * 100 / row['total'], 1),
        axis=1,
    )
)

In [None]:
# Rows for publication year 2020 (the `year` column holds strings).
df.loc[df['year'] == '2020']

In [None]:
# Ranks recorded for Spain, in row order.
df.loc[df['country'] == 'ES', 'rank'].tolist()

In [None]:
# Rebuild the per-country table from the raw records.
# The `percentage` column must be recomputed here: re-creating `df` from
# `dict_countries` alone drops it, and the bar chart reads
# `country_data['percentage']`, which would otherwise raise a KeyError.
df = pd.DataFrame(dict_countries)
df['percentage'] = df.apply(lambda row: round(row['count'] * 100 / row['total'], 1), axis=1)

In [None]:
# Grouped bar chart: for every year, one bar per country among the 8 largest
# `count` values of that year; bar height is the `percentage` column.
years = df['year'].unique()

# Fixed colour per country code; any other country falls back to gray.
color_dict = dict(
    US='pink',
    GB='green',
    CN='red',
    DE='yellow',
    AU='purple',
    CA='cyan',
    FR='blue',
    IN='orange',
    IT='brown',
)

bar_width = 0.1
index = np.arange(len(years))
fig, ax = plt.subplots(figsize=(14, 8))

# Countries that already own a legend entry (each is labelled only once).
used_countries = set()
for year_pos, year in zip(index, years):
    rows_for_year = df.loc[df['year'] == year]
    leaders = rows_for_year.nlargest(8, 'count')['country']

    for slot, country in enumerate(leaders):
        share = rows_for_year.loc[rows_for_year['country'] == country, 'percentage']
        legend_label = "" if country in used_countries else country
        ax.bar(
            year_pos + slot * bar_width,
            share,
            bar_width,
            label=legend_label,
            color=color_dict.get(country, 'gray'),
        )
        used_countries.add(country)

ax.set_xlabel('Years')
ax.set_ylabel('Part (%)')
ax.set_title('Part of publications in OpenAlex by country and by year')
# Centre each tick under its group of 8 bars (3.5 bar widths from the first bar).
ax.set_xticks(index + bar_width * 3.5)
ax.set_xticklabels(years)
ax.legend(title="Countries", bbox_to_anchor=(1.05, 1), loc='upper left')

plt.tight_layout()
plt.show()

## Country ranks by year, plotted with Highcharts

In [None]:
# pip install virtualenv
# virtualenv -p python3.10 myenv
# myenv\Scripts\activate
# pip install highcharts_core

In [None]:
from highcharts_core.chart import Chart
from highcharts_core.options import HighchartsOptions
from highcharts_core.options.plot_options import PlotOptions
from highcharts_core.options.axes.x_axis import XAxis
from highcharts_core.options.axes.y_axis import YAxis,YAxisTitle
from highcharts_core.options.axes.accessibility import AxisAccessibility
from highcharts_core.options.axes.title import AxisTitle
from highcharts_core.options.title import Title
from highcharts_core.options.subtitle import Subtitle
from highcharts_core.options.legend import Legend
from highcharts_core.options.plot_options.series import SeriesOptions
from highcharts_core.options.series.area import LineSeries
from highcharts_core.options.series.labels import SeriesLabel
from highcharts_core.options.responsive import Responsive, ResponsiveRules, Condition
from highcharts_core.constants import EnforcedNull

In [None]:
# Base options of the rank chart: left-aligned title/subtitle, a reversed
# y axis starting at 1 (so rank 1 is drawn at the top), a vertical legend on
# the right, and series points starting at x = 2013.
chart_options = HighchartsOptions(
    title = Title(text = 'Rank for 10 countries by year in OpenAlex publications',
                  align = 'left'),
    # The link previously pointed to an unrelated solar-jobs page while being
    # labelled "OpenAlex"; it now targets the OpenAlex site.
    subtitle = Subtitle(text = 'Source: <a href="https://openalex.org/" target="_blank">OpenAlex</a>',
                        align = 'left'),
    y_axis = YAxis(title = YAxisTitle(text = 'Rank'), reversed=True, min=1),
    x_axis = XAxis(
        accessibility = AxisAccessibility(range_description = 'Range: 2013 to 2023')
    ),
    legend = Legend(layout = 'vertical',
                    align = 'right',
                    vertical_align = 'middle'),
    plot_options = PlotOptions(series = SeriesOptions(point_start = 2013,
                                                      label = SeriesLabel(connector_allowed = False)))
)

In [None]:
# Legend layout applied by the responsive rule: horizontal, centred, at the bottom.
override_options = HighchartsOptions(
    legend=Legend(
        layout='horizontal',
        align='center',
        vertical_align='bottom',
    )
)

# Single responsive rule whose condition is a maximum width of 500.
responsive_config = Responsive(
    rules=[
        ResponsiveRules(
            condition=Condition(max_width=500),
            chart_options=override_options,
        ),
    ]
)
chart_options.responsive = responsive_config

In [None]:
# One line per country: (country code, yearly ranks, line colour).
# Each list holds 11 values, one per year starting at the chart's point_start.
# A colour of None leaves the choice to Highcharts' default palette.
# IT previously shared 'green' with GB, which made the two lines
# indistinguishable; it now has its own colour.
# NOTE(review): the ranks are hard-coded — presumably transcribed from the
# `rank` column of the per-country table; regenerate them if the query changes.
country_ranks = [
    ('US', [1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2], None),
    ('GB', [2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3], 'green'),
    ('CN', [3, 3, 3, 2, 2, 2, 2, 2, 2, 1, 1], 'red'),
    ('DE', [4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4], 'purple'),
    ('FR', [7, 7, 7, 7, 7, 7, 7, 6, 6, 5, 6], 'blue'),
    ('IN', [15, 13, 13, 13, 11, 12, 11, 11, 10, 8, 5], 'orange'),
    ('AU', [5, 5, 5, 5, 5, 5, 5, 5, 7, 6, 7], 'grey'),
    ('CA', [6, 6, 6, 6, 6, 6, 6, 7, 5, 7, 9], 'brown'),
    ('IT', [9, 9, 8, 8, 8, 9, 9, 8, 8, 9, 8], 'teal'),
    ('ES', [8, 8, 9, 9, 9, 8, 8, 9, 9, 10, 10], 'yellow'),
]

rank_series = [
    # Only pass `color` when one is set, so the default-palette case is
    # constructed exactly as a LineSeries without a colour argument.
    LineSeries(name=code, data=ranks, **({'color': colour} if colour else {}))
    for code, ranks, colour in country_ranks
]

chart_options.add_series(*rank_series)

In [None]:
# Build the chart object from the configured options and render it in the notebook.
chart = Chart.from_options(chart_options)
chart.display()

## Second IPCC model, applied to OpenAlex publications

In [None]:
def _unique_source_values(locations, field):
    """Return the distinct non-null ``source[field]`` values of ``locations``.

    Values are returned in first-seen order. Locations whose ``source`` is
    missing or null are skipped.
    """
    seen = {}
    for location in locations or []:
        source = location.get('source') or {}
        value = source.get(field)
        if value is not None:
            seen.setdefault(value, None)
    return list(seen)


# Download every matching work for each country using OpenAlex cursor paging
# and keep doi, title, topic names and the names / ISSN-L of its sources.
#
# Fixes relative to the previous version:
#   * the page count was computed with a branch that read `nb_page` before it
#     was assigned, and `range(1, nb_page)` skipped a page; the loop now simply
#     follows `next_cursor` until the API stops returning one;
#   * `if j % 500` was true on every page that is NOT a multiple of 500, so the
#     60 s pause ran almost every iteration; it now runs once per 500 pages;
#   * `select=` is sent on the cursor requests too, so only the fields that are
#     actually read are transferred;
#   * HTTP errors raise immediately and requests have a timeout.
data_OA = []
countries = ['FR']  # ['US','GB','CN','DE',
for country in countries:
    print(country)
    base_url = (
        "https://api.openalex.org/works?filter=has_doi:true,"
        f"topics.id:{climat_topics_OR},"
        f"title_and_abstract.search:({climat_topics_OR_str}),"
        f"institutions.country_code:{country}"
        "&select=doi,title,topics,locations&per_page=200"
    )
    cursor = '*'  # '*' starts a new cursor-paged traversal
    page_number = 0
    while cursor:
        page_number += 1
        if page_number % 500 == 0:
            time.sleep(60)  # pause after every 500 pages
        print(page_number)
        response = requests.get(f"{base_url}&cursor={cursor}", timeout=60)
        response.raise_for_status()
        data = response.json()
        for work in data.get('results', []):
            locations = work.get('locations')
            data_OA.append({
                'doi': work['doi'],
                'title': work['title'],
                'topics': [
                    topic.get('display_name')
                    for topic in work.get('topics') or []
                    if 'display_name' in topic
                ],
                'locations_names': _unique_source_values(locations, 'display_name'),
                'locations_ids': _unique_source_values(locations, 'issn_l'),
                'countries': country,
            })
        # A null next_cursor ends the loop.
        cursor = data['meta'].get('next_cursor')

In [None]:
# Inspect the `meta` section of the most recent API response.
data['meta']

In [None]:
# Tabulate the harvested works and bind the table to `df`.
# The text-cleaning and prediction cells read df['doi'], df['title'],
# df['topics'], df['locations_names'] and df['locations_ids']; without this
# assignment `df` would still be the per-country statistics table and those
# cells would fail with a KeyError on a fresh kernel.
df = pd.DataFrame(data_OA)
df

In [None]:
# Persist the harvested records as line-delimited JSON (one record per line).
# NOTE(review): the file name hard-codes "FR" although `countries` is a list — confirm.
pd.DataFrame(data_OA).to_json('data_OA_FR.json',orient='records',lines=True)


## Using model_wg (the fastText model `fasttext_model_teds_wg.bin`)

In [None]:
# Location of the fastText model file, resolved relative to `root_directory`.
path=os.path.join(root_directory, 'notebooks', 'models_fasttext_or_random_forest','fasttext_model_teds_wg.bin')

In [None]:
# Load the fastText model stored at `path`.
model = fasttext.load_model(path)

In [None]:
def _normalise_doi(raw_doi):
    """Strip the 'https://doi.org/' prefix from a DOI and lowercase it."""
    cleaned = str(nfx.remove_stopwords(raw_doi))
    return cleaned.replace('https://doi.org/', '').lower()


df.loc[:, 'doi'] = df.loc[:, 'doi'].apply(_normalise_doi)

In [None]:
def _clean_title(raw_title):
    """Remove stopwords and curly braces from a title and lowercase it.

    A missing title (None / NaN) is treated as an empty string so that the
    stopword filter is never called on a non-string value.
    """
    text = raw_title if isinstance(raw_title, str) else ''
    return str(nfx.remove_stopwords(text)).replace('}', '').replace('{', '').lower()


df.loc[:, 'title'] = df.loc[:, 'title'].apply(_clean_title)

In [None]:
def _topics_to_text(topics):
    """Join a list of topic names into one lowercase, stopword-free string.

    A value that is already a string is used as is, so re-running this cell
    does not split previously joined text into space-separated characters.
    """
    joined = topics if isinstance(topics, str) else ' '.join(topics)
    return nfx.remove_stopwords(joined).lower()


df.loc[:, 'topics'] = df.loc[:, 'topics'].apply(_topics_to_text)

In [None]:
# Sample (labels, scores) pair: three label strings and an array of three floats.
# NOTE(review): presumably an example of the model's prediction output — confirm.
x = (
    ('__label__adaptation', '__label__mitigation', '__label__science'),
    np.array([0.79819679, 0.12941273, 0.02596736]),
)

In [None]:
def _prediction_for(row):
    """Run the fastText model on the concatenated text fields of one row."""
    # NOTE(review): `locations_names` / `locations_ids` are interpolated as they
    # are stored; if they are lists, their Python repr (brackets, quotes) ends
    # up in the model input — confirm this matches how the model was trained.
    text = f"{row['title']} {row['topics']} {row['locations_names']} {row['locations_ids']}".lower()
    return model.predict(nfx.remove_stopwords(text), k=-1)


df.loc[:, 'prediction'] = df.apply(_prediction_for, axis=1)
