<a href="https://colab.research.google.com/github/joaochenriques/WaveEnergyStatistics/blob/main/WaveEnergyStatistics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as mpl
import sys, pathlib
import itertools
from scipy import optimize
from sortedcontainers import SortedDict

from matplotlib.ticker import (MultipleLocator, FormatStrFormatter,AutoMinorLocator)

import pathlib, subprocess

def cmdcall( cmd ):
    """Run `cmd` through subprocess.getoutput and print the text it captured.

    Nothing is returned; the captured output only goes to stdout.
    """
    print( subprocess.getoutput( cmd ) )

# Fetch the plotting helper module into the working directory unless a copy is already there.
if not pathlib.Path("mpl_utils.py").exists():
  cmdcall( 'curl -O https://raw.githubusercontent.com/joaochenriques/ipynb_libs/main/mpl_utils.py' )

# Must stay below the download step: on a fresh machine that step is what creates mpl_utils.py.
import mpl_utils as mut
mut.config_plots()

# Marker codes for plots; no cell visible here uses them — presumably consumed by later plotting cells.
markers = ( 'o', '^', 's', 'v', 'H', 'X', 'P' )

# Ask the inline backend to emit figures as SVG.
from matplotlib_inline.backend_inline import set_matplotlib_formats
set_matplotlib_formats('svg')

In [2]:
# Install dataclassy at run time (cmdcall echoes pip's output), then import its decorator.
# NOTE: this `dataclass` comes from dataclassy, not from the standard-library `dataclasses` module.
cmdcall( 'pip install dataclassy' )
from dataclassy import dataclass

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [3]:
# iso3166 is installed on the fly; cmdcall echoes pip's output.
cmdcall( 'pip install iso3166' )
from iso3166 import countries

# The `name` of every entry in the iso3166 `countries` collection, in iteration order.
country_lst = [ entry.name for entry in countries ]

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [23]:
def left_align( df ):
    left_aligned_df = df.style.set_properties(**{'text-align': 'left'})
    left_aligned_df = left_aligned_df.set_table_styles(
        [dict(selector='th', props=[('text-align', 'left')])]
    )
    return left_aligned_df

# Scopus advanced search query

```
TITLE-ABS-KEY-AUTH(WAVE-ENERGY) AND PUBYEAR AFT 2003 AND DOCTYPE(ar OR re) AND
(
    SRCTITLE(applied-energy) OR
    SRCTITLE(applied-ocean-research) OR
    SRCTITLE(energy) OR
    SRCTITLE(energy-conversion-and-management ) OR
    SRCTITLE(energies) OR
    SRCTITLE(ieee-transactions-on-sustainable-energy) OR
    SRCTITLE(iet-renewable-power-generation) OR
    SRCTITLE(international-journal-of-marine-energy ) OR
    SRCTITLE(international-journal-of-offshore-and-polar-engineering ) OR
    SRCTITLE(journal-of-offshore-mechanics-and-arctic-engineering) OR
    SRCTITLE(Journal-of-Ocean-Engineering-and-Marine-Energy) OR
    SRCTITLE(ocean-engineering) OR
    SRCTITLE(marine-structures) OR
    SRCTITLE(renewable-energy) OR
    SRCTITLE(renewable-sustainable-energy-reviews) 
)
```

In [4]:
# Name of the Scopus CSV export analysed by this notebook.
filename = 'scopus_20230608D.csv'

# The download is attempted only inside Google Colab; elsewhere the file must already be present.
if 'google.colab' in sys.modules:
    if not pathlib.Path( filename ).exists():
        cmdcall( f'curl -O https://raw.githubusercontent.com/joaochenriques/PAS_STATS/main/{filename}' )

In [5]:
# Load the Scopus export and display its column labels.
df = pd.read_csv( filename )
df.columns

Index(['Authors', 'Author full names', 'Author(s) ID', 'Title', 'Year',
       'Source title', 'Volume', 'Issue', 'Art. No.', 'Page start', 'Page end',
       'Page count', 'Cited by', 'DOI', 'Link', 'Affiliations',
       'Authors with affiliations', 'Author Keywords', 'Index Keywords',
       'Correspondence Address', 'Document Type', 'Publication Stage',
       'Source', 'EID'],
      dtype='object')

In [19]:
# One Series per Scopus column used by the cells below (the *_lst names are kept for those cells).

# -- who wrote the paper and where they work
authors_lst = df['Authors']
authors_ID_lst = df['Author(s) ID']
affiliations_lst = df['Affiliations']

# -- what was published and where
title_lst = df['Title']
journal_lst = df['Source title']
year_lst = df['Year']
DOI_lst = df['DOI']

# -- impact and publication status
citations_lst = df['Cited by']
stage_lst = df['Publication Stage']

# Trailing affiliation fragments that are not country names, mapped to the country to count.
# Keys are compared verbatim against the text after the last comma of each affiliation entry.
# NOTE(review): the 'Tx000E9' key looks like an encoding artefact in the export — confirm against the CSV.
replacements_dic = {
    'Ireland (formerly at the University of Plymouth)': 'Ireland',
    'Univ. Paris6': 'France',
    'Chinese Academy of Sciences': 'China',
    'Instituto Superior Tx000E9': 'Portugal',
}

# Countries ordered by share of papers, by country of the authors' affiliations

In [29]:
# Fractional counting: each 'Final' paper with affiliation data carries a total weight of 1,
# split equally among its ';'-separated affiliation entries.
countries_frac_dic = {}
total_valid_papers = 0

for affiliations, stage in zip( affiliations_lst, stage_lst ):
    # NaN is the only value that differs from itself, so this also skips missing affiliations
    if stage != 'Final' or affiliations != affiliations:
        continue

    total_valid_papers += 1

    institutions_lst = affiliations.split( ';' )
    frac = 1.0 / len( institutions_lst )

    for institution in institutions_lst:
        # the country is taken to be whatever follows the last comma of the entry
        country = institution.rsplit( ',', 1 )[-1].strip()
        country = replacements_dic.get( country, country )
        countries_frac_dic[country] = countries_frac_dic.get( country, 0.0 ) + frac

# Largest share first; dicts keep insertion order, so this one iterates in rank order.
sorted_country_frac_rank = dict(
    sorted( countries_frac_dic.items(), key=lambda pair: pair[1], reverse=True )
)

# Row 0 holds the number of papers counted; the countries follow in rank order.
# int() truncates the fractional totals toward zero.
df_country_name_lst = [ "Total number of papers", *sorted_country_frac_rank ]
df_country_num_lst = [ int( total_valid_papers ) ] + [ int( share ) for share in sorted_country_frac_rank.values() ]

df_country_dic = {
        "Country": df_country_name_lst,
        "Num papers": df_country_num_lst
}

left_align( pd.DataFrame( df_country_dic ).head(10) )

Unnamed: 0,Country,Num papers
0,Total number of papers,3580
1,China,731
2,United Kingdom,369
3,United States,360
4,Portugal,214
5,Spain,177
6,Ireland,155
7,Italy,152
8,Australia,144
9,India,140


# Number of papers and citations per author, and the most cited papers

In [8]:
@dataclass
class data:
    """Running totals for one author, filled in while scanning the publication list.

    Field order matters: instances are created positionally as data( name, num_papers, citations ).
    """
    # author name as written in the 'Authors' column
    name: str = None
    # number of papers counted for this author
    num_papers: int = 0
    # sum of the 'Cited by' values of those papers
    citations: int = 0

In [10]:
# Per-author totals keyed by Scopus author ID, so the same ID always maps to one record
# regardless of how the name is spelled. The stored name is the spelling seen first.
papers_dic = {}

for authors, authors_ID, citations, stage in zip( authors_lst, authors_ID_lst, citations_lst, stage_lst ):
    # NaN differs from itself, so this also skips rows with missing authors or IDs
    if stage != 'Final' or authors != authors or authors_ID != authors_ID:
        continue

    # A blank 'Cited by' cell is read as NaN and int(NaN) raises ValueError: count it as zero.
    paper_citations = int( citations ) if citations == citations else 0

    # str() guards against an ID column entry that was parsed as a number.
    # zip() pairs names and IDs by position and stops at the shorter of the two lists.
    for author, ID in zip( authors.split( ';' ), str( authors_ID ).split( ';' ) ):
        author = author.strip()
        ID = ID.strip()

        record = papers_dic.get( ID )
        if record is None:
            papers_dic[ID] = data( author, 1, paper_citations )
        else:
            record.num_papers += 1
            record.citations += paper_citations

In [26]:
# Authors ranked by number of papers, most prolific first (sorted() is stable, so ties keep papers_dic order).
sorted_papers_dic = dict(
    sorted( papers_dic.items(), key=lambda entry: entry[1].num_papers, reverse=True )
)

# The column lists are created first so that df_authors_dic refers to these same list objects.
df_authors_name_lst = [ record.name for record in sorted_papers_dic.values() ]
df_authors_num_lst = [ record.num_papers for record in sorted_papers_dic.values() ]

df_authors_dic = {
        "Author": df_authors_name_lst,
        "Num papers": df_authors_num_lst
}

left_align( pd.DataFrame( df_authors_dic ).head(20) )

Unnamed: 0,Author,Num papers
0,Ringwood J.V.,92
1,Iglesias G.,79
2,Guedes Soares C.,64
3,Gato L.M.C.,63
4,Henriques J.C.C.,62
5,Falcão A.F.O.,56
6,Ning D.,47
7,Greaves D.,45
8,Leijon M.,44
9,Wang Z.L.,42


In [25]:
# Authors ranked by total citations, highest first (sorted() is stable, so ties keep papers_dic order).
sorted_citations_dic = dict(
    sorted( papers_dic.items(), key=lambda entry: entry[1].citations, reverse=True )
)

# The column lists are created first so that df_citations_dic refers to these same list objects.
df_citations_name_lst = [ record.name for record in sorted_citations_dic.values() ]
df_citations_num_lst = [ record.citations for record in sorted_citations_dic.values() ]

df_citations_dic = {
        "Author": df_citations_name_lst,
        "Citations": df_citations_num_lst
}

left_align( pd.DataFrame( df_citations_dic ).head(10) )

Unnamed: 0,Author,Citations
0,Falcão A.F.O.,4991
1,Iglesias G.,4914
2,Wang Z.L.,4696
3,Ringwood J.V.,3159
4,Guedes Soares C.,2643
5,Babarit A.,2527
6,Henriques J.C.C.,2384
7,Moan T.,2337
8,Carballo R.,2022
9,Jiang T.,1977


In [17]:
@dataclass
class paper_rank:
    """Bibliographic record of one paper, used to rank papers by citation count.

    Field order matters: instances are created positionally as
    paper_rank( authors, title, journal, year, DOI, citations ).
    """
    # full 'Authors' string of the paper (all authors, not split)
    authors: str = None
    # paper title
    title: str = None
    # value of the 'Source title' column
    journal: str =None
    # publication year
    year: int = 0
    # DOI as given in the export
    DOI: str = None
    # 'Cited by' count converted to int
    citations: int = 0

In [20]:
# One paper_rank record per 'Final' paper, keyed by DOI.
papers_rank_dic = {}

for n, ( authors, title, journal, year, DOI, citations, stage ) in enumerate( zip( authors_lst, title_lst, journal_lst, year_lst, DOI_lst, citations_lst, stage_lst ) ):
    # NaN differs from itself, so this also skips rows with missing authors
    if stage != 'Final' or authors != authors:
        continue

    # A missing DOI is read as NaN. Used directly as the key, DOI-less papers can overwrite
    # one another and drop out of the ranking, so they get a unique key built from the row
    # position instead. The record itself keeps the original DOI value.
    key = DOI if DOI == DOI else f'missing-DOI-{n}'

    # A blank 'Cited by' cell is read as NaN and int(NaN) raises ValueError: count it as zero.
    paper_citations = int( citations ) if citations == citations else 0

    papers_rank_dic[key] = paper_rank( authors, title, journal, year, DOI, paper_citations )

In [24]:
# Papers ranked by citation count, most cited first (sorted() is stable, so ties keep papers_rank_dic order).
sorted_papers_rank_dic = dict(
    sorted( papers_rank_dic.items(), key=lambda entry: entry[1].citations, reverse=True )
)

# One list per output column, all in rank order; df_papers_rank_dic below refers to these same lists.
df_authors_name_lst = [ paper.authors for paper in sorted_papers_rank_dic.values() ]
df_title_lst = [ paper.title for paper in sorted_papers_rank_dic.values() ]
df_journal_lst = [ paper.journal for paper in sorted_papers_rank_dic.values() ]
df_year_lst = [ paper.year for paper in sorted_papers_rank_dic.values() ]
df_DOI_lst = [ paper.DOI for paper in sorted_papers_rank_dic.values() ]
df_citations_lst = [ paper.citations for paper in sorted_papers_rank_dic.values() ]

df_papers_rank_dic = {
        "Authors": df_authors_name_lst,
        "Title": df_title_lst,
        "Journal": df_journal_lst,
        "Year": df_year_lst,
        "DOI": df_DOI_lst,
        "citations": df_citations_lst
}

left_align( pd.DataFrame( df_papers_rank_dic ).head(10) )

Unnamed: 0,Authors,Title,Journal,Year,DOI,citations
0,Falcão A.F.d.O.,Wave energy utilization: A review of the technologies,Renewable and Sustainable Energy Reviews,2010,10.1016/j.rser.2009.11.003,2035
1,Drew B.; Plummer A.R.; Sahinkaya M.N.,A review of wave energy converter technology,"Proceedings of the Institution of Mechanical Engineers, Part A: Journal of Power and Energy",2009,10.1243/09576509JPE782,867
2,Falnes J.,A review of wave-energy extraction,Marine Structures,2007,10.1016/j.marstruc.2007.09.001,809
3,Wang Z.L.; Jiang T.; Xu L.,Toward the blue energy dream by triboelectric nanogenerator networks,Nano Energy,2017,10.1016/j.nanoen.2017.06.035,727
4,Falcão A.F.O.; Henriques J.C.C.,Oscillating-water-column wave energy converters and air turbines: A review,Renewable Energy,2016,10.1016/j.renene.2015.07.086,594
5,Babarit A.; Hals J.; Muliawan M.J.; Kurniawan A.; Moan T.; Krokstad J.,Numerical benchmarking study of a selection of wave energy converters,Renewable Energy,2012,10.1016/j.renene.2011.10.002,551
6,López I.; Andreu J.; Ceballos S.; Martínez De Alegría I.; Kortabarria I.,Review of wave energy technologies and the necessary power-equipment,Renewable and Sustainable Energy Reviews,2013,10.1016/j.rser.2013.07.009,533
7,Gunn K.; Stock-Williams C.,Quantifying the global wave power resource,Renewable Energy,2012,10.1016/j.renene.2012.01.101,436
8,Pérez-Collazo C.; Greaves D.; Iglesias G.,A review of combined wave and offshore wind energy,Renewable and Sustainable Energy Reviews,2015,10.1016/j.rser.2014.09.032,403
9,Hussain A.; Arif S.M.; Aslam M.,Emerging renewable and sustainable energy technologies: State of the art,Renewable and Sustainable Energy Reviews,2017,10.1016/j.rser.2016.12.033,335
