In [675]:
import numpy as np
import pandas as pd
import scipy
import matplotlib.pyplot as plt
import itertools
import seaborn as sns
%matplotlib inline

In [676]:
#Import data
origdata = pd.read_csv('https://raw.githubusercontent.com/borja876/Thinkful-DataScience-Borja/master/WELLCOME_APCspend2013_forThinkful.csv', encoding='latin1')

In [677]:
#Creata dataframe and take a look at the data
wellcome = pd.DataFrame(origdata)
wellcome.head()

Unnamed: 0,PMID/PMCID,Publisher,Journal title,Article title,COST (£) charged to Wellcome (inc VAT when charged)
0,,CUP,Psychological Medicine,Reduced parahippocampal cortical thickness in ...,£0.00
1,PMC3679557,ACS,Biomacromolecules,Structural characterization of a Model Gram-ne...,£2381.04
2,23043264 PMC3506128,ACS,J Med Chem,"Fumaroylamino-4,5-epoxymorphinans and related ...",£642.56
3,23438330 PMC3646402,ACS,J Med Chem,Orvinols with mixed kappa/mu opioid receptor a...,£669.64
4,23438216 PMC3601604,ACS,J Org Chem,Regioselective opening of myo-inositol orthoes...,£685.88


In [678]:
wellcome.dtypes

PMID/PMCID                                             object
Publisher                                              object
Journal title                                          object
Article title                                          object
COST (£) charged to Wellcome (inc VAT when charged)    object
dtype: object

In [679]:
#Check how dirty is the data trying a count approach for the first questions
grouped = wellcome.groupby('Journal title').count()
#Sort the number of articles per journal in a decreasing number and inspect for dirty duplicates
grouped1 = grouped.sort_values('Article title', ascending = False).reset_index()
grouped1.head()
#There are white spaces, inconsistency in the use of 'and' and &, inconsistency in the use of '.' at the end of the words and inconsistent use of upper and lowwer case
#Additionally, the GBP symbol will not allow to calculate stats.

Unnamed: 0,Journal title,PMID/PMCID,Publisher,Article title,COST (£) charged to Wellcome (inc VAT when charged)
0,PLoS One,91,92,92,92
1,PLoS ONE,62,62,62,62
2,Journal of Biological Chemistry,47,48,48,48
3,Nucleic Acids Research,20,21,21,21
4,Proceedings of the National Academy of Sciences,19,19,19,19


In [680]:
#Rename titles in Capital letters
wellcome1 = wellcome.rename(columns={'Journal title': 'JOURNAL TITLE', 'Publisher': 'PUBLISHER', 'Article title': 'ARTICLE TITLE', 'COST (£) charged to Wellcome (inc VAT when charged)': 'COST (GBP) INC. VAT'})
wellcome1.head()

Unnamed: 0,PMID/PMCID,PUBLISHER,JOURNAL TITLE,ARTICLE TITLE,COST (GBP) INC. VAT
0,,CUP,Psychological Medicine,Reduced parahippocampal cortical thickness in ...,£0.00
1,PMC3679557,ACS,Biomacromolecules,Structural characterization of a Model Gram-ne...,£2381.04
2,23043264 PMC3506128,ACS,J Med Chem,"Fumaroylamino-4,5-epoxymorphinans and related ...",£642.56
3,23438330 PMC3646402,ACS,J Med Chem,Orvinols with mixed kappa/mu opioid receptor a...,£669.64
4,23438216 PMC3601604,ACS,J Org Chem,Regioselective opening of myo-inositol orthoes...,£685.88


In [681]:
#Strip blank spaces
wellcome1['JOURNAL TITLE'] = wellcome1['JOURNAL TITLE'].str.strip()
#Consistency in not using '.' at the end or words: Delete '.'
wellcome1['JOURNAL TITLE'] = wellcome1['JOURNAL TITLE'].str.replace('.', '')
#Consistency in the use of &: substitute all 'AND' by & - IT DIDN´T WORK
wellcome1['JOURNAL TITLE'] = wellcome1['JOURNAL TITLE'].str.replace('AND','&')
#Change all the cases to upper cases to check for duplicates
wellcome1['JOURNAL TITLE'] = wellcome1['JOURNAL TITLE'].str.upper()
#Show results
wellcome1.head()

Unnamed: 0,PMID/PMCID,PUBLISHER,JOURNAL TITLE,ARTICLE TITLE,COST (GBP) INC. VAT
0,,CUP,PSYCHOLOGICAL MEDICINE,Reduced parahippocampal cortical thickness in ...,£0.00
1,PMC3679557,ACS,BIOMACROMOLECULES,Structural characterization of a Model Gram-ne...,£2381.04
2,23043264 PMC3506128,ACS,J MED CHEM,"Fumaroylamino-4,5-epoxymorphinans and related ...",£642.56
3,23438330 PMC3646402,ACS,J MED CHEM,Orvinols with mixed kappa/mu opioid receptor a...,£669.64
4,23438216 PMC3601604,ACS,J ORG CHEM,Regioselective opening of myo-inositol orthoes...,£685.88


In [682]:
#Clean misspellings in JOURNAL TITLE
wellcome1['JOURNAL TITLE'] = wellcome1['JOURNAL TITLE'].replace(['PLOSONE', 'PLOS  ONE', 'PLOS 1', 'PLOS'], 'PLOS ONE')
wellcome1['JOURNAL TITLE'] = wellcome1['JOURNAL TITLE'].replace(['ACTA D', 'ACTA CRYSTALLOGRAPHICA SECTION D', 'ACTA CRYSTALLOGRAPHY D', 'ACTA CRYSTALLOGRAPHICA, SECTION D', 'ACTA CRYSTALLOGRAPHICA SECTION D,  BIOLOGICAL CRYSTALLOGRAPHY'], 'ACTA CRYSTALLOGRAPHICA SECTION D: BIOLOGICAL CRYSTALLOGRAPHY')
wellcome1['JOURNAL TITLE'] = wellcome1['JOURNAL TITLE'].replace(['AMERICAN JNL EPIDEMIOLOGY'], 'AMERICAN JOURNAL OF EPIDEMIOLOGY')
wellcome1['JOURNAL TITLE'] = wellcome1['JOURNAL TITLE'].replace(['AMERICAN JOURNAL OF MEDICAL GENETICS PART A'], 'AMERICAN JOURNAL OF MEDICAL GENETICS')
wellcome1['JOURNAL TITLE'] = wellcome1['JOURNAL TITLE'].replace(['ANTIMICROBIAL AGENTS AND CHEMOTHERAPY', 'ANTIMICROBIAL AGFENTS AND CHEMOTHERAPY'], 'ANTIMICROBIAL AGENTS & CHEMOTHERAPY')
wellcome1['JOURNAL TITLE'] = wellcome1['JOURNAL TITLE'].replace(['ANGEWANDE CHEMIE', 'ANGEWANDTE CHEMIE INTERNATIONAL EDITION','ANGEW CHEMS INT ED' ], 'ANGEWANDTE CHEMIE')
wellcome1['JOURNAL TITLE'] = wellcome1['JOURNAL TITLE'].replace(['BEHAVIOUR RESEARCH AND THERAPY'], 'BEHAVIOR RESEARCH & THERAPY')
wellcome1['JOURNAL TITLE'] = wellcome1['JOURNAL TITLE'].replace(['BIOCHEM JOURNAL', 'BIOCHEMICAL JOURNALS'], 'BIOCHEMICAL JOURNAL')
wellcome1['JOURNAL TITLE'] = wellcome1['JOURNAL TITLE'].replace(['BIOCHEM SOC TRANS'], 'BIOCHEMICAL SOCIETY TRANSACTIONS')
wellcome1['JOURNAL TITLE'] = wellcome1['JOURNAL TITLE'].replace(['BRITISH JOURNAL OF OPHTHALMOLOGY'], 'BRITISH JOURNAL OF OPTHALMOLOGY')
wellcome1['JOURNAL TITLE'] = wellcome1['JOURNAL TITLE'].replace(['CELL DEATH DIFFERENTIATION'], 'CELL DEATH & DIFFERENTIATION')
wellcome1['JOURNAL TITLE'] = wellcome1['JOURNAL TITLE'].replace(['CHILD: CARE, HEALTH DEVELOPMENT'], 'CHILD: CARE, HEALTH & DEVELOPMENT')
wellcome1['JOURNAL TITLE'] = wellcome1['JOURNAL TITLE'].replace(['CURR BIOL'], 'CURRENT BIOLOGY')
wellcome1['JOURNAL TITLE'] = wellcome1['JOURNAL TITLE'].replace(['DEV. WORLD BIOETH'], 'DEVELOPING WORLD BIOETHICS')
wellcome1['JOURNAL TITLE'] = wellcome1['JOURNAL TITLE'].replace(['EUROPEAN CHILD AND ADOLESCENT PSYCHIATTY'], 'EUROPEAN CHILD & ADOLESCENT PSYCHIATRY')
wellcome1['JOURNAL TITLE'] = wellcome1['JOURNAL TITLE'].replace(['FEBS J'], 'FEBS JOURNAL')
wellcome1['JOURNAL TITLE'] = wellcome1['JOURNAL TITLE'].replace(['HUM RESOUR HEALTH'], 'HUMAN RESOURCES FOR HEALTH')
wellcome1['JOURNAL TITLE'] = wellcome1['JOURNAL TITLE'].replace(['INTERNATIONAL JOURAL OF EPIDEMIOLOGY'], 'INTERNATIONAL JOURNAL OF EPIDEMIOLOGY')
wellcome1['JOURNAL TITLE'] = wellcome1['JOURNAL TITLE'].replace(['INTERNATIONAL JOURNAL OF BEHAVIOURAL NUTRITION AND PHYSICAL ACTIVITY'], 'INTERNATIONAL JOURNAL OF BEHAVIORAL NUTRITION AND PHYSICAL ACTIVITY')
wellcome1['JOURNAL TITLE'] = wellcome1['JOURNAL TITLE'].replace(['J BIOL CHEM'], 'J BIOL CHEMISTRY')
wellcome1['JOURNAL TITLE'] = wellcome1['JOURNAL TITLE'].replace(['JOURNAL OF AUTISM AND DEVELOPMENTAL DISORDERS'], 'JOURNAL OF AUTISM & DEVELOPMENT DISORDERS')
wellcome1['JOURNAL TITLE'] = wellcome1['JOURNAL TITLE'].replace(['JOURNAL OF THE ROYAL SOCIETY, INTERFACE'], 'JOURNAL OF THE ROYAL SOCIETY INTERFACE')
wellcome1['JOURNAL TITLE'] = wellcome1['JOURNAL TITLE'].replace(['JOURNAL OF VIROL'], 'JOURNAL OF VIROLOGY')
wellcome1['JOURNAL TITLE'] = wellcome1['JOURNAL TITLE'].replace(['JOURNAL OF VISUALIZED EXPERIEMENTS', 'JOURNAL OF VISULAIZED EXPERMIMENTS'], 'JOURNAL OF VISUALIZED EXPERIMENTS')
wellcome1['JOURNAL TITLE'] = wellcome1['JOURNAL TITLE'].replace(['MICROBES INFECT'], 'MICROBES & INFECTION')
wellcome1['JOURNAL TITLE'] = wellcome1['JOURNAL TITLE'].replace(['NUCLEIC ACID RESEARCH', 'NA'], 'NUCLEIC ACIDS RESEARCH')
wellcome1['JOURNAL TITLE'] = wellcome1['JOURNAL TITLE'].replace(['PARASIT VECTORS'], 'PARASITES & VECTORS')
wellcome1['JOURNAL TITLE'] = wellcome1['JOURNAL TITLE'].replace(['PHILOS TRANS R SOC LONDON B BIOL SCI', 'PHILOSOPHICAL TRANSACTIONS OF THE ROYAL SOCIETY OF LONDON. SERIES B, BIOLOGICAL SCIENCES'], 'PHILOSOPHICAL TRANSACTIONS B')
wellcome1['JOURNAL TITLE'] = wellcome1['JOURNAL TITLE'].replace(['PFLUGERS ARCHIV'], 'PFLUGERS ARCHIVE')
wellcome1['JOURNAL TITLE'] = wellcome1['JOURNAL TITLE'].replace(['PLOS MEDICINE JOURNAL'], 'PLOS MEDICINE')
wellcome1['JOURNAL TITLE'] = wellcome1['JOURNAL TITLE'].replace(['PLOS NEGECTED TROPICAL DISEASES'], 'PLOS NEGLECTED TROPICAL DISEASES')
wellcome1['JOURNAL TITLE'] = wellcome1['JOURNAL TITLE'].replace(['PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES (PNAS)','PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES OF THE UNITED STATES OF AMERICA','PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES OF THE USA','PNAS (PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES OF THE UNITED STATES OF AMERICA','PNAS','PNAS USA'], 'PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES')
wellcome1['JOURNAL TITLE'] = wellcome1['JOURNAL TITLE'].replace(['PROCEEDINGS B', 'PROCEEDINGS OF THE ROYAL SOCIETY'], 'PROCEEDINGS OF THE ROYAL SOCIETY B: BIOLOGICAL SCIENCES')
wellcome1['JOURNAL TITLE'] = wellcome1['JOURNAL TITLE'].replace(['SCI REP','SCIENTIFIC REPORTS-11-00861B'], 'SCIENTIFIC REPORTS')
wellcome1['JOURNAL TITLE'] = wellcome1['JOURNAL TITLE'].replace(['SEX TRANSM INFECT'], 'SEXUALLY TRANSMITTED INFECTIONS')
wellcome1['JOURNAL TITLE'] = wellcome1['JOURNAL TITLE'].replace(['STUDIES IN HISTORY & PHILOSOPHY OF SCIENCE PART C: STUDIES IN HISTORY AND PHILOSOPHY OF BIOLOGICAL AND BIOMEDICAL SCIENCES'], 'STUDIES IN HISTORY AND PHILOSOPHY OF SCIENCE PART C')
wellcome1['JOURNAL TITLE'] = wellcome1['JOURNAL TITLE'].replace(['THE AMERICAL JOURNAL OF HUMAN GENETICS'], 'THE AMERICAN JOURNAL OF HUMAN GENETICS')
wellcome1['JOURNAL TITLE'] = wellcome1['JOURNAL TITLE'].replace(['THORAX'], 'THORAX AN INTERNATIONAL JOURNAL FOR RESPIRATORY MEDICINE')
wellcome1['JOURNAL TITLE'] = wellcome1['JOURNAL TITLE'].replace(['TROP MED INT HEALTH'], 'TROPICAL MEDICINE & INTERNATIONAL HEALTH')
wellcome1['JOURNAL TITLE'] = wellcome1['JOURNAL TITLE'].replace(['MARTEN CHILD NUTR'], 'MATERNAL & CHILD NUTRITION')
wellcome1['JOURNAL TITLE'] = wellcome1['JOURNAL TITLE'].replace(['SOCIAL PSYCHIATRY & PSYCHIATRIC EPIDEMIOL'], 'SOCIAL PSYCHIATRY & PSYCHIATRIC EPIDEMIOLOGY')
wellcome1['JOURNAL TITLE'] = wellcome1['JOURNAL TITLE'].replace(['AIDS UK', 'AIDS','JOURNAL OF ACQUIRED IMMUNE DEFICIENCY SYNDROMES','JAIDS JOURNAL OF ACQUIRED IMMUNE DEFICIENCY SYNDROMES','JOURNAL OF ACQUIRED IMMUNE DEFICIENCY SYNDROMS (JAIDS)', 'JOURNAL OF AIDS'], 'AIDS JOURNAL')
wellcome1['JOURNAL TITLE'] = wellcome1['JOURNAL TITLE'].replace(['ZOONOSES AND PUBLIC HEALTH'], 'ZOONOSES & PUBLIC HEALTH')
wellcome1['JOURNAL TITLE'] = wellcome1['JOURNAL TITLE'].replace(['AMERICAL JOURNAL OF PSYCHIATRY'], 'AMERICAN JOURNAL OF PSYCHIATRY')
wellcome1['JOURNAL TITLE'] = wellcome1['JOURNAL TITLE'].replace(['BIOLOGICIAL CHEMISTRY'], 'BIOLOGICAL CHEMISTRY')
wellcome1['JOURNAL TITLE'] = wellcome1['JOURNAL TITLE'].replace(['STUDIES IN HISTORY AND PHILOSOPHY OF SCIENCE PART C: STUDIES IN HISTORY AND PHILOSOPHY OF BIOLOGICAL AND BIOMEDICAL SCIENCES'], 'STUDIES IN HISTORY AND PHILOSOPHY OF SCIENCE PART C')
wellcome1['JOURNAL TITLE'] = wellcome1['JOURNAL TITLE'].replace(['JOURNAL OF NEUROLOGY, NEUROSURGERY & PSYCHIATRY'], 'JOURNAL OF NEUROLOGY, NEUROSURGERY AND PSYCHIATRY')
wellcome1['JOURNAL TITLE'] = wellcome1['JOURNAL TITLE'].replace(['BMJ OPEN'], 'BMJ')
wellcome1['JOURNAL TITLE'] = wellcome1['JOURNAL TITLE'].replace(['JNL BIOLOGICAL CHEMISTRY','JOURNAL OF BIOL CHEM'], 'JOURNAL BIOLOGICAL CHEMISTRY')
wellcome1['JOURNAL TITLE'] = wellcome1['JOURNAL TITLE'].replace(['MCP (MOLECULAR & CELLULAR PROTEOMICS)'], 'MOLECULAR AND CELLULAR PROTEOMICS')
wellcome1['JOURNAL TITLE'] = wellcome1['JOURNAL TITLE'].replace(['HAEMATOLOGICA/THE HAEMATOLOGY JOURNAL'], 'HAEMATOLOGICA')
wellcome1['JOURNAL TITLE'] = wellcome1['JOURNAL TITLE'].replace(['IJTLD'], 'INTERNATIONAL JOURNAL OF TUBERCULOSIS  AND LUNG DISEASE')
wellcome1['JOURNAL TITLE'] = wellcome1['JOURNAL TITLE'].replace(['EPIGENTICS'], 'EPIGENETICS')
wellcome1['JOURNAL TITLE'] = wellcome1['JOURNAL TITLE'].replace(['EMBO'], 'EMBO JOURNAL')
wellcome1.head()

Unnamed: 0,PMID/PMCID,PUBLISHER,JOURNAL TITLE,ARTICLE TITLE,COST (GBP) INC. VAT
0,,CUP,PSYCHOLOGICAL MEDICINE,Reduced parahippocampal cortical thickness in ...,£0.00
1,PMC3679557,ACS,BIOMACROMOLECULES,Structural characterization of a Model Gram-ne...,£2381.04
2,23043264 PMC3506128,ACS,J MED CHEM,"Fumaroylamino-4,5-epoxymorphinans and related ...",£642.56
3,23438330 PMC3646402,ACS,J MED CHEM,Orvinols with mixed kappa/mu opioid receptor a...,£669.64
4,23438216 PMC3601604,ACS,J ORG CHEM,Regioselective opening of myo-inositol orthoes...,£685.88


In [683]:
#Take away currency symbols before and after numbers and convert into float
wellcome1['COST (GBP) INC. VAT'] = wellcome1['COST (GBP) INC. VAT'].str.lstrip('£').str.rstrip('$').astype(float)
wellcome1

Unnamed: 0,PMID/PMCID,PUBLISHER,JOURNAL TITLE,ARTICLE TITLE,COST (GBP) INC. VAT
0,,CUP,PSYCHOLOGICAL MEDICINE,Reduced parahippocampal cortical thickness in ...,0.00
1,PMC3679557,ACS,BIOMACROMOLECULES,Structural characterization of a Model Gram-ne...,2381.04
2,23043264 PMC3506128,ACS,J MED CHEM,"Fumaroylamino-4,5-epoxymorphinans and related ...",642.56
3,23438330 PMC3646402,ACS,J MED CHEM,Orvinols with mixed kappa/mu opioid receptor a...,669.64
4,23438216 PMC3601604,ACS,J ORG CHEM,Regioselective opening of myo-inositol orthoes...,685.88
5,PMC3579457,ACS,JOURNAL OF MEDICINAL CHEMISTRY,Comparative Structural and Functional Studies ...,2392.20
6,PMC3709265,ACS,JOURNAL OF PROTEOME RESEARCH,Mapping Proteolytic Processing in the Secretom...,2367.95
7,23057412 PMC3495574,ACS,MOL PHARM,Quantitative silencing of EGFP reporter gene b...,649.33
8,PMCID: PMC3780468,ACS (Amercian Chemical Society) Publications,ACS CHEMICAL BIOLOGY,A Novel Allosteric Inhibitor of the Uridine Di...,1294.59
9,PMCID: PMC3621575,ACS (Amercian Chemical Society) Publications,ACS CHEMICAL BIOLOGY,Chemical proteomic analysis reveals the drugab...,1294.78


In [684]:
#Check unique values and clean remainig misspellings
list(wellcome1['JOURNAL TITLE'].unique())

['PSYCHOLOGICAL MEDICINE',
 'BIOMACROMOLECULES',
 'J MED CHEM',
 'J ORG CHEM',
 'JOURNAL OF MEDICINAL CHEMISTRY',
 'JOURNAL OF PROTEOME RESEARCH',
 'MOL PHARM',
 'ACS CHEMICAL BIOLOGY',
 'JOURNAL OF CHEMICAL INFORMATION AND MODELING',
 'BIOCHEMISTRY',
 'GASTROENTEROLOGY',
 'JOURNAL OF BIOLOGICAL CHEMISTRY',
 'JOURNAL OF IMMUNOLOGY',
 'ACS CHEMICAL NEUROSCIENCE',
 'ACS NANO',
 'AMERICAN CHEMICAL SOCIETY',
 'ANALYTICAL CHEMISTRY',
 'BIOCONJUGATE CHEMISTRY',
 'JOURNAL OF THE AMERICAN CHEMICAL SOCIETY',
 'CHEST',
 'JOURNAL OF NEUROPHYSIOLOGY',
 'JOURNAL OF PHYSIOLOGY',
 'THE JOURNAL OF NEUROPHYSIOLOGY',
 'AMERICAN JOURNAL OF PSYCHIATRY',
 'BEHAVIORAL NEUROSCIENCE',
 'EMOTION',
 'HEALTH PSYCHOLOGY',
 'JOURNAL OF ABNORMAL PSYCHOLOGY',
 'JOURNAL OF CONSULTING AND CLINICAL PSYCHOLOGY',
 'JOURNAL OF EXPERIMENTAL PSYCHOLOGY:  ANIMAL BEHAVIOUR PROCESS',
 'JOURNAL OF EXPERIMENTAL PSYCHOLOGY: HUMAN PERCEPTION AND PERFORMANCE',
 'JOURNAL OF FAMILY PSYCHOLOGY',
 'PSYCHOLOGICAL ASSESSMENT',
 'PSYCHOLO

In [685]:
#Initial size of the dataset once grouped by Journal title
len(grouped1.index)

984

In [686]:
#Final size of the dataset once grouped by Journal title
len(grouped2.index)

827

In [687]:
#Type of data in each column
wellcome1.dtypes

PMID/PMCID              object
PUBLISHER               object
JOURNAL TITLE           object
ARTICLE TITLE           object
COST (GBP) INC. VAT    float64
dtype: object

In [688]:
#Once the data has been cleaned we proceed to answer the questions:
#Question 1: 5 most common journals and total articles for each
#Group articles by JOURNAL TITLE and count them
grouped2 = wellcome1.groupby('JOURNAL TITLE').count().reset_index()
#Sort the number of articles per journal in a decreasing number
grouped3 = grouped2.sort_values('ARTICLE TITLE', ascending = False).reset_index()
#Show the first five common journals (considering the amount of articles sold per journal)
grouped3.head(6)
#Number 3 and 4 are in the same 4th position as they have the same number of articles.

Unnamed: 0,index,JOURNAL TITLE,PMID/PMCID,PUBLISHER,ARTICLE TITLE,COST (GBP) INC. VAT
0,692,PLOS ONE,201,211,211,211
1,463,JOURNAL OF BIOLOGICAL CHEMISTRY,52,53,53,53
2,703,PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES,35,35,35,35
3,633,NEUROIMAGE,28,29,29,29
4,654,NUCLEIC ACIDS RESEARCH,28,29,29,29
5,693,PLOS PATHOGENS,24,24,24,24


In [689]:
#Qestion2: Mean, median, standard deviation of open access cost per article
#Mean of the first 5 articles
grouped4 = wellcome1.groupby('JOURNAL TITLE').describe().reset_index()
grouped4.head()

Unnamed: 0_level_0,JOURNAL TITLE,COST (GBP) INC. VAT,COST (GBP) INC. VAT,COST (GBP) INC. VAT,COST (GBP) INC. VAT,COST (GBP) INC. VAT,COST (GBP) INC. VAT,COST (GBP) INC. VAT,COST (GBP) INC. VAT
Unnamed: 0_level_1,Unnamed: 1_level_1,count,mean,std,min,25%,50%,75%,max
0,ACADEMY OF NUTRITION AND DIETETICS,1.0,2379.54,,2379.54,2379.54,2379.54,2379.54,2379.54
1,ACS CHEMICAL BIOLOGY,5.0,1418.186,507.30956,947.07,1267.76,1294.59,1294.78,2286.73
2,ACS CHEMICAL NEUROSCIENCE,1.0,1186.8,,1186.8,1186.8,1186.8,1186.8,1186.8
3,ACS NANO,2.0,668.14,35.708892,642.89,655.515,668.14,680.765,693.39
4,ACTA CRYSTALLOGRAPHICA SECTION D: BIOLOGICAL C...,5.0,765.338,10.987116,750.16,757.18,771.42,773.74,774.19
