In [1]:
import pdfplumber
import scraper
import numpy as np
import config

In [2]:
# Resolve the user's home directory once, then derive the folder the reference
# PDFs are read from and the folder the extracted files are written to.
userHome = config.userHome()
pdfHome = f"{userHome}/projects/cs/ref/angus/"
outdir = f"{userHome}/projects/cs/data/raw/ext/n2/"

In [3]:
# Fetch one handle per reference PDF through scraper.getPDF; the later cells
# refer to the documents by these five names.
pdfItikawa, pdfBuckman, pdfKarwasz, pdfElford, pdfZipf = [
    scraper.getPDF(pdfHome + pdfName)
    for pdfName in (
        "16_Itikawa_J_Phys_Chem_Ref_Data_35_31_2006.pdf",
        "16_29_buckman_2003.pdf",
        "16_9_karwasz_2003.pdf",
        "16_31_elford_2003.pdf",
        "20_zipf_et_al-1980-Journal_of_Geophysical_Research__Space_Physics.pdf",
    )
]

In [4]:
# Reset the output directory before any cell writes into it: call
# scraper.rmdir on outdir, then scraper.mkdir on the same path.
# NOTE(review): presumably rmdir also removes existing contents — confirm in scraper.
scraper.rmdir(outdir)
scraper.mkdir(outdir)

In [5]:
# Three crop rectangles, passed as cropDimArray when page 3 of the Itikawa PDF
# is read in the next cell.
# NOTE(review): the 4-tuples look like pdfplumber-style (x0, top, x1, bottom)
# boxes — confirm against scraper.getTableData.
cropDim = [
    (300, 503, 390, 720),
    (400, 503, 470, 720),
    (480, 503, 560, 720),
]

In [6]:
# Read the table on page 3 of the Itikawa PDF through the crop rectangles in
# cropDim, with automatic table location switched off.
# NOTE(review): omitRegexp presumably drops entries matching '[0-9]a'
# (footnote-marked values?) — confirm in scraper.
data = scraper.getTableData(
    pdf=pdfItikawa,
    pageNumber=3,
    cropDimArray=cropDim,
    locateTables=False,
    omitRegexp='[0-9]a',
)

In [7]:
# Split the Itikawa total cross section table at the first row whose column-0
# value equals 5.0, then write each part with its own metadata file.
itikawaSplitRows = np.where(data[:, 0] == 5.0)[0]
if itikawaSplitRows.size == 0:
    # Indexing an empty match array would only raise a bare IndexError; say
    # what was being looked for so a changed crop/page is easy to diagnose.
    raise ValueError(
        "no row with value 5.0 in column 0 of the Itikawa total cross section table; "
        "check the pageNumber/cropDim used for extraction"
    )
splitIndex = itikawaSplitRows[0]

# Rows before the split: written with lpu/upu = 0.2 (the background text
# quotes an uncertainty of up to +/- 20%).
scraper.writeDataToFile(data[:splitIndex], outdir + "total_itikawa2006_2a")
scraper.writeMetaDataToFile(
    filename=outdir + "total_itikawa2006_2a.metadata",
    specie="N2",
    process="total",
    units_e=1.0,
    units_sigma=1.0E-20,
    ref="itikawa2006",
    background='computed from weighted average of four sets of experimental data, primarily time of flight (TOF) experiments; Itikawa states that the data may have large uncertainty (up to +/- 20%)',
    lpu=0.2,
    upu=0.2,
)

# Rows from the split onward: no lpu/upu is passed because the source gives
# no uncertainty estimate for this range.
scraper.writeDataToFile(data[splitIndex:], outdir + "total_itikawa2006_2b")
scraper.writeMetaDataToFile(
    filename=outdir + "total_itikawa2006_2b.metadata",
    specie="N2",
    process="total",
    units_e=1.0,
    units_sigma=1.0E-20,
    ref="itikawa2006",
    background='computed from weighted average of nine sets of experimental data, primarily time of flight (TOF) experiments; no estimate of the uncertainty',
)

In [8]:
# Four crop rectangles, passed as cropDimArray when page 14 of the Karwasz PDF
# is read in the next cell.
cropDim = [
    (60, 165, 180, 645),
    (180, 165, 290, 645),
    (290, 165, 400, 645),
    (400, 165, 510, 645),
]

In [9]:
# Read the table on page 14 of the Karwasz PDF through the crop rectangles in
# cropDim, with automatic table location switched off.
data = scraper.getTableData(
    pdf=pdfKarwasz,
    pageNumber=14,
    cropDimArray=cropDim,
    locateTables=False,
)

In [10]:
# Write the Karwasz total cross section table plus a sibling .metadata file.
# No lpu/upu is passed for this data set.
karwaszMetadata = dict(
    filename=outdir + "total_karwasz2003_6.1.3a.metadata",
    specie="N2",
    process="total",
    units_e=1.0,
    units_sigma=1.0E-20,
    ref="karwasz2003",
    background='This structure is due to the presence of the 2Pig resonant state which manifests itself in total, elastic and vibrational excitation cross sections; different TCS measurements agree on the position and height of the peaks; the resonant structure in N2 , in particular as measured in Kennerly, R.E.: Phys. Rev. A 21 (1980) 1876, is the most frequently used as standard in the energy scale calibration for low-energy electron scattering.',
)
scraper.writeDataToFile(data, outdir + "total_karwasz2003_6.1.3a")
scraper.writeMetaDataToFile(**karwaszMetadata)

In [11]:
# Three crop rectangles, passed as cropDimArray when page 5 of the Buckman PDF
# is read in the next cell.
cropDim = [
    (70, 410, 220, 550),
    (220, 410, 370, 550),
    (370, 410, 500, 550),
]

In [12]:
# Read the table on page 5 of the Buckman PDF through the crop rectangles in
# cropDim, with automatic table location switched off.
data = scraper.getTableData(
    pdf=pdfBuckman,
    pageNumber=5,
    cropDimArray=cropDim,
    locateTables=False,
)

In [13]:
# Write the Buckman elastic cross section table plus a sibling .metadata file,
# recording lpu/upu = 0.2.
buckmanMetadata = dict(
    filename=outdir + "elastic_buckman2003_6.2.5.2.metadata",
    specie="N2",
    process="elastic",
    units_e=1.0,
    units_sigma=1.0E-20,
    ref="buckman2003",
    background='derived from differential scattering measurements; at low energies, in the region of the dominant 2Pig resonance, the level of detail in the DCS measurements does not permit the fine details of the resonance profile to be extracted, so a broad envelope of the resonance enhanced cross section is thus provided; there is good general agreement between the preferred cross section and theoretical calculations',
    lpu=0.2,
    upu=0.2,
)
scraper.writeDataToFile(data, outdir + "elastic_buckman2003_6.2.5.2")
scraper.writeMetaDataToFile(**buckmanMetadata)

In [14]:
# Three crop rectangles, passed as cropDimArray when page 6 of the Elford PDF
# is read in the next cell.
cropDim = [
    (90, 255, 200, 750),
    (200, 255, 360, 750),
    (360, 255, 520, 750),
]

In [15]:
# Read the table on page 6 of the Elford PDF through the crop rectangles in
# cropDim, with automatic table location switched off.
data = scraper.getTableData(
    pdf=pdfElford,
    pageNumber=6,
    cropDimArray=cropDim,
    locateTables=False,
)

In [16]:
# Cut the Elford momentum-transfer table into four consecutive segments at the
# first rows whose column-0 value equals 0.5, 1.9 and 3.5, and write each
# segment (suffix a-d) with its own metadata file.
elfordSplitRows = []
for splitValue in (0.5, 1.9, 3.5):
    matchingRows = np.where(data[:, 0] == splitValue)[0]
    if matchingRows.size == 0:
        # Indexing an empty match array would only raise a bare IndexError;
        # name the missing value so a changed crop/page is easy to diagnose.
        raise ValueError(
            "no row with value %s in column 0 of the Elford momentum transfer table; "
            "check the pageNumber/cropDim used for extraction" % splitValue
        )
    elfordSplitRows.append(matchingRows[0])
splitIndexA, splitIndexB, splitIndexC = elfordSplitRows

# (file suffix, rows, segment-specific metadata). Segment "c" carries no
# lpu/upu, so none is passed for it.
elfordSegments = [
    ("a", data[:splitIndexA], dict(background='', lpu=0.05, upu=0.05)),
    ("b", data[splitIndexA:splitIndexB], dict(background='', lpu=0.1, upu=0.1)),
    ("c", data[splitIndexB:splitIndexC], dict(
        background='from theoretical momentum  transfer  cross  sections for  vibrational transitions v=0->0; the table in the original reference (Robertson 1997) gives cross sections for v=0->1,2 transitions and total momentum transfer cross sections as the sum of the three components')),
    ("d", data[splitIndexC:], dict(background='', lpu=0.2, upu=0.2)),
]

for segmentSuffix, segmentData, segmentMetadata in elfordSegments:
    segmentPath = outdir + "momentum_elford2003_" + segmentSuffix
    scraper.writeDataToFile(segmentData, segmentPath)
    scraper.writeMetaDataToFile(
        filename=segmentPath + ".metadata",
        specie="N2",
        process="momentum",
        units_e=1.0,
        units_sigma=1.0E-20,
        ref="elford2003",
        **segmentMetadata
    )

In [17]:
#TODO: incorporate vibrational cross sections from Brunger

In [18]:
# Two crop rectangles, passed as cropDimArray when page 6 of the Itikawa PDF
# is read in the next cell.
cropDim = [
    (50, 105, 170, 275),
    (180, 105, 300, 275),
]

In [19]:
# Read the table on page 6 of the Itikawa PDF and write it out as the
# rotational J=0->2 excitation data set, with lpu/upu = 0.1.
data = scraper.getTableData(
    pdf=pdfItikawa,
    pageNumber=6,
    cropDimArray=cropDim,
    locateTables=False,
)
scraper.writeDataToFile(data, outdir + "rotational_itikawa2006")

rotationalMetadata = dict(
    filename=outdir + "rotational_itikawa2006.metadata",
    specie="N2",
    process="excitation_j",
    units_e=1.0,
    units_sigma=1.0E-20,
    ref="itikawa2006",
    lhs_j=0,
    rhs_j=2,
    background='for J=0->2 transition; based on theoretical calculations of morrison1997',
    lpu=0.1,
    upu=0.1,
)
scraper.writeMetaDataToFile(**rotationalMetadata)

Itikawa, Table 8. Recommended cross sections for the electron impact excitation from the ground state $N_2 (X^1\Sigma)$ (Part 1)

In [20]:
# Four crop rectangles for page 9 of the Itikawa PDF; the next cell reads each
# one separately.
cropDim = [
    (120, 440, 200, 750),
    (210, 440, 290, 750),
    (290, 440, 400, 750),
    (400, 440, 500, 750),
]

In [21]:
# Read page 9 of the Itikawa PDF once per crop rectangle, so that data[k]
# holds the table extracted from cropDim[k] alone.
data = [
    scraper.getTableData(
        pdf=pdfItikawa,
        pageNumber=9,
        cropDimArray=[cropRegion],
        locateTables=False,
    )
    for cropRegion in cropDim
]

# The first extracted table is later written as two parts; find the first row
# whose column-0 value equals 15 to use as the cut point.
firstTableSplitRows = np.where(data[0][:, 0] == 15)[0]
if firstTableSplitRows.size == 0:
    # Indexing an empty match array would only raise a bare IndexError; say
    # what was being looked for so a changed crop/page is easy to diagnose.
    raise ValueError(
        "no row with value 15 in column 0 of the first table extracted from "
        "page 9 of the Itikawa PDF; check cropDim[0]"
    )
splitIndex = firstTableSplitRows[0]

In [22]:
# Background note shared by every data set written in this cell; it goes
# through config.removeCRs before being stored.
# NOTE(review): "oherwise" is a typo that ends up in the written metadata.
backgroundText = """R-matrix method theoretical cross sections have been referred to for the detailed
structure near threshold; oherwise a weighted average of the experimental cross sections has been taken
with a polynomial least square fit to the energy dependence of the individual set of the cross
sections; thus the estimated uncertainty indicates the degree
of the concurrence of the individual experimental results"""

# Five data sets: the first extracted table cut at splitIndex (two parts, same
# final state, different lpu/upu) followed by the three remaining tables.
dataArray = [
    data[0][:splitIndex],
    data[0][splitIndex:],
    data[1],
    data[2],
    data[3],
]
lpuArray = [0.4, 0.35, 0.35, 0.35, 0.4]
upuArray = lpuArray  # same list object: upper bounds equal lower bounds
rhsArray = ['A3Sigmau+', 'A3Sigmau+', 'B3Pig', 'W3Deltau', 'Bp3Sigmau-']

# One writeCSToFile call per data set; the file name ends in the data set index.
for fileIndex, crossSectionData in enumerate(dataArray):
    scraper.writeCSToFile(
        filename=outdir + "excitation_itikawa2006_a_" + str(fileIndex),
        dataArray=crossSectionData,
        specie="N2",
        process="excitation",
        lhsA='X1Sigmag+',
        rhsA=rhsArray[fileIndex],
        units_e=1.0,
        units_sigma=1.0E-20,
        ref="itikawa2006",
        background=config.removeCRs(backgroundText),
        lpu=lpuArray[fileIndex],
        upu=upuArray[fileIndex],
    )

Itikawa, Table 8. Recommended cross sections for the electron impact excitation from the ground state $N_2 (X^1\Sigma)$ (Parts 2 and 3)

In [23]:
# Six crop rectangles for page 10 of the Itikawa PDF; the next cell reads each
# one separately.
cropDim = [
    (40, 120, 120, 450),
    (130, 120, 210, 450),
    (210, 120, 290, 450),
    (310, 120, 400, 450),
    (400, 120, 480, 450),
    (480, 120, 560, 450),
]

In [24]:
# Read page 10 of the Itikawa PDF once per crop rectangle, so that data[k]
# holds the table extracted from cropDim[k] alone.
data = [
    scraper.getTableData(
        pdf=pdfItikawa,
        pageNumber=10,
        cropDimArray=[cropRegion],
        locateTables=False,
    )
    for cropRegion in cropDim
]

In [25]:
# Background notes for the six data sets written by the loop later in this
# cell; entry k is looked up as backgroundText[k] and passed through
# config.removeCRs, alongside rhsArray[k] and lpuArray[k].
# NOTE(review): entry 1 ("compling") and entry 4 ("beam measurementsthe cross
# section") contain typos that end up verbatim in the written metadata.
backgroundText = ["""a weighted average of the experimental
cross sections has been taken with a polynomial least square
fit to the energy dependence of the individual set of the cross
sections. Thus the estimated uncertainty indicates the degree
of the concurrence of the individual experimental results""",
"""when compling the data set, Brunger considered
four sets of beam measurements, an electron energy loss measurement in which the data
were normalized using an emission cross section of the
Lyman–Birge–Hopfield (LBH) system, and Mason
and Newell's direct detection of the excited molecule""",
"""a weighted average of the experimental
cross sections has been taken with a polynomial least square
fit to the energy dependence of the individual set of the cross
sections. Thus the estimated uncertainty indicates the degree
of the concurrence of the individual experimental results""",
"""a weighted average of the experimental
cross sections has been taken with a polynomial least square
fit to the energy dependence of the individual set of the cross
sections. Thus the estimated uncertainty indicates the degree
of the concurrence of the individual experimental results""",
"""Brunger determined their recommended
values considering five sets of
beam measurementsthe cross section; the data has
a sharp peak in the vicinity of the threshold
identified with a core-excited shape resonance; two groups
have determined the resonant cross section with the use of
direct detection of the molecule in the metastable E state; 
the magnitudes of the two sets of cross section differ significantly
from one another; by using a trochoidal electron
spectrometer, Poparich determined the absolute
values of the cross section at 11.94 and 12.14 eV, and this measurement
supported one set of the cross section against the
other""",
"""a weighted average of the experimental
cross sections has been taken with a polynomial least square
fit to the energy dependence of the individual set of the cross
sections. Thus the estimated uncertainty indicates the degree
of the concurrence of the individual experimental results"""]
                  
# Per-data-set lpu/upu values and final-state labels, in the same order as the
# tables held in data.
lpuArray = [0.3, 0.25, 0.3, 0.3, 0.4, 0.33]
upuArray = lpuArray  # same list object: upper bounds equal lower bounds
rhsArray = ['apSigmau-', 'a1Pig', 'w1Deltau', 'C3Piu', 'E3Sigmag+', 'app1Sigmag+']

# One writeCSToFile call per extracted table; the file name ends in the table
# index and the matching background entry is passed through config.removeCRs.
for fileIndex, crossSectionData in enumerate(data):
    scraper.writeCSToFile(
        filename=outdir + "excitation_itikawa2006_b_" + str(fileIndex),
        dataArray=crossSectionData,
        specie="N2",
        process="excitation",
        lhsA='X1Sigmag+',
        rhsA=rhsArray[fileIndex],
        units_e=1.0,
        units_sigma=1.0E-20,
        ref="itikawa2006",
        background=config.removeCRs(backgroundText[fileIndex]),
        lpu=lpuArray[fileIndex],
        upu=upuArray[fileIndex],
    )

In [26]:
# Three crop rectangles for page 12 of the Itikawa PDF; the next cell reads
# each one separately.
cropDim = [
    (300, 490, 390, 780),
    (390, 490, 480, 780),
    (480, 490, 560, 780),
]

In [27]:
# Read page 12 of the Itikawa PDF once per crop rectangle, so that data[k]
# holds the table extracted from cropDim[k] alone.
data = [
    scraper.getTableData(
        pdf=pdfItikawa,
        pageNumber=12,
        cropDimArray=[cropRegion],
        locateTables=False,
    )
    for cropRegion in cropDim
]

In [28]:
# Per-data-set parameters, in the same order as the tables held in data.
# The first lpu/upu entry is the literal string '\N' rather than a number.
# NOTE(review): '\N' is presumably a null marker for the downstream loader — confirm.
lpuArray = ['\\N', 0.135, 0.22]
upuArray = lpuArray  # same list object: upper bounds equal lower bounds
lhsArray = ['a1Pig', 'C3Piu', 'c4p1Sigmau+']
rhsArray = ['X1Sigmag+', 'B3Pig', 'X1Sigmag+']
wavelengthArray = [135.4, 337.1, 95.8]
lhs_vArray = [3, 0, 0]
rhs_vArray = [0, 0, 0]

# One writeCSToFile call per extracted table; the file name ends in the table
# index. units_sigma here is 1.0E-22, unlike the 1.0E-20 used elsewhere in
# this notebook.
for fileIndex, crossSectionData in enumerate(data):
    scraper.writeCSToFile(
        filename=outdir + "c_deexcitation_v_itikawa2006_" + str(fileIndex),
        dataArray=crossSectionData,
        specie="N2",
        process="c_deexcitation_v",
        lhsA=lhsArray[fileIndex],
        rhsA=rhsArray[fileIndex],
        wavelength=wavelengthArray[fileIndex],
        lhs_v=lhs_vArray[fileIndex],
        rhs_v=rhs_vArray[fileIndex],
        units_e=1.0,
        units_sigma=1.0E-22,
        ref="itikawa2006",
        background='',
        lpu=lpuArray[fileIndex],
        upu=upuArray[fileIndex],
    )

Itikawa, Table 14. Total dissociation cross section for electron collisions with $N_2$ recommended by Cosby.

In [29]:
# Single crop rectangle, passed as cropDimArray when page 17 of the Itikawa
# PDF is read in the next cell.
cropDim = [
    (60, 560, 300, 750),
]

In [30]:
# Read the table on page 17 of the Itikawa PDF through the crop rectangle in
# cropDim, with automatic table location switched off.
data = scraper.getTableData(
    pdf=pdfItikawa,
    pageNumber=17,
    cropDimArray=cropDim,
    locateTables=False,
)

In [31]:
# Write the total dissociation table plus a sibling .metadata file. lpu/upu is
# 0.3, the larger of the two error figures quoted in the background text.
# NOTE(review): "detecter" is a typo that ends up in the written metadata.
dissociationMetadata = dict(
    filename=outdir + "total_dissociation_itikawa2006_14.metadata",
    specie="N2",
    process="dissociation",
    units_e=1.0,
    units_sigma=1.0E-20,
    ref="itikawa2006",
    background='Weighted sum of Winters gas cell and Cosby fast $N_2$ beam data; for Cosby data, the correlated pair N + N was detected by a time and position sensitive detecter. Error for Cosby data was 20% and error for Winters data was 30%.',
    lpu=0.3,
    upu=0.3,
)
scraper.writeDataToFile(data, outdir + "total_dissociation_itikawa2006_14")
scraper.writeMetaDataToFile(**dissociationMetadata)

In [32]:
# Single crop rectangle, passed as cropDimArray when page 6 of the Zipf PDF is
# read in the next cell.
cropDim = [
    (60, 120, 240, 480),
]

In [33]:
# NOTE(review): the recorded output of this cell is
# "AttributeError: module 'scraper' has no attribute 'getTableStrings'", so
# this assignment never completes and the notebook does not survive
# Restart & Run All. TODO: add getTableStrings to scraper, or switch to an
# extraction helper that exists, before relying on the Zipf table.
data = scraper.getTableStrings(pdf=pdfZipf,pageNumber=6,cropDimArray=cropDim,locateTables=False)

AttributeError: module 'scraper' has no attribute 'getTableStrings'

In [None]:
# Display the current value of `data`.
# NOTE(review): the preceding getTableStrings cell raised, so `data` here is
# still whatever the last successful assignment produced, not the Zipf table.
data