In [28]:
import pandas as pd
import numpy as np
import sys
import os, errno
import matplotlib.pyplot as plt   
import matplotlib.dates as dates
from matplotlib.ticker import AutoMinorLocator
from matplotlib.ticker import ScalarFormatter

# Seaborn plot formatting
import seaborn as sns
sns.set_context(rc={'lines.markeredgewidth': 0.1})
sns.set_style("dark")
sns.set_style("darkgrid")

# matplotlib plot formatting
%matplotlib notebook

# Use SMBconnection to connect to Samba drive in the server
import tempfile
from smb.SMBConnection import SMBConnection
conn = SMBConnection('ihs01','!ndustr!al13', '10.10.0.81', 'ihs01', use_ntlm_v2 = True)
assert conn.connect('10.10.0.81', 139)


In [29]:
def getRGAfilename(dataBase, run, date):
    ''' NAME 
        getRGAfilename
        
        FILE
            RGA-getters.ipynb
        
        DESCRIPTION
                    
            This function lists the <dataBase>/<run> folder of the SambaRGA share
            (through the module-level SMB connection `conn`), sorts the file names
            and returns the ones whose name contains `date`.
            
            If the specified database or run does not exist the error raised by
            the SMB connection propagates to the caller.
            
            If no file name contains `date` a note is printed and an empty
            dataframe is returned (no exception is raised).
            
        INPUT
            dataBase: string name of database
            run:      string name of run
            date:     date in string format (YYYYMMDD), it can also contain the time 
                      in string format (YYYYMMDDHHMM). The value is matched with
                      pandas `str.contains`, i.e. it is interpreted as a regular
                      expression.
            
        RETURN
            pandas dataframe with a single column (labelled 0) holding the matching
            file names; the index is the position of each name in the sorted
            folder listing
            
        EXAMPLE
            dataBase = 'rct007'
            run      = '004'
            date     = '20160707'
            getRGAfilenameDf = getRGAfilename('rct007', '004', '20160707') '''
    
    # Obtain filelist
    filelist = conn.listPath('SambaRGA', dataBase + '/' + run)
    
    # Sorted list of file names
    filenameList = sorted(fn.filename for fn in filelist)
    
    # Make filename into a data frame. The column label is given explicitly so
    # that column 0 exists even when the folder listing is empty.
    df = pd.DataFrame(filenameList, columns=[0])
    
    # Keep the file names of the requested date. An empty listing is returned
    # as it is: there is nothing to match against.
    if df.empty:
        fl = df
    else:
        fl = df[df[0].str.contains(date, na=False)]
    
    # Print a note if the date does not exist and the dataframe is empty
    if fl.empty:
        print('Null Dataframe - Date specified cannot be found')
    
    return fl

In [32]:
def getRGAData(dataBase, run, filename):
    '''NAME 
        getRGAData
        
        FILE
            RGA-getters.ipynb
        
        DESCRIPTION
                     
            This function downloads <dataBase>/<run>/<filename>-histogram.csv from the
            SambaRGA share (through the module-level SMB connection `conn`) into the
            local file 'histogram.txt' and prints:
                - Total pressure
                - Partial Pressure for selected gases including H2, D2/He, mass 5 and 6, 
                  H2O, O2, Ar and CO2 ('N/A' is printed when the value is not above the 
                  reporting threshold of that gas)
           
            If the file cannot be downloaded (wrong database, run or file name,
            connection problem, ...) the function stops through sys.exit with a
            hint about the expected file name format.
            
        INPUT
            dataBase: string name of database
            run:      string name of run
            filename: file name in string format of YYYYMMDDHHMM (date and time), which can be obtained 
                      from getRGAfilename
            
        RETURN
            pandas dataframe with the columns
                - 'AMU' (atomic mass unit)
                - 'Percentage' (pressure percentage at each AMU)
                - 'Partial Pressure' (Percentage * total pressure / 100)
            
        EXAMPLE
            dataBase = 'rct007'
            run      = '004'
            filename = '201607071944'
            getRGAData('rct007', '004', '201607071944')'''
    
    # generate the file path for histogram file
    filePathHis = dataBase + '/' + run + '/' + filename + '-histogram.csv'

    # Copy the histogram file from the server to a local file. The file is
    # opened in binary mode because retrieveFile writes the raw bytes it
    # receives. Only ordinary errors are turned into the hint below, so that
    # interrupting the kernel still works.
    try:
        with open('histogram.txt', 'wb') as fp:
            conn.retrieveFile('SambaRGA', filePathHis, fp, timeout=30)
    except Exception:
        sys.exit('Incorrect or no file name found! Try it with a format of YYYYMMDDHHMM, e.g. 201607121314')
        
    # Get 'Mass number' and 'percentage' for plotting.
    # skipfooter is only supported by the python parser, so it is requested
    # explicitly instead of relying on pandas falling back to it.
    # NOTE(review): header=10 / skipfooter=250 assume a fixed histogram file
    # layout - confirm against a real file.
    data = pd.read_csv('histogram.txt', header=10, skipfooter=250, sep=',', engine='python')
    data.columns = ['AMU', 'Percentage']

    # Get header (the "subject, value" lines at the top of the file)
    header = pd.read_csv('histogram.txt', header=0, skipfooter=300, sep=',', engine='python')
    header.columns = ['subject', 'value']
    
    # Total Pressure is read from row 5 of the header block
    totalPressure = float(header['value'][5])
    
    print('Total Pressure is: ' + str(totalPressure))
    print('')
    
    # Convert percentage to partial pressure
    data['Partial Pressure'] = data['Percentage'] * totalPressure / 100
    
    # Rows are addressed by position.
    # NOTE(review): this presumably relies on row i holding AMU i+1
    # (row 0 = AMU 1) - confirm against the file format.
    b = data['Partial Pressure']

    # (label, partial pressure, reporting threshold) for every reported gas
    report = (
        ('H2 partial pressure (Torr) is',            b[0] + b[1] + b[2], 1e-10),
        ('D2/He partial pressure (Torr) is',         b[3],               1e-10),
        ('Mass 5 (D2H+) partial pressure (Torr) is', b[4],               1e-11),
        ('Mass 6 (D3+) partial pressure (Torr) is',  b[5],               1e-11),
        ('H2O partial pressure (Torr) is',           b[17] + b[18],      1e-11),
        ('O2 partial pressure (Torr) is',            b[31],              1e-11),
        ('Ar partial pressure is',                   b[39],              1e-11),
        ('CO2 partial pressure is',                  b[43],              1e-11),
    )

    # Print out results
    for label, pressure, threshold in report:
        print(label)
        if pressure > threshold:
            print(pressure)
        else:
            print('N/A')
        print('')

    return data

In [31]:
def _downloadRGAFile(remotePath, localPath):
    '''Copy one file of the SambaRGA share to localPath using the module-level `conn`.'''
    # Binary mode: retrieveFile writes the raw bytes it receives.
    with open(localPath, 'wb') as fp:
        conn.retrieveFile('SambaRGA', remotePath, fp, timeout=30)


def _formatCurrentAxes(xlabel, ylabel, title, labelSize, tickSize, titleSize, xTickRotation=None):
    '''Apply the labels, title and layout shared by all RGA figures to the current axes.'''
    plt.xlabel(xlabel, fontsize=labelSize)
    plt.ylabel(ylabel, fontsize=labelSize)
    plt.tick_params(axis='x', labelsize=tickSize)
    plt.tick_params(axis='y', labelsize=tickSize)
    plt.title(title, fontsize=titleSize, ha="center")
    if xTickRotation is not None:
        plt.xticks(rotation=xTickRotation)
    plt.autoscale(tight=True)
    plt.tight_layout()
    plt.ticklabel_format(useOffset=False, axis='y')


def getRGAPlot(dataBase, run, filename):
    ''' NAME 
        getRGAPlot
        
        FILE
            RGA-getters.ipynb
        
        DESCRIPTION
                     
            This function downloads the -histogram.csv, -trend.csv and -curve.csv files
            of <dataBase>/<run>/<filename> from the SambaRGA share (through the
            module-level SMB connection `conn`) into the local files 'histogram.txt',
            'trend.txt' and 'curve.txt', prints the total pressure and draws:
                - Figure 1: a bar plot of Partial Pressure vs Atomic Mass Number
                - Figure 2: a bar plot of Gas Component Percentage vs Atomic Mass Number
                - Figure 3: a Trend curve
                - Figure 4: a raw spectra curve
           
            If the histogram file cannot be downloaded (wrong database, run or file
            name, connection problem, ...) the function stops through sys.exit with a
            hint about the expected file name format.
            
            Errors while downloading the trend or curve file are not translated;
            they propagate to the caller unchanged.
            
        INPUT
            dataBase: string name of database
            run:      string name of run
            filename: file name in string format of YYYYMMDDHHMM (date and time), which can be obtained 
                      from getRGAfilename
            
        RETURN
            None - the four figures are created as a side effect
            
        EXAMPLE
            dataBase = 'rct007'
            run      = '004'
            filename = '201607071944'
            getRGAPlot('rct007', '004', '201607071944') '''
   
    filePrefix = dataBase + '/' + run + '/' + filename
    
    # copy the histogram file in the server to local txt file (once). Only
    # ordinary errors are turned into the hint below, so that interrupting
    # the kernel still works.
    try:
        _downloadRGAFile(filePrefix + '-histogram.csv', 'histogram.txt')
    except Exception:
        sys.exit('Incorrect or no file name found! Try it with a format of YYYYMMDDHHMM, e.g. 201607121314')
    
    # Get 'Mass number' and 'percentage' for plotting.
    # skipfooter is only supported by the python parser, so it is requested
    # explicitly instead of relying on pandas falling back to it.
    # NOTE(review): header=10 / skipfooter=250 assume a fixed histogram file
    # layout - confirm against a real file.
    histogram = pd.read_csv('histogram.txt', header=10, skipfooter=250, sep=',', engine='python')
    histogram.columns = ['AMU', 'Percentage']

    # Get header (the "subject, value" lines at the top of the file)
    header = pd.read_csv('histogram.txt', header=0, skipfooter=300, sep=',', engine='python')
    header.columns = ['subject', 'value']
    
    # Total Pressure is read from row 5 of the header block
    totalPressure = float(header['value'][5])
    print('Total Pressure is: ' + str(totalPressure))
    
    # Convert percentage to partial pressure
    histogram['Partial Pressure'] = histogram['Percentage'] * totalPressure / 100
   
    # ---- Figure 1: RGA spectra as bar plot ----
    histogram.plot(x='AMU', y='Partial Pressure', kind='bar', figsize=(12, 8))
    _formatCurrentAxes('AMU', 'Partial Pressure', 'Partial Pressure vs Atomic Mass Number',
                       labelSize=20, tickSize=20, titleSize=25)
    
    # ---- Figure 2: gas percentages ----
    # The bars are placed at 1..N rather than at the AMU values of the file
    amu = list(range(1, len(histogram) + 1))
    width = 1
    fig, ax = plt.subplots(1, 1, figsize=[12, 8])

    plt.bar(amu, histogram['Percentage'], width, color="blue")
    # The on/off flag of grid() is passed positionally because its keyword
    # name differs between matplotlib releases.
    ax.yaxis.grid(True)
    ax.xaxis.grid(True, which='major', linestyle='-')
    ax.xaxis.grid(True, which='minor', linestyle='-')

    # Add labels for selected gases: (text, arrow tip in data coordinates,
    # text position in axes fraction)
    gasLabels = (
        ('H2O',      (18, 25), (0.4, 0.6)),
        ('H2',       (2, 3),   (0.02, 0.2)),
        ('D2 or He', (4, 2),   (0.1, 0.15)),
        ('N2',       (29, 2),  (0.58, 0.2)),
        ('O2',       (32, 2),  (0.68, 0.2)),
        ('CO2',      (44, 2),  (0.79, 0.2)),
    )
    for text, tip, textPosition in gasLabels:
        # 'middle' is not a valid horizontal alignment and is rejected by
        # current matplotlib. NOTE(review): 'left' is used on the assumption
        # that older releases drew the unknown value that way, which keeps
        # the tuned text positions - confirm, or switch to 'center'.
        ax.annotate(text, fontsize=28, xy=tip, xycoords='data',
                    xytext=textPosition, textcoords='axes fraction',
                    arrowprops=dict(facecolor='black', shrink=0.1),
                    horizontalalignment='left', verticalalignment='top')

    # Plot formatting
    ax.xaxis.set_minor_locator(AutoMinorLocator(15))
    _formatCurrentAxes('AMU', 'Percentage', 'Gas Percentage',
                       labelSize=20, tickSize=15, titleSize=25)
    fig.canvas.draw()

    # ---- Figure 3: trend curve ----
    # copy the trend file in the server to local txt file
    _downloadRGAFile(filePrefix + '-trend.csv', 'trend.txt')

    # convert txt file to data frame
    trend = pd.read_csv('trend.txt', header=0, skipfooter=0, sep=',')

    # Partial pressure for mass number 1-50.
    # NOTE(review): unlike the histogram conversion there is no division by
    # 100 here - confirm whether the trend columns hold fractions or
    # percentages.
    for i in range(1, 51):
        trend[str(i) + 'AMU Partial P'] = trend[str(i)] * trend['Avg Pressure']
    
    trend.plot(x='Time',
               y=['Avg Pressure', '1AMU Partial P', '2AMU Partial P', '3AMU Partial P', '4AMU Partial P',
                  '5AMU Partial P', '6AMU Partial P', '18AMU Partial P', '19AMU Partial P', '40AMU Partial P'],
               color=['b', 'r', 'y', 'g', 'k', 'Pink', 'LightGreen', 'DarkRed', 'DarkOrange', 'Gray'],
               figsize=[10, 8], x_compat=True)

    # formatting the plot, x axis labels rotated by 45 degree
    _formatCurrentAxes('Time', 'Partial Pressure', 'Trend curve',
                       labelSize=25, tickSize=15, titleSize=25, xTickRotation=45)
    
    # ---- Figure 4: raw spectra ----
    # copy the raw spectra file in the server to local txt file
    _downloadRGAFile(filePrefix + '-curve.csv', 'curve.txt')

    # read the txt file into data frame
    curve = pd.read_csv('curve.txt', header=10, sep=',')
    curve.columns = ['Mass', 'Intensity']

    # Plot the raw spectra and edit formatting
    curve.plot(x='Mass', y='Intensity', figsize=[11, 8])
    _formatCurrentAxes('Mass', 'Signal Intensity', 'Residual Gas Analysis Raw Spectra',
                       labelSize=20, tickSize=15, titleSize=20)
    
    return 