In [26]:
import pandas as pd
import os.path
import glob
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats
import seaborn as sns
import warnings
import re
warnings.filterwarnings('ignore')
%matplotlib inline
plt.style.use('ggplot')

In [88]:
# Directory that holds the exported tester CSVs. Replace path_to_csvfiles with
# the location of your files.
path_to_csvfiles = 'C:/Users/lylek/Documents/Wolf/Flow Testing Project/Flow Data Python/EDC Data/Python/CSVs/'

# Collect the CSV paths. glob returns matches in arbitrary order, so sort them
# to keep the row order of the combined frame reproducible between runs.
# os.path.join makes the pattern work with or without a trailing slash above.
files = sorted(glob.glob(os.path.join(path_to_csvfiles, '*.csv')))
if not files:
    # Without this guard pd.concat below fails with the far less helpful
    # "No objects to concatenate".
    raise FileNotFoundError(f'No CSV files found in {path_to_csvfiles!r}')

# Read each CSV into its own dataframe (no header row; the first line of each
# file is skipped) and tag every row with the name of the file it came from.
dfs = [
    pd.read_csv(csv_path, header=None, skiprows=[0]).assign(tester=os.path.basename(csv_path))
    for csv_path in files
]

# Concatenate all CSV data into one large dataframe and clean it up:
# positional column 2 becomes the 'Date' index, columns 0/3/5 are kept under
# readable names, and the remaining positional columns are discarded.
df = (
    pd.concat(dfs, ignore_index=True)
    .set_index(2)
    .drop(columns=[1, 4, 6, 7, 8, 9, 10, 11])
    .rename(columns={0: 'parameter', 3: 'test', 5: 'flowrate'})
    .rename_axis('Date')
)
df.index = pd.to_datetime(df.index)
df = df.astype({'parameter': 'string', 'test': 'string', 'flowrate': 'float', 'tester': 'string'})

# Keep only the part of the file name before the first '-' as the tester id.
df['tester'] = df['tester'].str.split('-').str[0]

# Add a burner column: the leading token of the test name (everything before
# the first '-' or '_'; the multi-character pattern is treated as a regex),
# translated to a burner rating wherever a mapping is defined below.
burner_labels = {
    'Burner15k': '15K',
    'Burner9k': '9K',
    'B1': '9K',
    'B2': '20K',
    'B3': '15K',
    'B4': '18K',
    'B5': '15K',
    'B6': '18K',
}
# Assign the result back instead of calling replace(inplace=True) on
# df['burner']: in-place mutation of a column selection is chained assignment
# and does not update df when pandas Copy-on-Write is active.
df['burner'] = df['test'].str.split('-|_').str[0].replace(burner_labels)

# Add a line column and populate it based on the tester column. Create it up
# front as an object column so its dtype does not depend on whether the first
# mask matches any row; testers matching neither prefix stay NaN.
df['line'] = np.full(len(df), np.nan, dtype=object)
df.loc[df['tester'].str.startswith('SRT'), 'line'] = 'SRT'
df.loc[df['tester'].str.startswith('R'), 'line'] = 'GR'

# Set burnerLoc to the first 'B<digit>' token found in the test name.
# Raw string avoids the invalid '\d' escape; expand=False returns a Series
# rather than a one-column DataFrame.
df['burnerLoc'] = df['test'].str.extract(r'(B\d)', expand=False)

# Create BTU column: flowrate scaled by 2.117 and 1075, rounded to a whole number.
# NOTE(review): the meaning/units of 2.117 and 1075 are not documented here —
# confirm and consider naming them as constants.
df['btu'] = (df['flowrate'] * 2.117 * 1075).round()


# Create and populate the main/simmer and high/low columns.
# They start as object columns filled with NaN: creating them as float NaN
# and then writing strings into them is an incompatible-dtype assignment,
# which newer pandas deprecates (the warning is hidden by the notebook's
# warnings filter).
df['main/simmer'] = np.full(len(df), np.nan, dtype=object)
df['high/low'] = np.full(len(df), np.nan, dtype=object)

# (source column, substring to look for, main/simmer code, high/low code).
# Rules are applied in order, so when a row matches several rules the last
# matching rule wins — the same precedence as the original statement order.
flow_rules = [
    ('parameter', 'Mhf', 'M', 'H'),
    ('parameter', 'Shf', 'S', 'H'),
    ('test', 'mainH', 'M', 'H'),
    ('test', 'mainL', 'M', 'L'),
    ('test', 'simH', 'S', 'H'),
    ('test', 'simL', 'S', 'L'),
]
for source_col, token, burner_part, level in flow_rules:
    # Plain substring match; na=False keeps missing values out of the mask.
    matches = df[source_col].str.contains(token, regex=False, na=False)
    # One column per assignment instead of broadcasting a 2-element list
    # across two columns.
    df.loc[matches, 'main/simmer'] = burner_part
    df.loc[matches, 'high/low'] = level

# Drop unneeded columns
df = df.drop(columns=['parameter', 'test'])


In [89]:
# Preview the first rows of the cleaned dataframe.
df.head()

Unnamed: 0_level_0,flowrate,tester,burner,line,burnerLoc,btu,main/simmer,high/low
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2020-01-06 05:39:00,1.295,R8110,15K,GR,B2,2947.0,S,H
2020-01-06 06:02:00,1.335,R8110,15K,GR,B2,3038.0,S,H
2020-01-06 06:38:00,1.32,R8110,15K,GR,B2,3004.0,S,H
2020-01-06 06:56:00,1.427,R8110,15K,GR,B2,3248.0,S,H
2020-01-06 07:17:00,1.3,R8110,15K,GR,B2,2959.0,S,H


In [17]:
# Boolean masks selecting specific test names / parameters, and the matching
# row subsets of df.
# NOTE(review): this cell reads df['test'] and df['parameter'], but the
# load/clean cell drops both columns, so on a fresh kernel (Restart & Run All)
# it raises KeyError. Either run these filters before the drop or keep the
# columns; the remaining derived columns do not identify the same rows.
burner15k = df['test'].eq('Burner15k-B2')
burner9k = df['test'].eq('Burner9k-B1')
mainhigh = df['parameter'].eq('Mhflow')

df_burner15k = df.loc[burner15k]
df_burner9k = df.loc[burner9k]