# https://pandas.pydata.org/docs/user_guide/10min.html

In [38]:
import pandas as pd
import numpy as np
import typing
import string

In [39]:
def import_wt_csv(file_path: str) -> pd.DataFrame:
    """Load a CSV file into a DataFrame.

    Args:
        file_path: Location of the CSV file to read.

    Returns:
        The parsed file, read with pandas' default ``read_csv`` options.
    """
    raw_frame = pd.read_csv(file_path)
    return raw_frame

In [40]:
def find_column_name_index(
    df: pd.DataFrame, report=False, search_term: str = "Store No."
) -> int:
    """Return the position of the row that contains the column names.

    The first column of ``df`` is scanned from top to bottom and the
    0-based position of the first entry equal to ``search_term`` is
    returned. Column names are the physical quantities measured, like
    voltage and current.

    Args:
        df: Frame whose first column is searched.
        report: When truthy, print the matching entry and its position.
        search_term: Value that marks the column-name row. Defaults to
            ``"Store No."``.

    Returns:
        The 0-based row position of the first match.

    Raises:
        ValueError: If no entry in the first column equals ``search_term``.
    """
    for index, entry in enumerate(df.iloc[:, 0]):
        if entry == search_term:
            if report:
                print(f"The entry \"{entry}\" is at index: {index}")
            return index
    # Fail loudly here instead of handing None to callers that expect an
    # integer position and would break later with a less helpful error.
    raise ValueError(
        f"No entry equal to {search_term!r} was found in the first column"
    )

In [41]:

def get_column_names(df: pd.DataFrame, index: int) -> np.ndarray:
    """Return one row of ``df`` as an array of strings.

    The result is intended to be used as column headers.

    Args:
        df: Frame to read the row from.
        index: Position (not label) of the row to extract.

    Returns:
        The values of the selected row converted to ``str``.
    """
    header_row = df.iloc[index, :]
    labels = header_row.to_numpy(dtype=str)
    return labels

In [42]:
def apply_column_names(df: pd.DataFrame) -> pd.DataFrame:
    """Relabel the columns of ``df`` using the header row found in its data.

    The row holding the column names is located with
    ``find_column_name_index`` and read with ``get_column_names``; its
    values then replace the current column labels.

    Args:
        df: Frame to relabel. It is modified in place.

    Returns:
        The same ``df`` object, now carrying the new column labels.
    """
    column_name_index = find_column_name_index(df)
    column_names = get_column_names(df, column_name_index)
    df.columns = column_names
    # Hand the frame back (rather than None) so the result can be passed
    # straight to the next processing step, as the annotation promises.
    return df

In [43]:
def apply_index(df: pd.DataFrame) -> pd.DataFrame:
    """Use the values of the first column (Store Numbers) as the row index.

    Args:
        df: Frame to re-index. It is modified in place.

    Returns:
        The same ``df`` object with its index replaced.
    """
    first_column = df.iloc[:, 0]
    df.index = first_column.to_numpy()
    return df

In [44]:
def trim_dataframe(df: pd.DataFrame) -> pd.DataFrame:
    """Drop the top of the frame, keeping only rows below the column-name row.

    Args:
        df: Frame whose first column contains the column-name marker
            looked up by ``find_column_name_index``.

    Returns:
        The rows of ``df`` that come after the column-name row, with all
        columns kept.
    """
    header_position = find_column_name_index(df)
    first_data_row = header_position + 1
    return df.iloc[first_data_row:, :]

In [45]:
# Absolute paths to the sample CSV files (full, short and micro variants,
# going by the file names).
wt_full_sample: str = 'C:\\Users\\count\\dev\\pandas\\data\\PA_sample_data.csv'
wt_short_sample: str = 'C:\\Users\\count\\dev\\pandas\\data\\PA_sample_data_short.csv'
wt_micro_sample: str = "C:\\Users\\count\\dev\\pandas\\data\\PA_micro.csv"

# The file that gets loaded; point this at one of the samples above.
my_path: str = wt_full_sample


In [46]:
# Read the CSV file selected by ``my_path``.
df = import_wt_csv(my_path)

# Each step takes the result of the previous one: relabel the columns,
# set the row index, then drop the rows above the data.
df_col = apply_column_names(df)
df_idx = apply_index(df_col)
df_trm = trim_dataframe(df_idx)

# Bare expression so the notebook displays the final frame as cell output.
df_trm


Unnamed: 0,Store No.,Date,Time,Millisecond,U-1-Total,I-1-Total,P-1-Total,S-1-Total,Q-1-Total,PF-1-Total,...,U-3-Total,I-3-Total,P-3-Total,I-3-Total.1,Q-3-Total,PF-3-Total,FreqU-3-Total,P-SigmaA-Total,S-SigmaA-Total,Q-SigmaA-Total
1,1,2022/02/24,16:00:01,101,276.879,0,-2.00E+00,0.00E+00,0.00E+00,Error,...,276.836,0,3.00E+00,0,0.00E+00,Error,60.002,1.00E+00,0.00E+00,0.00E+00
2,2,2022/02/24,16:00:02,102,276.918,0,0.00E+00,0.00E+00,0.00E+00,Error,...,276.838,0,1.00E+00,0,0.00E+00,Error,60.001,0.00E+00,0.00E+00,0.00E+00
3,3,2022/02/24,16:00:03,105,276.886,0,0.00E+00,0.00E+00,0.00E+00,Error,...,276.835,0,1.00E+00,0,0.00E+00,Error,60.002,2.00E+00,0.00E+00,0.00E+00
4,4,2022/02/24,16:00:04,101,276.915,0,-1.00E+00,0.00E+00,0.00E+00,Error,...,276.833,0,1.00E+00,0,0.00E+00,Error,60.002,1.00E+00,0.00E+00,0.00E+00
5,5,2022/02/24,16:00:05,102,276.914,0,0.00E+00,0.00E+00,0.00E+00,Error,...,276.846,0,-1.00E+00,0,0.00E+00,Error,60.002,2.00E+00,0.00E+00,0.00E+00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
306,306,2022/02/24,16:05:06,104,276.899,0,0.00E+00,0.00E+00,0.00E+00,Error,...,276.834,0,1.00E+00,0,0.00E+00,Error,60.002,2.00E+00,0.00E+00,0.00E+00
307,307,2022/02/24,16:05:07,100,276.92,0,-1.00E+00,0.00E+00,0.00E+00,Error,...,276.839,0,-1.00E+00,0,0.00E+00,Error,60.002,-1.00E+00,0.00E+00,0.00E+00
308,308,2022/02/24,16:05:08,101,276.905,0,1.00E+00,0.00E+00,0.00E+00,Error,...,276.849,0,-1.00E+00,0,0.00E+00,Error,60.001,-1.00E+00,0.00E+00,0.00E+00
309,309,2022/02/24,16:05:09,107,276.904,0,-2.00E+00,0.00E+00,0.00E+00,Error,...,276.835,0,2.00E+00,0,0.00E+00,Error,60.002,2.00E+00,0.00E+00,0.00E+00
