In [4]:
# https://pandas.pydata.org/docs/user_guide/10min.html
from random import random
import pandas as pd
import numpy as np
import typing
import string

https://pandas.pydata.org/docs/user_guide/indexing.html#different-choices-for-indexing


Rows are labelled by the *index* and columns are labelled by *columns*. Each axis can be indexed either by integer position (`.iloc`) or by label (`.loc`).


In [5]:
def make_indicies(n, name):
    """Build ``n`` sequential labels by appending 0, 1, ..., n-1 to ``name``.

    For example, ``make_indicies(3, "idx_")`` gives
    ``["idx_0", "idx_1", "idx_2"]``.

    Args:
        n: How many labels to generate.
        name: String prefix placed in front of each running number.

    Returns:
        A list of ``n`` strings.
    """
    return [name + str(position) for position in range(n)]

Make a DataFrame

In [6]:
# Row labels idx_0..idx_3 and column labels col_0..col_4.
I = make_indicies(4, "idx_")
C = make_indicies(5, "col_")
# Demo values: random normal draws scaled by 10, floored, then made non-negative.
# NOTE(review): no random seed is set in the cells shown, so the values
# presumably differ from run to run.
D = np.abs(np.floor(np.random.randn(len(I), len(C))*10))
# One row per entry of I, one column per entry of C.
df = pd.DataFrame(D, index=I, columns=C)
df

Unnamed: 0,col_0,col_1,col_2,col_3,col_4
idx_0,6.0,10.0,11.0,1.0,8.0
idx_1,9.0,13.0,8.0,23.0,5.0
idx_2,1.0,19.0,2.0,18.0,7.0
idx_3,3.0,13.0,5.0,24.0,3.0


In [7]:
# Return one column of data by label: ":" keeps every row and "col_3" picks
# the column. The result is a Series named after the column (see the output).
col = df.loc[:, "col_3"]
col

idx_0     1.0
idx_1    23.0
idx_2    18.0
idx_3    24.0
Name: col_3, dtype: float64

In [8]:
# Return several columns of data by label: a list of column labels selects
# just those columns, for every row.
cols = df.loc[:, ["col_1", "col_3"]]
cols

Unnamed: 0,col_1,col_3
idx_0,10.0,1.0
idx_1,13.0,23.0
idx_2,19.0,18.0
idx_3,13.0,24.0


In [9]:
# Return selected columns restricted to selected rows: .loc takes the row
# labels first and the column labels second.
columns_to_get = ["col_1", "col_3"]
rows_to_get = ["idx_1", "idx_3"]

# Note: this rebinds `cols`, which the previous cell also assigned.
cols = df.loc[rows_to_get, columns_to_get]
cols

Unnamed: 0,col_1,col_3
idx_1,13.0,23.0
idx_3,13.0,24.0


### Read in a CSV file by file path

In [10]:
# https://pandas.pydata.org/docs/user_guide/10min.html
from random import random
import pandas as pd
import numpy as np
import typing
import string

In [11]:
# NOTE(review): this is an absolute, machine-specific Windows path, so the
# notebook only runs on the original author's machine. Consider a path
# relative to a configurable data directory.
file_path = "C:\\Users\\count\\dev\\pandas\\data\\PA_micro.csv"
# Load the raw file with read_csv's default options. The real column names
# are located and applied in later cells.
df_raw = pd.read_csv(file_path)
# Optional quick inspections of the raw frame (left disabled):
#df_raw
# print(f"shape: {df_raw.shape}")
# df_raw.shape
# df_raw.index


In [12]:

def find_column_name_index(
    df: pd.DataFrame,
    report=False,
    search_term: str = "Store No.",
) -> int:
    """Find the row of ``df`` that contains the column names.

    The first column of ``df`` is scanned from the top for an entry equal to
    ``search_term``. That row is taken to be the one holding the column
    names, i.e. the physical quantities measured, such as voltage and current.

    Args:
        df: Frame whose first column is searched.
        report: When truthy, print the matching entry and its position.
        search_term: Text that identifies the column-names row. Defaults to
            ``"Store No."``, the marker this notebook's data file uses.

    Returns:
        The zero-based position of the first matching row, suitable for
        ``df.iloc``.

    Raises:
        ValueError: If no entry in the first column equals ``search_term``.
            Previously the function fell through and returned ``None``,
            which only surfaced later as an unrelated indexing error.
    """
    for index, entry in enumerate(df.iloc[:, 0]):
        if entry == search_term:
            if report:
                print(f"The entry \"{entry}\" is at index: {index}")
            return index
    raise ValueError(
        f"search term {search_term!r} was not found in the first column of the DataFrame"
    )

def get_column_names(df: pd.DataFrame, index: int) -> np.ndarray:
    """Return the values of one row of ``df`` as an array of strings.

    Args:
        df: Frame to read the row from.
        index: Zero-based position of the row (used with ``iloc``).

    Returns:
        A NumPy array holding that row's values converted to ``str``,
        intended for use as column headers.
    """
    header_row = df.iloc[index, :]
    return header_row.to_numpy(dtype=str)

In [13]:
# Find the row that holds the real column names and use its values as the
# column labels. Note that this reassigns df_raw.columns in place.
column_name_index = find_column_name_index(df_raw)
column_names = get_column_names(df_raw, column_name_index)
df_raw.columns = column_names

In [14]:
# Trim the top of the data frame: drop the column-names row and everything
# above it, leaving only the data rows below it.
df_2 = df_raw.iloc[column_name_index + 1:, :]


In [15]:

# Use the values of the first column as the row labels so that rows can be
# selected by label with .loc.
# NOTE(review): the labels appear to be strings (the next cell slices with
# '1':'3') — confirm the dtype.
df_2.index = df_2.iloc[:,0].to_numpy()
df_2

Unnamed: 0,Store No.,Date,Time,Millisecond,U-1-Total,I-1-Total
1,1,2022/02/24,16:00:01,101,276.88,0
2,2,2022/02/24,16:00:02,102,276.92,0
3,3,2022/02/24,16:00:03,105,276.89,0
4,4,2022/02/24,16:00:04,101,276.92,0


In [16]:
# Label-based slice: rows labelled '1' through '3' of the "U-1-Total" column,
# converted to a float NumPy array. The end label is included, as the three
# values in the output show.
df_2.loc['1':'3',"U-1-Total"].to_numpy(dtype=float)

array([276.88, 276.92, 276.89])