In [1]:
#This notebook will import a touchstone file and display insertion loss graph.
import pandas as pd

# Set the url of the raw .s4p file
url = "https://raw.githubusercontent.com/jayarerita/signal_integrity/master/cable_data_1.s4p"
!wget -O data1.s4p https://raw.githubusercontent.com/jayarerita/signal_integrity/master/cable_data_1.s4p

# This section here is used to read the first 100 rows of the file above as text and 
# parse through each line to find the "#" symbol signifying the start of the data. 
# This can be used to set the number of "skip rows" for read_csv below. We can also pull the information from this
# line to show what the units and format of the data are. For example if row number 24 is "#Hz S dB R 50" then 
# we would set skip_rows = 24, SI_units = "dB", number_type = "real", tester_impedance = "50"

with open("data1.s4p", "r") as file1:
    # Search through the first 100 rows of the file
    for i in range(0, 100):
        # Assign the entire row to a string variable for slicing
        one_line = file1.readline()
        # Search the first character in each successive line for the "#" symbol indicating the key row
        if (one_line[0] == "#"):
            # Set the skip row number for use in the read_csv method later on
            skip_rows = i
            # Set a string variable to the skip row containing the units, number type, and impedance for later use
            key_row = one_line
        
print('The firtst ' + str(skip_rows) + ' rows of the file contain header info and will be skipped.')
print('The s-parameter data is in the format ' + key_row)

--2019-03-20 21:20:21--  https://raw.githubusercontent.com/jayarerita/signal_integrity/master/cable_data_1.s4p
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 

151.101.248.133
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.248.133|:443... 

connected.


HTTP request sent, awaiting response... 

200 OK
Length: 1426153 (1.4M) [text/plain]
Saving to: ‘data1.s4p’

data1.s4p             0%[                    ]       0  --.-KB/s               




2019-03-20 21:20:22 (4.26 MB/s) - ‘data1.s4p’ saved [1426153/1426153]



The firtst 20 rows of the file contain header info and will be skipped.
The s-parameter data is in the format # Hz S  dB   R 50



In [2]:
# Read in the space delimited file, skipping the rows of header information. The skip_rows is + 1 because of indexing.
df_raw = pd.read_csv(url, skiprows = skip_rows + 1, delim_whitespace = True, header = None)
# Store the shape of the dataframe for cleaning below
shape = df_raw.shape
df_raw.head(5)

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,10000000.0,-18.596588,25.395187,-0.829325,-18.962502,-20.599869,20.452127,-22.130384,167.08244
1,-0.8246641,-18.926477,-19.9695,14.719769,-22.138124,167.04578,-21.730087,6.246666,
2,-20.57517,20.198177,-22.130781,166.88591,-18.240007,27.106436,-0.820171,-18.926466,
3,-22.11367,167.3744,-21.719362,6.261957,-0.831857,-18.954756,-19.607506,17.418802,
4,20000000.0,-15.93305,23.195728,-0.907085,-36.68045,-18.317211,22.228508,-22.045498,148.03929


The s4p data comes in an odd format. Each fourth line has 9 elements while the other 3 have 8.  We want to move the 8 elements on line 2,3, and 4 for each group to line up with elements 2-9 in line 1. The empty space left by this move will be filled by null values.

In the loop below we will parse through and reassign every element.

In [3]:
df_clean = df_raw
# Parse through each row
for row in range(shape[0]):
    # Parse through each column
    for col in range(shape[1]):
        # Set a boolean value to True if it is the 1st, 5th, 9th, etc. line with 9 elements.
        fourth_row = ((row + 1) % 4 == 1)
        # Set a boolean value to True if it is the 1st column.
        first_col = (col == 0)
        # print('row: ' + str(row) + " col: " + str(col) + " fourth_row: " + str(fourth_row) + ' first_col: ' + str(first_col))
        if (fourth_row and first_col):
            df_clean.iloc[row, col] = df_raw.iloc[row, col]
            freq = df_raw.iloc[row,0]
            #print('1')
        elif (first_col):
            df_clean.iloc[row,0] = freq
            #print('2')
        elif (fourth_row):
            # do nothing
            df_clean.iloc[row,col] = df_raw.iloc[row, col]
            #print('3')
        else:
            df_clean.iloc[row,col] = df_raw.iloc[row, col - 1]
            #print('4')
            

In [85]:
df_clean.columns = ['0', '1', '2', '3', '4', '5', '6', '7', '8']

df_clean.head(5)



Unnamed: 0,0,1,2,3,4,5,6,7,8
0,10000000.0,-18.59659,25.39519,-0.8293254,-18.9625,-20.59987,20.45213,-22.13038,167.0824
1,10000000.0,10000000.0,10000000.0,10000000.0,10000000.0,10000000.0,10000000.0,10000000.0,10000000.0
2,10000000.0,10000000.0,10000000.0,10000000.0,10000000.0,10000000.0,10000000.0,10000000.0,10000000.0
3,10000000.0,10000000.0,10000000.0,10000000.0,10000000.0,10000000.0,10000000.0,10000000.0,10000000.0
4,20000000.0,-15.93305,23.19573,-0.9070854,-36.68045,-18.31721,22.22851,-22.0455,148.0393


In [81]:
# Drop the imaginary columns, 2, 4, 6, 8
df_clean.drop(['2', '4'], axis = 1)

# Preview the clean data
df_clean.head(5)
            

KeyError: "['4'] not found in axis"