In [3]:
#Install require packages
!pip install numpy
!pip install pandas
!pip install seaborn
!pip install plotly
!pip install scikit-learn

import numpy as np
import pandas as pd
import json
from pandas.io.json import json_normalize
import matplotlib.pyplot as plt
import seaborn as sns
import glob, os

from datetime import date, timedelta

import plotly
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots

%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

pd.set_option('display.max_colwidth', -1)



## Design :  ibex <br />  <br /> Platform: sky130hd

A sample dataset with multiple runs varying the design utilization and the layer_adjust parameters for the routing layers. All design metrics from the runs are collected for analysis.

Metrics data is represented as json files in the  METRICS2.1 format. Each experiment in the run is a separate json file. All of the files are read into a DataFrame 'json_df'.

* Rows of the DataFrame represent an experiment.
* Columns of the DataFrame represent the metrics.

In [2]:
def load_metrics(metrics_dir):
    """Read every ``*.json`` file in ``metrics_dir`` into a single DataFrame.

    Each file is parsed as JSON and flattened with ``pd.json_normalize``, so
    nested keys become dotted column names. Files are read in sorted name order
    so the row order is the same on every run.

    Parameters
    ----------
    metrics_dir : str
        Directory that contains the metrics JSON files.

    Returns
    -------
    pandas.DataFrame
        The flattened records with a fresh 0..n-1 index, or an empty
        DataFrame when the directory holds no ``*.json`` files.
    """
    frames = []
    for json_path in sorted(glob.glob(os.path.join(metrics_dir, '*.json'))):
        with open(json_path, 'r') as f:
            frames.append(pd.json_normalize(json.load(f)))

    # pd.concat raises on an empty list, so handle "no files" explicitly.
    if not frames:
        return pd.DataFrame()

    # One concat at the end replaces the per-file DataFrame.append calls
    # (append was removed in pandas 2.0 and copies the frame on every call).
    return pd.concat(frames, ignore_index=True)


path = './metrics'
json_df = load_metrics(path)

In [None]:
# json_df.shape is (rows, columns): one row per run, one column per metric.
run_count, metric_count = json_df.shape
print(f'Number of runs in the dataset: {run_count}')
print(f'Number of metrics in each data: {metric_count}')

* The columns of the DataFrame are the metrics collected for each run.

In [None]:
# Display the column labels of json_df (the flattened metric names).
json_df.columns

**Data Engineering**
* Extract only the relevant columns -- for this exercise, get the design name, flow variant and route metrics.
* Parse Variant and extract the relevant features  -- Utilization, layer adjust for each routing layer.
* Rename columns.
* Create new columns (features) based on other column values -- for e.g. Success/Fail for the router.

In [None]:
# Data engineering: keep only the design name, the flow variant and the
# global/detailed route metrics used in the rest of the notebook.
route_metric_columns = [
    'run.flow__design',
    'run.flow__variant',
    'globalroute.timing__setup__ws',
    'detailedroute.route__wirelength',
    'detailedroute.route__via__count',
    'detailedroute.route__drc_errors__count',
    'detailedroute.runtime__total',
]
metrics_df = json_df[route_metric_columns]

In [None]:
# Map the raw metric keys to the short column labels used below.
readable_names = {
    'run.flow__design': 'Design',
    'run.flow__variant': 'Variant',
    'globalroute.timing__setup__ws': 'GR WNS',
    'detailedroute.route__wirelength': 'Wire Length',
    'detailedroute.route__via__count': 'Vias',
    'detailedroute.route__drc_errors__count': 'DRC Errors',
    'detailedroute.runtime__total': 'Route Runtime',
}
metrics_df = metrics_df.rename(columns=readable_names)


* Print the head of the data frame. As we can see, all of the parameter values used in the experiment are encoded in the "Variant" string. We will have to parse the string and create individual columns for the features we are interested in.

In [None]:
# Preview the first rows of the renamed metrics table.
metrics_df.head()

In [None]:
# Strings that can appear in place of a numeric metric value; this notebook
# treats a metric holding one of them as "that step did not complete".
failure_markers = ['N/A', 'ERR']

# Global route succeeded when its worst-slack metric holds a real value.
gr_failed = metrics_df['GR WNS'].isin(failure_markers)

# Detailed route succeeded only when all of its metrics hold real values.
# Both markers are checked on every column: rows flagged as successful are later
# cast to int/float and parsed as runtimes, which fails on a leftover
# 'N/A' or 'ERR'.
detailed_route_columns = ['DRC Errors', 'Wire Length', 'Route Runtime']
route_failed = metrics_df[detailed_route_columns].isin(failure_markers).any(axis=1)

metrics_df['GR Success'] = ~gr_failed
metrics_df['Success'] = ~route_failed

In [None]:
#metrics_df[(metrics_df['GR Success'] == True) & (metrics_df['Success'] == False)]

In [None]:
#metrics_df[(metrics_df['Route Runtime'] == 'N/A') & (metrics_df['Success'] == True)]

In [None]:
def parse_variant(val):
    """Extract the utilization and layer-adjust values from a Variant string.

    The string is a '-'-separated list of ``NAME_value`` segments, for example
    ``CORE_UTIL_30-M1_0.1-M2_0.1-M3_0.1-M4_0.1-M5_0.5``. Each segment is split
    at its last underscore. Only the six fields returned below are converted
    to float; any other segment is ignored, so extra or non-numeric tags in
    the variant name do not break parsing.

    Parameters
    ----------
    val : str
        The variant string of one run.

    Returns
    -------
    pandas.Series
        Six floats in the order CORE_UTIL, M1, M2, M3, M4, M5 (default
        integer index).

    Raises
    ------
    KeyError
        If one of the six fields is missing from ``val``.
    ValueError
        If the value of one of the six fields is not a number.
    """
    wanted = ('CORE_UTIL', 'M1', 'M2', 'M3', 'M4', 'M5')
    parsed = {}
    for segment in val.split('-'):
        # rpartition yields ('', '', segment) when there is no underscore,
        # so such segments never match a wanted field name.
        name, _, raw_value = segment.rpartition('_')
        if name in wanted:
            parsed[name] = float(raw_value)
    return pd.Series([parsed[name] for name in wanted])

In [None]:
# parse_variant returns six values per row; they are stored, in order, in
# the columns listed here.
variant_feature_cols = ['Core Util', 'M1 Adjust', 'M2 Adjust', 'M3 Adjust', 'M4 Adjust', 'M5 Adjust']
layer_adjust_cols = variant_feature_cols[1:]

metrics_df[variant_feature_cols] = metrics_df['Variant'].apply(parse_variant)

# 'Weighted Adjust' is the plain average of the five per-layer adjust values.
metrics_df['Weighted Adjust'] = sum(metrics_df[col] for col in layer_adjust_cols) / 5

* Extract individual features from the "Variant" string.
* Print the head of the dataframe after extracting the features and performing further data engineering.  This data frame is now ready to be used.  Notice that we have also created additional columns for capturing whether Global Route and Detailed Route completed. We have also calculated a 'Weighted Adjust' column that is a simple mean of the 'layer adjust' for each of the routing layers.

In [None]:
# Preview the table again, now with the success flags and parsed Variant features.
metrics_df.head()

In [None]:
# Keep only the runs flagged as successful and give the metric columns
# concrete dtypes so they can be aggregated and plotted.
column_dtypes = {
    'Design': 'string',
    'Variant': 'string',
    'GR WNS': 'float',
    'Vias': 'int',
    'DRC Errors': 'int',
    'Wire Length': 'float',
    'Route Runtime': 'string',
}
successful_rows = metrics_df['Success'] == True
success_df = metrics_df[successful_rows].astype(column_dtypes)

In [None]:
def convert_runtime_str(rstr):
    """Convert a runtime string to whole seconds.

    Accepts ``H:M:S``, ``M:S`` or ``S``. Anything after the first '.' (the
    fractional seconds) is discarded.

    Parameters
    ----------
    rstr : str
        Runtime such as ``'1:02:03.45'``, ``'02:03'`` or ``'45'``.

    Returns
    -------
    int
        The runtime in seconds.

    Raises
    ------
    ValueError
        If a field is not an integer, or if there are more than three
        ':'-separated fields.
    """
    whole_part = rstr.split('.')[0]
    fields = whole_part.split(':')
    if len(fields) > 3:
        # Previously this case fell through every branch and failed with an
        # UnboundLocalError on the return statement.
        raise ValueError(f'Unsupported runtime format: {rstr!r}')

    # Each field to the left is worth 60x the one on its right.
    runtime = 0
    for field in fields:
        runtime = runtime * 60 + int(field)
    return runtime

In [None]:
# Add a numeric 'Runtime' column (seconds) derived from the 'Route Runtime' strings.
success_df['Runtime'] = success_df['Route Runtime'].apply(convert_runtime_str)

* Print the statistics for each of the entries in the dataframe.

In [None]:
# Summary statistics, transposed so that each metric is a row.
success_df.describe().T

* We can observe a huge variation in runtime for the different runs and also a substantial variation in the routed wirelength. We would like to observe the relation between the various parameters on both runtime and routed wirelength.

#### Print some of base metrics from the data set.
* Number of success/failures.
* Min, Max of Wirelength, number of vias, drc errors.

In [None]:
#metrics_df['Success'].value_counts()
# Count the runs by outcome using the boolean 'Success' flag.
success_count = int((metrics_df['Success'] == 1).sum())
failure_count = int((metrics_df['Success'] == 0).sum())
print(f'Number of successful runs: {success_count}')
print(f'Number of failed runs: {failure_count}')

In [None]:
# Range of routed wire length over the successful runs.
wire_lengths = success_df['Wire Length']
min_wire_length, max_wire_length = wire_lengths.min(), wire_lengths.max()
print(f"Min Wire Length: {min_wire_length},  Max Wire Length: {max_wire_length}")

In [None]:
# Range of via counts over the successful runs.
via_counts = success_df['Vias']
min_vias, max_vias = via_counts.min(), via_counts.max()
print(f"Min Vias: {min_vias},  Max Vias: {max_vias}")

In [None]:
# Range of DRC error counts over the successful runs.
drc_error_counts = success_df['DRC Errors']
min_drc_errors, max_drc_errors = drc_error_counts.min(), drc_error_counts.max()
print(f"Min DRC Errors: {min_drc_errors},  Max DRC Errors: {max_drc_errors}")

In [None]:
# Number of successful (s_df) and failed (f_df) runs at each core utilization.
succeeded = metrics_df['Success'] == 1
failed = metrics_df['Success'] == 0
s_df = metrics_df[succeeded].groupby(['Core Util'], as_index=False)['Success'].count()
f_df = metrics_df[failed].groupby(['Core Util'], as_index=False)['Success'].count()
#s_df.groupby('Core Util')['Success'].value_counts().plot(kind = "bar", stacked=True, figsize= (10,6))

* Plot the number of successful runs and the number of failures with respect to core utilization. We can see that the trends are what we would expect.  As utilization increases, the number of successful runs decreases and the number of failed runs increases.

In [None]:
# Two side-by-side bar charts: successful runs (left) and failed runs (right)
# per core utilization. make_subplots already returns the figure, so no
# separate go.Figure() is created first.
fig = make_subplots(rows=1, cols=2,
                    subplot_titles=('Success', 'Failure'),
                    shared_xaxes=True,
                    horizontal_spacing=0.1)

panels = [(s_df, 'green'), (f_df, 'red')]
for col_idx, (counts, bar_color) in enumerate(panels, start=1):
    fig.add_trace(go.Bar(x=counts['Core Util'], y=counts['Success'],
                         marker=dict(color=bar_color)),
                  row=1, col=col_idx)

# Label the axes of both panels so the figure can be read on its own.
fig.update_xaxes(title_text='Core Util')
fig.update_yaxes(title_text='Number of runs')

fig.update_layout(width=1000, height=400,
                  title='Number of Success and Failures at different Utilizations', title_x=0.3,
                  margin=dict(l=5, r=50, b=60, t=80, pad=4),
                  showlegend=False)
fig.show()

* We can observe that at utilization values of above 38, there is a drastic increase in the number of failures. We also observe that for the range of layer_adjust settings that we are using, it does not have a direct impact on the number of successful and doomed runs.  Choosing higher layer adjust values will show a more direct correlation to success/doomed runs.
<br />

* Generate a scatter plot of Wirelength Vs Core Utilization.  We can see the trend where the wirelength decreases with increased utilization. However, at a certain utilization value the wirelength starts to increase due to the router trying more detours to resolve DRC errors.

In [None]:
# Scatter of routed wire length against core utilization for the successful
# runs; hovering over a point shows that run's 'Weighted Adjust' value.
fig = go.Figure(
    go.Scatter(x=success_df['Core Util'], y=success_df['Wire Length'],
               hovertext=success_df['Weighted Adjust'],
               mode='markers')
)

fig.update_layout(width=1000, height=400,
                  title='Wire Length Vs Core Utilization', title_x=0.5,
                  # Axis labels so the figure can be read on its own.
                  xaxis_title='Core Util',
                  yaxis_title='Wire Length',
                  margin=dict(l=5, r=50, b=60, t=80, pad=4),
                  showlegend=False)
fig.show()


* Let us now examine how the route runtime varies with utilization.

In [None]:
# Scatter of route runtime against core utilization for the successful runs;
# hovering over a point shows that run's 'Weighted Adjust' value.
fig = go.Figure(
    go.Scatter(x=success_df['Core Util'], y=success_df['Runtime'],
               hovertext=success_df['Weighted Adjust'],
               mode='markers')
)

fig.update_layout(width=1000, height=400,
                  title='Runtime Vs Core Utilization', title_x=0.5,
                  # Axis labels so the figure can be read on its own.
                  xaxis_title='Core Util',
                  yaxis_title='Runtime',
                  margin=dict(l=5, r=50, b=60, t=80, pad=4),
                  showlegend=False)
fig.show()

* We can see that the runtime is fairly flat for smaller utilizations. As the utilization value goes above 35, the runtime starts to degrade and rises exponentially.

* Let's now examine the number of DRC errors.

In [None]:
# Scatter of DRC error count against core utilization for the successful
# runs; hovering over a point shows that run's 'Runtime' value.
fig = go.Figure(
    go.Scatter(x=success_df['Core Util'], y=success_df['DRC Errors'],
               hovertext=success_df['Runtime'],
               mode='markers')
)

fig.update_layout(width=1000, height=400,
                  title='DRC Errors Vs Core Utilization', title_x=0.5,
                  # Axis labels so the figure can be read on its own.
                  xaxis_title='Core Util',
                  yaxis_title='DRC Errors',
                  margin=dict(l=5, r=50, b=60, t=80, pad=4),
                  showlegend=False)
fig.show()

* Once again, we see an expected trend -- the number of DRC errors is 0 for lower utilizations and starts to increase with utilizations above 35. 

#### Logistic Regression

* Now let us build a logistic regression model to predict successful or doomed runs for this design based on the input parameters of utilization and layer adjust values.

In [None]:
from sklearn import linear_model

# NOTE(review): this cell fits an ordinary linear regression (not a logistic
# model) of the boolean 'Success' flag on 'Core Util', and the prediction is
# stored but never displayed. Confirm whether the cell is still needed.
X = metrics_df[['Core Util']]
y = metrics_df['Success']

regr = linear_model.LinearRegression()
regr.fit(X, y)

# Query with a DataFrame that carries the same column name the model was
# fitted with; a bare nested list has no feature names and makes scikit-learn
# emit a feature-name mismatch warning.
predicted_success = regr.predict(pd.DataFrame({'Core Util': [20]}))


In [None]:
# Inputs: utilization plus the five per-layer adjust values.
# Target: whether the run was flagged as successful.
feature_cols = [
    'Core Util',
    'M1 Adjust',
    'M2 Adjust',
    'M3 Adjust',
    'M4 Adjust',
    'M5 Adjust',
]
X, y = metrics_df[feature_cols], metrics_df['Success']

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

In [None]:
# Hold out 20% of the runs for evaluation; the fixed random_state keeps the
# split the same on every execution.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.20,
    random_state=0,
)

log_reg = LogisticRegression()
log_reg.fit(X_train, y_train)

In [None]:
# Predict the Success flag for the held-out test rows.
y_pred = log_reg.predict(X_test)

In [None]:
from sklearn import metrics

* Print the confusion matrix for the trained model

In [None]:
# Compare the held-out labels with the model's predictions and display the
# resulting confusion matrix.
confusion_matrix = metrics.confusion_matrix(y_test, y_pred)
confusion_matrix

In [None]:
# Score the held-out predictions, then report each value with four decimals.
accuracy = metrics.accuracy_score(y_test, y_pred)
precision = metrics.precision_score(y_test, y_pred)
recall = metrics.recall_score(y_test, y_pred)

print(f'Model Accuracy: {accuracy:.4f}')
print(f'Model Precision: {precision:.4f}')
print(f'Recall Score: {recall:.4f}')

* The above model is very accurate. Now check the accuracy when the single Weighted Adjust parameter replaces the five per-layer adjust values (Core Util is still included as a feature).

In [None]:
# Second classifier: same target and same train/test split settings, but the
# five per-layer adjust columns are replaced by the single 'Weighted Adjust'.
feature_cols = ['Core Util', 'Weighted Adjust']
X = metrics_df[feature_cols]
y = metrics_df['Success']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=0)
log_reg = LogisticRegression()
log_reg.fit(X_train, y_train)

y_pred = log_reg.predict(X_test)

confusion_matrix = metrics.confusion_matrix(y_test, y_pred)
print(confusion_matrix)

# Labels match the ones printed for the first model ('Precision' was
# misspelled here before).
print(f'Model Accuracy: {metrics.accuracy_score(y_test, y_pred):.4f}')
print(f'Model Precision: {metrics.precision_score(y_test, y_pred):.4f}')
print(f'Recall Score: {metrics.recall_score(y_test, y_pred):.4f}')

* For this dataset, the weighted layer_adjust seems to give similar accuracy to the individual layer adjusts because, within the range of layer adjusts used, the core utilization has a more direct impact on success or failure.

* Now for runs that are successful,  let us predict the wirelength based on parameters using a simple linear regression.

In [None]:
#success_df

In [None]:
from sklearn import linear_model

# Fit a linear model of routed wire length on the run parameters, using the
# successful runs only.
feature_cols = ['Core Util', 'M1 Adjust', 'M2 Adjust', 'M3 Adjust', 'M4 Adjust', 'M5 Adjust']
X = success_df[feature_cols]
y = success_df['Wire Length']

regr = linear_model.LinearRegression()
regr.fit(X, y)

# One parameter set to predict, given in the same order as feature_cols.
test_params = [34, 0.1, 0.1, 0.1, 0.1, 0.5]

# Predict from a DataFrame with the fitted column names so the feature names
# match the ones seen during fit.
query = pd.DataFrame([test_params], columns=feature_cols)
predicted_wire_length = regr.predict(query)

# Look up the measured wire length of a run with exactly these parameters.
# The filter is derived from test_params so the two cannot drift apart.
# NOTE: any matching run was part of the training data, so this is an
# in-sample comparison.
is_match = success_df[feature_cols].eq(query.iloc[0]).all(axis=1)
matching_wire_lengths = success_df.loc[is_match, 'Wire Length']
actual_wire_length = matching_wire_lengths.iloc[0] if not matching_wire_lengths.empty else None

print(f'Predicted Wire Length for input parameters: {test_params} is {predicted_wire_length[0]:.2f}')
if actual_wire_length is None:
    # No successful run used this parameter set; report it instead of
    # failing with an IndexError.
    print('Actual Wire Length: no successful run with these parameters')
else:
    print(f'Actual Wire Length: {actual_wire_length}')


In [None]:
# Export the notebook METRICS2.1-fine-grain.ipynb to HTML with nbconvert,
# passing the flags that leave out code inputs and cell prompts.
!jupyter nbconvert --to html --no-input --TemplateExporter.exclude_input=True --no-prompt METRICS2.1-fine-grain.ipynb