In [90]:
import os

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import joblib
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, VotingClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


In [70]:
# Load the per-module feature table; the path is relative to the notebook's working directory.
df = pd.read_csv("modules.csv")

In [71]:
# List the raw columns to see what the CSV provides before any cleaning.
print(df.columns)

Index(['always_blocks', 'and_count', 'bind_count', 'case_count', 'dff_count',
       'estimated_depth', 'if_count', 'instance_count', 'module_name',
       'module_type', 'mux_count', 'not_count', 'num_cells', 'num_wires',
       'or_count', 'plus_count', 'signal_count', 'term_count', 'xor_count'],
      dtype='object')


In [72]:
# Remove module_name before the one-hot encoding step so it is not expanded
# into indicator columns (assumes it is a string column — TODO confirm).
# Reassigning instead of inplace=True, together with errors="ignore", makes
# re-running this cell a no-op rather than a KeyError.
df = df.drop(columns=["module_name"], errors="ignore")

In [73]:
# One-hot encode the remaining categorical column (module_type) into module_type_* indicator columns.
df = pd.get_dummies(df)

In [74]:
# Confirm module_type was replaced by module_type_* indicator columns.
df.columns

Index(['always_blocks', 'and_count', 'bind_count', 'case_count', 'dff_count',
       'estimated_depth', 'if_count', 'instance_count', 'mux_count',
       'not_count', 'num_cells', 'num_wires', 'or_count', 'plus_count',
       'signal_count', 'term_count', 'xor_count', 'module_type_Arithmetic',
       'module_type_Combinational', 'module_type_Control',
       'module_type_Counter', 'module_type_Interface', 'module_type_Memory',
       'module_type_Sequential'],
      dtype='object')

In [75]:
# Row and column counts after encoding.
df.shape

(795, 24)

In [76]:
# Inspect dtypes and non-null counts to find columns with missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 795 entries, 0 to 794
Data columns (total 24 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   always_blocks              795 non-null    int64  
 1   and_count                  795 non-null    int64  
 2   bind_count                 795 non-null    int64  
 3   case_count                 795 non-null    int64  
 4   dff_count                  321 non-null    float64
 5   estimated_depth            795 non-null    int64  
 6   if_count                   795 non-null    int64  
 7   instance_count             795 non-null    int64  
 8   mux_count                  467 non-null    float64
 9   not_count                  795 non-null    int64  
 10  num_cells                  730 non-null    float64
 11  num_wires                  730 non-null    float64
 12  or_count                   795 non-null    int64  
 13  plus_count                 795 non-null    int64  

In [77]:
# Replace missing mux_count values with 0. The result is assigned back because
# fillna(..., inplace=True) on a column selection is deprecated in pandas 2.x
# and does not update df under Copy-on-Write.
df['mux_count'] = df['mux_count'].fillna(0)

In [78]:
# Replace missing dff_count values with 0. The result is assigned back because
# fillna(..., inplace=True) on a column selection is deprecated in pandas 2.x
# and does not update df under Copy-on-Write.
df['dff_count'] = df['dff_count'].fillna(0)

In [79]:
# Replace missing num_cells values with 0. The result is assigned back because
# fillna(..., inplace=True) on a column selection is deprecated in pandas 2.x
# and does not update df under Copy-on-Write.
df['num_cells'] = df['num_cells'].fillna(0)

In [80]:
# Replace missing num_wires values with 0. The result is assigned back because
# fillna(..., inplace=True) on a column selection is deprecated in pandas 2.x
# and does not update df under Copy-on-Write.
df['num_wires'] = df['num_wires'].fillna(0)

In [81]:
# Re-check non-null counts to confirm no missing values remain after the fills.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 795 entries, 0 to 794
Data columns (total 24 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   always_blocks              795 non-null    int64  
 1   and_count                  795 non-null    int64  
 2   bind_count                 795 non-null    int64  
 3   case_count                 795 non-null    int64  
 4   dff_count                  795 non-null    float64
 5   estimated_depth            795 non-null    int64  
 6   if_count                   795 non-null    int64  
 7   instance_count             795 non-null    int64  
 8   mux_count                  795 non-null    float64
 9   not_count                  795 non-null    int64  
 10  num_cells                  795 non-null    float64
 11  num_wires                  795 non-null    float64
 12  or_count                   795 non-null    int64  
 13  plus_count                 795 non-null    int64  

In [82]:
# estimated_depth is the regression target; every other column is a feature.
y = df["estimated_depth"]
X = df.drop(columns="estimated_depth")

In [83]:
# Sanity-check that features and target have the same number of rows.
X.shape,y.shape

((795, 23), (795,))

In [84]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Decide column roles from the untouched feature matrix X rather than from
# X_train: after this cell runs once the indicator columns in X_train are ints,
# so a re-run would otherwise select them as numeric and scale them too.
num_cols = X.select_dtypes(include=["int64", "float64"]).columns
bool_cols = X.select_dtypes(include=["bool"]).columns

# Rebuild both splits from X using the row labels chosen by train_test_split.
# This keeps the cell idempotent (the scaler is always fit on unscaled data)
# and the explicit copies avoid writing into frames derived from X.
X_train = X.loc[X_train.index].copy()
X_test = X.loc[X_test.index].copy()

# Fit the scaler on the training rows only, then apply the same transform to
# the test rows so no test statistics leak into the scaling.
scaler = StandardScaler()
X_train[num_cols] = scaler.fit_transform(X_train[num_cols])
X_test[num_cols] = scaler.transform(X_test[num_cols])

# Indicator columns are left unscaled and only converted from bool to 0/1 ints.
X_train[bool_cols] = X_train[bool_cols].astype(int)
X_test[bool_cols] = X_test[bool_cols].astype(int)

In [87]:
# Preview the scaled training features.
X_train

Unnamed: 0,always_blocks,and_count,bind_count,case_count,dff_count,if_count,instance_count,mux_count,not_count,num_cells,...,signal_count,term_count,xor_count,module_type_Arithmetic,module_type_Combinational,module_type_Control,module_type_Counter,module_type_Interface,module_type_Memory,module_type_Sequential
360,0.0,-0.243409,-0.238604,0.0,-0.480439,0.0,-0.077511,-0.525208,-0.346170,-0.430824,...,-0.237209,-0.280041,-0.212508,0,1,0,0,0,0,0
264,0.0,-0.243409,-0.264066,0.0,-0.105520,0.0,-0.077511,-0.203094,-0.346170,-0.256383,...,-0.273327,-0.305404,-0.212508,0,0,0,1,0,0,0
440,0.0,-0.037547,-0.187680,0.0,-0.480439,0.0,-0.077511,-0.525208,1.394257,0.441384,...,-0.309445,-0.254677,-0.212508,0,0,1,0,0,0,0
328,0.0,1.094695,0.143322,0.0,-0.480439,0.0,-0.077511,-0.525208,-0.346170,-0.430824,...,-0.056619,0.100419,1.256569,0,1,0,0,0,0,0
486,0.0,-0.243409,-0.085833,0.0,-0.480439,0.0,-0.077511,-0.525208,-0.346170,-0.430824,...,-0.201091,-0.153221,-0.212508,1,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
71,0.0,-0.140478,-0.034910,0.0,-0.480439,0.0,-0.077511,-0.525208,0.524044,-0.474435,...,-0.056619,-0.077129,-0.212508,0,1,0,0,0,0,0
106,0.0,-0.243409,-0.060372,0.0,-0.480439,0.0,-0.077511,-0.525208,-0.346170,-0.343604,...,-0.020501,-0.127857,-0.212508,1,0,0,0,0,0,0
270,0.0,-0.243409,-0.213142,0.0,0.644318,0.0,-0.077511,0.280077,0.088937,0.048890,...,-0.201091,-0.254677,-0.212508,0,0,0,1,0,0,0
435,0.0,-0.243409,0.117861,0.0,-0.480439,0.0,0.044814,-0.364151,-0.346170,-0.430824,...,-0.092737,0.100419,-0.212508,0,0,0,1,0,0,0


In [88]:
# Preview the training target (estimated_depth).
y_train

360     7
264    11
440     9
328    12
486     8
       ..
71     13
106     6
270    12
435    11
102    12
Name: estimated_depth, Length: 636, dtype: int64

In [91]:
# joblib.dump does not create missing parent directories, so make sure the
# output folder exists before persisting the fitted scaler.
os.makedirs("models", exist_ok=True)
joblib.dump(scaler, "models/scaler.pkl")

['models/scaler.pkl']

In [95]:
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from xgboost import XGBRegressor


# Candidate regressors keyed by the name used for log lines and saved file names.
models = dict(
    LinearRegression=LinearRegression(),
    RandomForest=RandomForestRegressor(n_estimators=100, random_state=42),
    GradientBoosting=GradientBoostingRegressor(
        n_estimators=100,
        learning_rate=0.1,
        random_state=42,
    ),
    XGBoost=XGBRegressor(n_estimators=100, random_state=42),
)

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score


# Per-model test metrics, keyed by model name.
results = {}

# joblib.dump does not create missing parent directories, so make sure the
# output folder exists before the first model is saved.
os.makedirs("models", exist_ok=True)

for name, model in models.items():
    print(f"Training {name}...")
    model.fit(X_train, y_train)

    # Persist the fitted model under its dictionary key.
    joblib.dump(model, f"models/{name}.pkl")

    # Score on the held-out test split.
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)

    results[name] = {"MSE": mse, "R2": r2, "MAE": mae}

    print(f"{name} Performance:")
    print(f"Mean Squared Error (MSE): {mse:.4f}")
    print(f"R² Score: {r2:.4f}")
    print(f"Mean Absolute Error (MAE): {mae:.4f}\n")

Training LinearRegression...
LinearRegression Performance:
Mean Squared Error (MSE): 18.9904
R² Score: -1.1983
Mean Absolute Error (MAE): 1.9592

Training RandomForest...
RandomForest Performance:
Mean Squared Error (MSE): 3.3610
R² Score: 0.6109
Mean Absolute Error (MAE): 1.3416

Training GradientBoosting...
GradientBoosting Performance:
Mean Squared Error (MSE): 3.2657
R² Score: 0.6220
Mean Absolute Error (MAE): 1.3355

Training XGBoost...
XGBoost Performance:
Mean Squared Error (MSE): 4.2658
R² Score: 0.5062
Mean Absolute Error (MAE): 1.4902



  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)


In [98]:
# Define neural network architecture
import tensorflow as tf
from tensorflow import keras
# Take layers from the same Keras that `keras` above refers to, rather than
# from a separately installed standalone `keras` package.
from tensorflow.keras import layers

# Seed the Python, NumPy and backend RNGs so weight initialisation, dropout and
# batch shuffling repeat across runs, matching the seeded models trained earlier.
keras.utils.set_random_seed(42)

model_nn = keras.Sequential([
    # An explicit Input layer replaces `input_shape=` on the first Dense layer,
    # which Keras 3 warns against for Sequential models.
    keras.Input(shape=(X_train.shape[1],)),
    layers.Dense(128, activation="relu"),
    layers.Dropout(0.3),
    layers.Dense(64, activation="relu"),
    layers.Dropout(0.3),
    layers.Dense(32, activation="relu"),
    layers.Dense(1)  # Single output for regression
])

# Compile the model
model_nn.compile(optimizer="adam", loss="mse", metrics=["mae"])

# Train the model
# NOTE(review): the test split doubles as validation data here, so the per-epoch
# val_* numbers and the final test metrics come from the same rows. Carve a
# validation set out of X_train before adding early stopping or checkpointing.
history = model_nn.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test), verbose=1)

# Evaluate the model
test_loss, test_mae = model_nn.evaluate(X_test, y_test, verbose=1)
y_pred_nn = model_nn.predict(X_test)
r2_nn = r2_score(y_test, y_pred_nn)

print("Neural Network Performance:")
print(f"Mean Squared Error (MSE): {test_loss:.4f}")
print(f"Mean Absolute Error (MAE): {test_mae:.4f}")
print(f"R² Score: {r2_nn:.4f}")

# Store results alongside the other models' metrics
results["NeuralNetwork"] = {"MSE": test_loss, "R2": r2_nn, "MAE": test_mae}


Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 16ms/step - loss: 112.5070 - mae: 10.2687 - val_loss: 90.5862 - val_mae: 8.9417
Epoch 2/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 72.6987 - mae: 8.0044 - val_loss: 168.2252 - val_mae: 5.7949
Epoch 3/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 45.9015 - mae: 4.6708 - val_loss: 142.2682 - val_mae: 4.4821
Epoch 4/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 17.8444 - mae: 3.1918 - val_loss: 119.8367 - val_mae: 4.1231
Epoch 5/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 19.4982 - mae: 3.1575 - val_loss: 97.1083 - val_mae: 3.7814
Epoch 6/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 17.3649 - mae: 3.0562 - val_loss: 97.3975 - val_mae: 3.6903
Epoch 7/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 1