In [None]:
import torch.nn.functional as F
import torch
import torch.nn as nn
import re
import io
import sys
import google.generativeai as genai
from google.colab import userdata
from IPython.display import display
from IPython.display import Markdown

import pathlib
import textwrap
import time
import pandas as pd

In [1]:
class DynamicConvNet(nn.Module):
    """Configurable stack of Conv2d blocks followed by a single linear classifier.

    For each of the ``num_layers`` layers the network appends, in order:
    a stride-1 ``Conv2d``, an optional ``BatchNorm2d``, an in-place ``ReLU``,
    an optional ``Dropout`` (only when the probability is > 0) and, after every
    second convolution, a pooling layer (average or max, chosen by
    ``use_avg_pool[i]``).

    Args:
        in_channels: Number of channels of the input image.
        num_layers: Number of convolutional layers to build.
        layer_channels: Output channels of each convolution.
        kernel_sizes: Kernel size of each convolution.
        paddings: Padding of each convolution (an int, or the string ``'same'``).
        pool_size: Kernel size and stride of every pooling layer.
        dropout_probs: Dropout probability after each convolution (0 disables it).
        use_batch_norm: Per-layer flag enabling ``BatchNorm2d``.
        use_avg_pool: Per-layer flag selecting ``AvgPool2d`` over ``MaxPool2d``.
        num_classes: Number of outputs of the final linear layer.
        input_size: Height/width of the (square) input image. Defaults to 32,
            the value that was previously hard-coded.
    """

    def __init__(self, in_channels, num_layers, layer_channels, kernel_sizes, paddings, pool_size, dropout_probs, use_batch_norm, use_avg_pool, num_classes, input_size=32):
        super(DynamicConvNet, self).__init__()
        layers = []

        for i in range(num_layers):
            layers.append(
                nn.Conv2d(
                    in_channels=in_channels if i == 0 else layer_channels[i-1],
                    out_channels=layer_channels[i],
                    kernel_size=kernel_sizes[i],
                    stride=1,
                    padding=paddings[i]
                )
            )
            if use_batch_norm[i]:
                layers.append(nn.BatchNorm2d(layer_channels[i]))
            layers.append(nn.ReLU(inplace=True))
            if dropout_probs[i] > 0:
                layers.append(nn.Dropout(dropout_probs[i]))
            if (i + 1) % 2 == 0:  # Pool after every second conv layer
                if use_avg_pool[i]:
                    layers.append(nn.AvgPool2d(kernel_size=pool_size, stride=pool_size))
                else:
                    layers.append(nn.MaxPool2d(kernel_size=pool_size, stride=pool_size))

        self.conv_layers = nn.Sequential(*layers)

        # Number of features the classifier receives once the conv output is flattened.
        self.fc_input_size = self._get_fc_input_size(in_channels, num_layers, layer_channels, kernel_sizes, paddings, pool_size, input_size)

        self.fc = nn.Linear(self.fc_input_size, num_classes)

    def _get_fc_input_size(self, in_channels, num_layers, layer_channels, kernel_sizes, paddings, pool_size, input_size=32):
        """Return channels * height * width of the conv stack output.

        Mirrors the layer construction in ``__init__``: every stride-1
        convolution changes the side length by ``2 * padding - kernel + 1``
        (unchanged for ``'same'`` padding) and every second layer divides it by
        ``pool_size`` with floor division. The arithmetic is not validated, so
        a configuration that shrinks the feature map to nothing still
        constructs; it simply cannot run a forward pass.
        """
        size = input_size
        for i in range(num_layers):
            if paddings[i] != 'same':
                size = (size - kernel_sizes[i] + 2 * paddings[i]) + 1
            if (i + 1) % 2 == 0:  # Pooling layer follows this convolution
                size = size // pool_size

        # Use the channels of the last convolution actually built; with no
        # convolutions at all the input passes through unchanged.
        out_channels = layer_channels[num_layers - 1] if num_layers > 0 else in_channels
        return out_channels * size * size

    def forward(self, x):
        """Run the conv stack, flatten per sample and classify."""
        x = self.conv_layers(x)
        # Flatten everything except the batch axis. Reshaping with
        # view(-1, fc_input_size) would silently change the batch size when the
        # input resolution does not match the one the model was built for.
        x = x.flatten(1)
        x = self.fc(x)
        return x

def create_models(num_models):
    """Build ``num_models`` DynamicConvNet variants with systematically varied shapes.

    Model ``i`` has ``(i % 30) + 1`` convolutional layers whose channel counts
    grow by 6 per layer. The kernel size is taken from ``[3, 5, 7, 9, 11]``,
    advancing one tier every 60 models; indices past the last tier reuse the
    largest kernel instead of raising ``IndexError``.

    Args:
        num_models: Number of models to create.

    Returns:
        list: The constructed ``DynamicConvNet`` instances, in index order.
    """
    kernel_tiers = [3, 5, 7, 9, 11]
    models = []
    for i in range(num_models):
        num_layers = (i % 30) + 1  # 1..30 layers
        layer_channels = [6 * (j + 1) for j in range(num_layers)]  # 6, 12, 18, ...
        # Clamp so that more than 60 * len(kernel_tiers) models can be requested.
        kernel = kernel_tiers[min(i // 60, len(kernel_tiers) - 1)]
        kernel_sizes = [kernel] * num_layers
        paddings = [1] * num_layers  # Fixed padding of 1 for simplicity
        pool_size = 2
        dropout_probs = [0.5 if j % 2 == 0 else 0 for j in range(num_layers)]  # Dropout on even-indexed layers
        use_batch_norm = [j % 2 == 1 for j in range(num_layers)]  # Batch norm on odd-indexed layers
        use_avg_pool = [j % 2 == 0 for j in range(num_layers)]  # Average-pool flag set on even-indexed layers
        num_classes = 10

        model = DynamicConvNet(3, num_layers, layer_channels, kernel_sizes, paddings, pool_size, dropout_probs, use_batch_norm, use_avg_pool, num_classes)
        models.append(model)

    return models

# Build the 240 DynamicConvNet variants (module-level, runs when the cell executes).
cnn_models = create_models(240)




In [2]:
class DynamicLSTMNet(nn.Module):
    """Stack of single-layer LSTMs with per-layer dropout and a linear head.

    Args:
        input_size: Number of features of each input time step.
        hidden_sizes: Hidden size of each LSTM layer.
        num_layers: Number of LSTM layers to build.
        dropout_probs: Dropout probability applied to each layer's output.
        bidirectional: Per-layer flag making that LSTM bidirectional.
        num_classes: Number of outputs of the final linear layer.
    """

    def __init__(self, input_size, hidden_sizes, num_layers, dropout_probs, bidirectional, num_classes):
        super(DynamicLSTMNet, self).__init__()
        self.lstm_layers = nn.ModuleList()
        self.dropouts = nn.ModuleList()

        input_dim = input_size
        for i in range(num_layers):
            self.lstm_layers.append(
                nn.LSTM(
                    input_size=input_dim,
                    hidden_size=hidden_sizes[i],
                    num_layers=1,
                    batch_first=True,
                    bidirectional=bidirectional[i]
                )
            )
            self.dropouts.append(nn.Dropout(dropout_probs[i]))
            # A bidirectional layer concatenates both directions, so the next
            # layer (or the classifier) receives twice the hidden size.
            input_dim = hidden_sizes[i] * (2 if bidirectional[i] else 1)

        self.fc_input_size = input_dim
        self.fc = nn.Linear(self.fc_input_size, num_classes)

    def forward(self, x):
        """Run every LSTM + dropout pair and classify the last time step."""
        for lstm, dropout in zip(self.lstm_layers, self.dropouts):
            x, _ = lstm(x)
            x = dropout(x)

        x = x[:, -1, :]  # Last output of the sequence (layers use batch_first=True)
        x = self.fc(x)
        return x

def create_lstm_models(num_models):
    """Build ``num_models`` DynamicLSTMNet variants with systematically varied shapes.

    Model ``i`` has ``(i % 20) + 1`` layers. Hidden sizes grow linearly per
    layer in steps taken from ``[4, 8, 16, 32, 64, 128]``, advancing one tier
    every 50 models; indices past the last tier reuse the largest step instead
    of raising ``IndexError``.

    Args:
        num_models: Number of models to create.

    Returns:
        list: The constructed ``DynamicLSTMNet`` instances, in index order.
    """
    hidden_steps = [4, 8, 16, 32, 64, 128]
    models = []
    for i in range(num_models):
        input_size = 10  # Example input size (e.g., number of features in time series data)
        num_layers = (i % 20) + 1  # 1..20 layers
        # Clamp so that more than 50 * len(hidden_steps) models can be requested.
        step = hidden_steps[min(i // 50, len(hidden_steps) - 1)]
        hidden_sizes = [step * (j + 1) for j in range(num_layers)]  # step, 2*step, 3*step, ...
        dropout_probs = [0.5 if j % 2 == 0 else 0 for j in range(num_layers)]  # Dropout on even-indexed layers
        bidirectional = [j % 2 == 1 for j in range(num_layers)]  # Bidirectional on odd-indexed layers
        num_classes = 10

        model = DynamicLSTMNet(input_size, hidden_sizes, num_layers, dropout_probs, bidirectional, num_classes)
        models.append(model)

    return models

# Build the 200 DynamicLSTMNet variants (module-level, runs when the cell executes).
lstm_models = create_lstm_models(200)



In [3]:
class DynamicTransformer(nn.Module):
    """Encoder-decoder transformer with linear input/output projections.

    Both ``src`` and ``tgt`` are projected from ``input_dim`` to ``model_dim``
    by the same ``fc_in`` layer. The decoder output is batch-normalised over
    the ``model_dim`` axis, passed through dropout and projected to
    ``num_classes``.

    ``dropout_probs`` is indexed as ``[encoder, decoder, output]``.

    NOTE(review): ``nn.TransformerEncoder`` / ``nn.TransformerDecoder`` appear
    to clone the layer they are given, so the ``encoder_layer`` and
    ``decoder_layer`` attributes look like registered templates whose
    parameters ``forward`` never uses -- confirm before relying on parameter
    counts.
    """

    def __init__(self, input_dim, model_dim, num_heads, num_encoder_layers, num_decoder_layers, ff_dim, dropout_probs, num_classes):
        super().__init__()

        # Encoder: template layer, then the stack built from it.
        self.encoder_layer = nn.TransformerEncoderLayer(
            d_model=model_dim,
            nhead=num_heads,
            dim_feedforward=ff_dim,
            dropout=dropout_probs[0],
        )
        self.encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=num_encoder_layers)

        # Decoder: template layer, then the stack built from it.
        self.decoder_layer = nn.TransformerDecoderLayer(
            d_model=model_dim,
            nhead=num_heads,
            dim_feedforward=ff_dim,
            dropout=dropout_probs[1],
        )
        self.decoder = nn.TransformerDecoder(self.decoder_layer, num_layers=num_decoder_layers)

        # Projections into and out of the model dimension.
        self.fc_in = nn.Linear(input_dim, model_dim)
        self.fc_out = nn.Linear(model_dim, num_classes)

        # Regularisation applied to the decoder output.
        self.dropout = nn.Dropout(dropout_probs[2])
        self.batch_norm = nn.BatchNorm1d(model_dim)

    def forward(self, src, tgt):
        """Encode ``src``, decode ``tgt`` against it and project to class scores.

        The permutes below move axis 2 (``model_dim``) into the channel
        position expected by ``BatchNorm1d`` and back again; inputs are
        presumably laid out as (sequence, batch, feature) -- confirm.
        """
        projected_src = self.fc_in(src)
        projected_tgt = self.fc_in(tgt)

        decoded = self.decoder(projected_tgt, self.encoder(projected_src))

        channels_first = decoded.permute(1, 2, 0)
        normalized = self.batch_norm(channels_first).permute(2, 0, 1)

        return self.fc_out(self.dropout(normalized))

def create_transformer_models(num_models):
    """Build ``num_models`` DynamicTransformer variants with systematically varied shapes.

    Model ``i`` has ``(i % 8) + 1`` encoder layers and the same number of
    decoder layers. Input and model dimensions are taken from
    ``[32, 64, 128, 256, 512]`` and ``[16, 32, 64, 128, 256]`` respectively,
    advancing one tier every 50 models; indices past the last tier reuse the
    largest dimensions instead of raising ``IndexError``.

    Args:
        num_models: Number of models to create.

    Returns:
        list: The constructed ``DynamicTransformer`` instances, in index order.
    """
    input_dims = [32, 64, 128, 256, 512]
    model_dims = [16, 32, 64, 128, 256]
    models = []
    for i in range(num_models):
        # Clamp so that more than 50 * len(model_dims) models can be requested.
        tier = min(i // 50, len(model_dims) - 1)
        input_dim = input_dims[tier]  # Input dimension from [32, 64, 128, 256, 512]
        model_dim = model_dims[tier]  # Model dimension from [16, 32, 64, 128, 256]
        num_heads = 8  # Number of heads in multi-head attention
        num_encoder_layers = (i % 8) + 1  # 1..8 encoder layers
        num_decoder_layers = (i % 8) + 1  # 1..8 decoder layers
        ff_dim = 512  # Feedforward dimension
        dropout_probs = [0.1, 0.1, 0.1]  # Dropout probabilities for encoder, decoder, and final layers
        num_classes = 10

        model = DynamicTransformer(input_dim, model_dim, num_heads, num_encoder_layers, num_decoder_layers, ff_dim, dropout_probs, num_classes)
        models.append(model)

    return models

# Build the 200 DynamicTransformer variants (module-level, runs when the cell executes).
transformer_models = create_transformer_models(200)





In [4]:
class DynamicGRUNet(nn.Module):
    """Stack of single-layer GRUs with per-layer dropout and a linear head.

    Args:
        input_size: Number of features of each input time step.
        hidden_sizes: Hidden size of each GRU layer.
        num_layers: Number of GRU layers to build.
        dropout_probs: Dropout probability applied to each layer's output.
        bidirectional: Per-layer flag making that GRU bidirectional.
        num_classes: Number of outputs of the final linear layer.
    """

    def __init__(self, input_size, hidden_sizes, num_layers, dropout_probs, bidirectional, num_classes):
        super(DynamicGRUNet, self).__init__()
        self.gru_layers = nn.ModuleList()
        self.dropouts = nn.ModuleList()

        input_dim = input_size
        for i in range(num_layers):
            self.gru_layers.append(
                nn.GRU(
                    input_size=input_dim,
                    hidden_size=hidden_sizes[i],
                    num_layers=1,
                    batch_first=True,
                    bidirectional=bidirectional[i]
                )
            )
            self.dropouts.append(nn.Dropout(dropout_probs[i]))
            # A bidirectional layer concatenates both directions, so the next
            # layer (or the classifier) receives twice the hidden size.
            input_dim = hidden_sizes[i] * (2 if bidirectional[i] else 1)

        self.fc_input_size = input_dim
        self.fc = nn.Linear(self.fc_input_size, num_classes)

    def forward(self, x):
        """Run every GRU + dropout pair and classify the last time step."""
        for gru, dropout in zip(self.gru_layers, self.dropouts):
            x, _ = gru(x)
            x = dropout(x)

        x = x[:, -1, :]  # Last output of the sequence (layers use batch_first=True)
        x = self.fc(x)
        return x

def create_gru_models(num_models):
    """Build ``num_models`` DynamicGRUNet variants with systematically varied shapes.

    Model ``i`` has ``(i % 20) + 1`` layers. The hidden-size step comes from
    ``[4, 8, 16, 32, 64]`` and the dropout probability from
    ``[0.1, 0.2, 0.3, 0.4, 0.5]``, both advancing one tier every 50 models;
    indices past the last tier reuse the final tier instead of raising
    ``IndexError``.

    Args:
        num_models: Number of models to create.

    Returns:
        list: The constructed ``DynamicGRUNet`` instances, in index order.
    """
    hidden_steps = [4, 8, 16, 32, 64]
    dropout_tiers = [0.1, 0.2, 0.3, 0.4, 0.5]
    models = []
    for i in range(num_models):
        input_size = 10  # Example input size (e.g., number of features in time series data)
        num_layers = (i % 20) + 1  # 1..20 layers
        # Clamp so that more than 50 * len(hidden_steps) models can be requested.
        tier = min(i // 50, len(hidden_steps) - 1)
        step = hidden_steps[tier]
        dropout = dropout_tiers[tier]
        hidden_sizes = [step * (j + 1) for j in range(num_layers)]  # step, 2*step, 3*step, ...
        dropout_probs = [dropout if j % 2 == 0 else 0 for j in range(num_layers)]  # Dropout on even-indexed layers
        bidirectional = [j % 2 == 1 for j in range(num_layers)]  # Bidirectional on odd-indexed layers
        num_classes = 10

        model = DynamicGRUNet(input_size, hidden_sizes, num_layers, dropout_probs, bidirectional, num_classes)
        models.append(model)

    return models

# Build the 200 DynamicGRUNet variants (module-level, runs when the cell executes).
gru_models = create_gru_models(200)


In [5]:
class DynamicRNNNet(nn.Module):
    """Stack of single-layer vanilla RNNs with per-layer dropout and a linear head.

    Args:
        input_size: Number of features of each input time step.
        hidden_sizes: Hidden size of each RNN layer.
        num_layers: Number of RNN layers to build.
        dropout_probs: Dropout probability applied to each layer's output.
        bidirectional: Per-layer flag making that RNN bidirectional.
        num_classes: Number of outputs of the final linear layer.
    """

    def __init__(self, input_size, hidden_sizes, num_layers, dropout_probs, bidirectional, num_classes):
        super(DynamicRNNNet, self).__init__()
        self.rnn_layers = nn.ModuleList()
        self.dropouts = nn.ModuleList()

        input_dim = input_size
        for i in range(num_layers):
            self.rnn_layers.append(
                nn.RNN(
                    input_size=input_dim,
                    hidden_size=hidden_sizes[i],
                    num_layers=1,
                    batch_first=True,
                    bidirectional=bidirectional[i]
                )
            )
            self.dropouts.append(nn.Dropout(dropout_probs[i]))
            # A bidirectional layer concatenates both directions, so the next
            # layer (or the classifier) receives twice the hidden size.
            input_dim = hidden_sizes[i] * (2 if bidirectional[i] else 1)

        self.fc_input_size = input_dim
        self.fc = nn.Linear(self.fc_input_size, num_classes)

    def forward(self, x):
        """Run every RNN + dropout pair and classify the last time step."""
        for rnn, dropout in zip(self.rnn_layers, self.dropouts):
            x, _ = rnn(x)
            x = dropout(x)

        x = x[:, -1, :]  # Last output of the sequence (layers use batch_first=True)
        x = self.fc(x)
        return x

def create_rnn_models(num_models):
    """Build ``num_models`` DynamicRNNNet variants with systematically varied shapes.

    Model ``i`` has ``(i % 20) + 1`` layers. The hidden-size step comes from
    ``[4, 8, 16, 32, 64]`` and the dropout probability from
    ``[0.1, 0.2, 0.3, 0.4, 0.5]``, both advancing one tier every 50 models;
    indices past the last tier reuse the final tier instead of raising
    ``IndexError``.

    Args:
        num_models: Number of models to create.

    Returns:
        list: The constructed ``DynamicRNNNet`` instances, in index order.
    """
    hidden_steps = [4, 8, 16, 32, 64]
    dropout_tiers = [0.1, 0.2, 0.3, 0.4, 0.5]
    models = []
    for i in range(num_models):
        input_size = 10  # Example input size (e.g., number of features in time series data)
        num_layers = (i % 20) + 1  # 1..20 layers
        # Clamp so that more than 50 * len(hidden_steps) models can be requested.
        tier = min(i // 50, len(hidden_steps) - 1)
        step = hidden_steps[tier]
        dropout = dropout_tiers[tier]
        hidden_sizes = [step * (j + 1) for j in range(num_layers)]  # step, 2*step, 3*step, ...
        dropout_probs = [dropout if j % 2 == 0 else 0 for j in range(num_layers)]  # Dropout on even-indexed layers
        bidirectional = [j % 2 == 1 for j in range(num_layers)]  # Bidirectional on odd-indexed layers
        num_classes = 10

        model = DynamicRNNNet(input_size, hidden_sizes, num_layers, dropout_probs, bidirectional, num_classes)
        models.append(model)

    return models

# Create 200 models
rnn_models = create_rnn_models(200)


In [6]:
# One list per model family, in the order CNN, LSTM, Transformer, RNN, GRU
# (note that RNN comes before GRU here).
all_models = [cnn_models, lstm_models, transformer_models, rnn_models, gru_models]

In [8]:
def parse_dynamic_convnet(arch_string):
    """Turn the printed repr of a convolutional model into a structured listing.

    The repr is scanned with one regular expression per layer type. Layers are
    emitted grouped by type (Conv2d, ReLU, Dropout, BatchNorm2d, MaxPool2d,
    AvgPool2d, then the first Linear), not in network order, and are numbered
    consecutively in that emission order.

    Args:
        arch_string: Text produced by printing the model.

    Returns:
        str: One ``LayerN(Type=... | ...)`` entry per line. Every entry except
        the final Linear one is terminated by a newline.
    """
    # Matches values such as 0.1, 1e-05 or 1; BatchNorm prints momentum as a
    # plain decimal, so an exponent-only pattern would never match it.
    number = r'[-+]?\d+(?:\.\d+)?(?:[eE][-+]?\d+)?'

    # Conv2d omits "padding=" from its repr when the padding is zero.
    conv_regex = r'Conv2d\((\d+), (\d+), kernel_size=\((\d+), (\d+)\), stride=\((\d+), (\d+)\)(?:, padding=\((\d+), (\d+)\))?\)'
    relu_regex = r'ReLU\(inplace=(True|False)\)'
    dropout_regex = r'Dropout\(p=(\d+(?:\.\d+)?), inplace=(False|True)\)'
    batch_norm_regex = (
        r'BatchNorm2d\((\d+), eps=(' + number + r'), momentum=(' + number + r'|None), '
        r'affine=(True|False), track_running_stats=(True|False)\)'
    )
    maxpool_regex = r'MaxPool2d\(kernel_size=(\d+), stride=(\d+), padding=(\d+), dilation=(\d+), ceil_mode=(False|True)\)'
    avgpool_regex = r'AvgPool2d\(kernel_size=(\d+), stride=(\d+), padding=(\d+)\)'
    linear_regex = r'Linear\(in_features=(\d+), out_features=(\d+), bias=(True|False)\)'

    entries = []

    def add(description):
        # The layer number is simply the position in the emitted listing.
        entries.append(f"Layer{len(entries) + 1}({description})")

    for (in_channels, out_channels, kernel_height, kernel_width, stride_height, stride_width, padding_height, padding_width) in re.findall(conv_regex, arch_string):
        padding_height = padding_height or "0"
        padding_width = padding_width or "0"
        add(f"Type=Conv2d | In_channels={in_channels} | Out_channels={out_channels} | Kernel_size=({kernel_height},{kernel_width}) | Stride=({stride_height},{stride_width}) | Padding=({padding_height},{padding_width})")

    for inplace in re.findall(relu_regex, arch_string):
        add(f"Type=ReLU | Inplace={inplace}")

    for p, inplace in re.findall(dropout_regex, arch_string):
        add(f"Type=Dropout | Probability={p} | Inplace={inplace}")

    for (num_features, eps, momentum, affine, track_running_stats) in re.findall(batch_norm_regex, arch_string):
        add(f"Type=BatchNorm2d | Num_features={num_features} | Eps={eps} | Momentum={momentum} | Affine={affine} | Track_running_stats={track_running_stats}")

    for (kernel_size, stride, padding, dilation, ceil_mode) in re.findall(maxpool_regex, arch_string):
        add(f"Type=MaxPool2d | Kernel_size={kernel_size} | Stride={stride} | Padding={padding} | Dilation={dilation} | Ceil_mode={ceil_mode}")

    for (kernel_size, stride, padding) in re.findall(avgpool_regex, arch_string):
        add(f"Type=AvgPool2d | Kernel_size={kernel_size} | Stride={stride} | Padding={padding}")

    structured_string = "".join(entry + "\n" for entry in entries)

    # Only the first Linear layer is reported, without a trailing newline.
    linear_layer = re.findall(linear_regex, arch_string)
    if linear_layer:
        in_features, out_features, bias = linear_layer[0]
        structured_string += f"Layer{len(entries) + 1}(Type=Linear | In_features={in_features} | Out_features={out_features} | Bias={bias})"

    return structured_string

def parse_dynamic_recurrent_net(arch_string, rnn_type):
    """Turn the printed repr of a recurrent model into a structured listing.

    Layers are emitted grouped by type (recurrent layers, Dropout, then the
    first Linear), not in network order, and numbered consecutively in that
    emission order.

    Args:
        arch_string: Text produced by printing the model.
        rnn_type: Class name of the recurrent layer to look for, e.g.
            ``"LSTM"``, ``"GRU"`` or ``"RNN"``. It is matched literally.

    Returns:
        str: One ``LayerN(Type=... | ...)`` entry per line. Every entry except
        the final Linear one is terminated by a newline.
    """
    # Escape the name so it is matched literally, and require a word boundary
    # so it cannot match as the tail of a longer identifier.
    rnn_regex = r'\b' + re.escape(rnn_type) + r'\((\d+), (\d+), batch_first=True(, bidirectional=True)?\)'
    # Accept integer probabilities too: Dropout(0) prints as "p=0".
    dropout_regex = r'Dropout\(p=(\d+(?:\.\d+)?), inplace=(False|True)\)'
    linear_regex = r'Linear\(in_features=(\d+), out_features=(\d+), bias=(True|False)\)'

    entries = []

    for (input_size, hidden_size, bidirectional_flag) in re.findall(rnn_regex, arch_string):
        # The optional group is empty unless ", bidirectional=True" was printed.
        bidirectional = bool(bidirectional_flag)
        entries.append(f"Layer{len(entries) + 1}(Type={rnn_type} | Input_size={input_size} | Hidden_size={hidden_size} | Bidirectional={bidirectional})")

    for p, inplace in re.findall(dropout_regex, arch_string):
        entries.append(f"Layer{len(entries) + 1}(Type=Dropout | Probability={p} | Inplace={inplace})")

    structured_string = "".join(entry + "\n" for entry in entries)

    # Only the first Linear layer is reported, without a trailing newline.
    linear_layer = re.findall(linear_regex, arch_string)
    if linear_layer:
        in_features, out_features, bias = linear_layer[0]
        structured_string += f"Layer{len(entries) + 1}(Type=Linear | In_features={in_features} | Out_features={out_features} | Bias={bias})"

    return structured_string

def parse_dynamic_transformer(arch_string):
    """Turn the printed repr of a transformer model into a structured listing.

    Layers are emitted grouped by type (MultiheadAttention, Linear, Dropout,
    LayerNorm, then the first BatchNorm1d), not in network order, and numbered
    consecutively in that emission order.

    Args:
        arch_string: Text produced by printing the model.

    Returns:
        str: One ``LayerN(Type=... | ...)`` entry per line, each terminated by
        a newline.
    """
    # Matches values such as 0.1, 1e-05 or 1; BatchNorm prints momentum as a
    # plain decimal, so an exponent-only pattern would never match it.
    number = r'[-+]?\d+(?:\.\d+)?(?:[eE][-+]?\d+)?'

    mha_regex = r'MultiheadAttention\(\s*\(out_proj\): NonDynamicallyQuantizableLinear\(in_features=(\d+), out_features=(\d+), bias=(True|False)\)\s*\)'
    # The lookbehind keeps the attention projection
    # (NonDynamicallyQuantizableLinear) from being counted a second time as a
    # plain Linear layer.
    linear_regex = r'(?<!\w)Linear\(in_features=(\d+), out_features=(\d+), bias=(True|False)\)'
    dropout_regex = r'Dropout\(p=(\d+(?:\.\d+)?), inplace=(False|True)\)'
    layer_norm_regex = r'LayerNorm\(\((\d+),\), eps=(' + number + r'), elementwise_affine=(True|False)\)'
    batch_norm_regex = (
        r'BatchNorm1d\((\d+), eps=(' + number + r'), momentum=(' + number + r'|None), '
        r'affine=(True|False), track_running_stats=(True|False)\)'
    )

    entries = []

    def add(description):
        # The layer number is simply the position in the emitted listing.
        entries.append(f"Layer{len(entries) + 1}({description})")

    for (in_features, out_features, bias) in re.findall(mha_regex, arch_string):
        add(f"Type=MultiheadAttention | In_features={in_features} | Out_features={out_features} | Bias={bias}")

    for (in_features, out_features, bias) in re.findall(linear_regex, arch_string):
        add(f"Type=Linear | In_features={in_features} | Out_features={out_features} | Bias={bias}")

    for p, inplace in re.findall(dropout_regex, arch_string):
        add(f"Type=Dropout | Probability={p} | Inplace={inplace}")

    for (normalized_shape, eps, elementwise_affine) in re.findall(layer_norm_regex, arch_string):
        add(f"Type=LayerNorm | Normalized_shape={normalized_shape} | Eps={eps} | Elementwise_affine={elementwise_affine}")

    # Only the first BatchNorm1d layer is reported.
    batch_norm_layer = re.findall(batch_norm_regex, arch_string)
    if batch_norm_layer:
        num_features, eps, momentum, affine, track_running_stats = batch_norm_layer[0]
        add(f"Type=BatchNorm1d | Num_features={num_features} | Eps={eps} | Momentum={momentum} | Affine={affine} | Track_running_stats={track_running_stats}")

    return "".join(entry + "\n" for entry in entries)


In [9]:

def count_parameters(model):
    """Return the total number of trainable parameter elements in ``model``.

    Parameters with ``requires_grad`` set to False are skipped.

    Args:
        model: Any object exposing ``named_parameters()`` as ``nn.Module`` does.

    Returns:
        int: Sum of ``numel()`` over all trainable parameters.
    """
    return sum(
        parameter.numel()
        for _, parameter in model.named_parameters()
        if parameter.requires_grad
    )

# For every model collect: its printed architecture, its class name, its
# trainable-parameter count and a structured layer listing. The four lists are
# index-aligned.
model_descriptions = []
model_names = []
model_parameters = []
model_descriptions_structured = []

# Parser to use for each model class. Dispatching on the class name keeps the
# result independent of the order of the families inside all_models; an
# unknown class raises KeyError instead of silently leaving the lists with
# different lengths.
structured_parsers = {
    "DynamicConvNet": parse_dynamic_convnet,
    "DynamicLSTMNet": lambda arch: parse_dynamic_recurrent_net(arch, "LSTM"),
    "DynamicGRUNet": lambda arch: parse_dynamic_recurrent_net(arch, "GRU"),
    "DynamicRNNNet": lambda arch: parse_dynamic_recurrent_net(arch, "RNN"),
    "DynamicTransformer": parse_dynamic_transformer,
}

for models in all_models:
    for model in models:
        class_name = model.__class__.__name__
        # Same text print(model) would emit (including the trailing newline),
        # obtained without swapping sys.stdout: restoring sys.__stdout__
        # afterwards would detach the notebook's own output stream.
        model_arch_string = str(model) + "\n"

        model_descriptions.append(model_arch_string)
        model_names.append(class_name)
        model_parameters.append(count_parameters(model))
        model_descriptions_structured.append(structured_parsers[class_name](model_arch_string))


In [None]:
# NOTE(review): this installs google-cloud-aiplatform without a version pin, while the
# code visible in this notebook imports google.generativeai -- confirm this is the
# intended package and pin the version so a fresh run is reproducible.
!pip3 install --upgrade --user google-cloud-aiplatform

Collecting google-cloud-aiplatform
  Downloading google_cloud_aiplatform-1.59.0-py2.py3-none-any.whl (5.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.1/5.1 MB[0m [31m16.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: google-cloud-aiplatform
[0mSuccessfully installed google-cloud-aiplatform-1.59.0


In [12]:

# Read the Gemini API key from Colab's secret store instead of embedding it in
# the notebook. Add a secret named GOOGLE_API_KEY in the Colab "Secrets" panel
# and grant this notebook access to it. Any key that was ever committed in
# plain text must be treated as compromised and revoked.
genai.configure(api_key=userdata.get("GOOGLE_API_KEY"))

In [13]:
# Handle to the Gemini model used for all generate_content calls in this notebook.
gemini_model = genai.GenerativeModel('gemini-1.0-pro')


In [52]:
# Smoke test: request a short (at most 20 output tokens) completion.
# NOTE(review): `prompt` is not assigned anywhere above this cell; it is only
# set inside the summary-generation loop further down, so this cell depends on
# that loop having been run first.
response = gemini_model.generate_content(
    prompt,
    generation_config=genai.types.GenerationConfig(
        temperature=1.0,
        max_output_tokens=20,
        # Only one candidate for now.
        candidate_count=1,
    ),
)

In [53]:
# Show the text of the most recent response.
response.text

'The model is a DynamicGRUNet, which consists of 20 GRU layers and '

In [15]:
# Accumulates one generated summary per model. Re-running this cell discards
# every summary collected so far.
model_summary=[]

In [45]:
# Ask Gemini for a prose summary of every model that does not have one yet.
# The loop starts at len(model_summary), so re-running the cell after a failure
# resumes where it stopped instead of regenerating earlier summaries.
MAX_ATTEMPTS = 3
RETRY_DELAY_SECONDS = 5

for i in range(len(model_summary), len(model_descriptions)):
    # model_descriptions[i] already ends with a newline; the parameter count
    # and the instruction each get their own line so they are not glued
    # together.
    prompt = (
        f"{model_descriptions[i]}"
        f"Total Trainable Params: {model_parameters[i]}\n"
        "Give a precise summary of this model like layers, parameters and input and output sizes in a paragraph."
    )
    for attempt in range(1, MAX_ATTEMPTS + 1):
        try:
            response = gemini_model.generate_content(prompt)
            # Accessing .text can itself fail (e.g. when no candidate was
            # returned), so it belongs inside the retried section.
            summary_text = response.text
            break
        except Exception as exc:  # not a bare except: Ctrl-C must still interrupt the run
            if attempt == MAX_ATTEMPTS:
                raise
            print(f"Attempt {attempt} for model {i} failed ({exc!r}); trying again")
            time.sleep(RETRY_DELAY_SECONDS * attempt)
    model_summary.append(summary_text)

In [47]:
# Number of models that were described.
len(model_descriptions)

1040

In [62]:
# Column name -> list of per-model values; the lists are meant to be index-aligned.
model_info = {"model_names": model_names, "model_descriptions" : model_descriptions, "model_descriptions_structured" : model_descriptions_structured,  "model_parameters": model_parameters, "model_summary": model_summary}

In [63]:
# One row per model, one column per key of model_info.
df = pd.DataFrame(model_info)

In [67]:
# Preview the first rows of the table.
df.head()

Unnamed: 0,model_names,model_descriptions,model_descriptions_structured,model_parameters,model_summary
0,DynamicConvNet,DynamicConvNet(\n (conv_layers): Sequential(\...,Layer1(Type=Conv2d | In_channels=3 | Out_chann...,61618,This model is a Dynamic Convolutional Neural N...
1,DynamicConvNet,DynamicConvNet(\n (conv_layers): Sequential(\...,Layer1(Type=Conv2d | In_channels=3 | Out_chann...,31582,This model is a DynamicConvNet consisting of a...
2,DynamicConvNet,DynamicConvNet(\n (conv_layers): Sequential(\...,Layer1(Type=Conv2d | In_channels=3 | Out_chann...,48904,This Dynamic Convolutional Neural Network (Dyn...
3,DynamicConvNet,DynamicConvNet(\n (conv_layers): Sequential(\...,Layer1(Type=Conv2d | In_channels=3 | Out_chann...,22144,The DynamicConvNet is a Convolutional Neural N...
4,DynamicConvNet,DynamicConvNet(\n (conv_layers): Sequential(\...,Layer1(Type=Conv2d | In_channels=3 | Out_chann...,32494,The model is a Dynamic Convolutional Neural Ne...


In [65]:
df.to_csv('model_info_2.csv', index=False)  # index=False leaves the DataFrame index out of the CSV