In [None]:
!pip install pyyaml spacy
!python -m spacy download en_core_web_sm

Collecting en-core-web-sm==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m45.2 MB/s[0m eta [36m0:00:00[0m
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [None]:
import yaml
import spacy

def extract_entities(text):
    nlp = spacy.load("en_core_web_sm")
    doc = nlp(text)
    entities = {}
    for ent in doc.ents:
        entities[ent.label_] = entities.get(ent.label_, []) + [ent.text]
    return entities

def generate_markdown_from_swagger(swagger_file):
    with open(swagger_file, 'r') as f:
        swagger_data = yaml.safe_load(f)

    markdown_content = ""

    # Extracting entities from the Swagger definition
    entities = extract_entities(str(swagger_data))

    # Converting the Swagger definition into Markdown
    markdown_content += "# API Documentation\n\n"

    # Paths
    markdown_content += "## Paths\n\n"
    for path, path_data in swagger_data.get('paths', {}).items():
        markdown_content += f"### {path}\n\n"
        for method, method_data in path_data.items():
            markdown_content += f"#### {method.upper()}\n\n"
            if isinstance(method_data, dict):
                markdown_content += f"**Description:** {method_data.get('summary', 'No description available.')}\n\n"
            else:
                markdown_content += "**Description:** No description available.\n\n"
            # markdown_content += f"**Description:** {method_data.get('summary', 'No description available.')}\n\n"
            markdown_content += f"**Parameters:**\n\n"
            if 'parameters' in method_data:
                markdown_content += "| Name | Type | Description |\n"
                markdown_content += "|------|------|-------------|\n"
                for parameter in method_data['parameters']:
                    markdown_content += f"| {parameter['name']} | {parameter['in']} | {parameter.get('description', 'No description available.')} |\n"
                markdown_content += "\n"
            markdown_content += "---\n\n"

    # Entities
    markdown_content += "## Entities\n\n"
    for entity_type, entities_list in entities.items():
        markdown_content += f"### {entity_type}\n\n"
        for entity in entities_list:
            markdown_content += f"- {entity}\n"
        markdown_content += "\n"

    return markdown_content

# Example usage:
swagger_file = "swagger.yaml"
markdown_output_file = "api_documentation.md"
markdown_content = generate_markdown_from_swagger(swagger_file)

with open(markdown_output_file, 'w') as f:
    f.write(markdown_content)

print(f"Markdown file generated: {markdown_output_file}")

Markdown file generated: api_documentation.md


In [None]:
import spacy
import yaml

# Load spaCy model
nlp = spacy.load("en_core_web_sm")

def extract_named_entities(text):
    doc = nlp(text)
    entities = [(ent.text, ent.label_) for ent in doc.ents]
    return entities

# Load Swagger file (YAML format)
swagger_file_path = "swagger.yaml"
with open(swagger_file_path, "r") as f:
    swagger_data = yaml.safe_load(f)

# Convert Swagger data to text (you may need to extract specific fields)
swagger_text = str(swagger_data)

# Extract named entities
named_entities = extract_named_entities(swagger_text)

# Print named entities and their types
for entity, entity_type in named_entities:
    print(f"Entity: {entity}, Type: {entity_type}")


In [None]:
import spacy
from spacy.matcher import PhraseMatcher
import yaml

# Load spaCy model
nlp = spacy.load("en_core_web_sm")

# Define your named entities
custom_entities = ["description", "paths", "parameters"]

# Create phrase patterns for the named entities
entity_patterns = [nlp(entity) for entity in custom_entities]

# Initialize PhraseMatcher with the patterns
matcher = PhraseMatcher(nlp.vocab)
matcher.add("CustomEntities", None, *entity_patterns)

def extract_custom_entities(text):
    doc = nlp(text)
    matches = matcher(doc)
    entities = []
    for match_id, start, end in matches:
        entities.append((doc[start:end].text, "CUSTOM_ENTITY"))
    return entities

# Load Swagger file (YAML format)
swagger_file_path = "swagger.yaml"
with open(swagger_file_path, "r") as f:
    swagger_data = yaml.safe_load(f)

# Convert Swagger data to text (you may need to extract specific fields)
swagger_text = str(swagger_data)

# Extract custom entities
custom_entities = extract_custom_entities(swagger_text)

# Print custom entities and their types
for entity, entity_type in custom_entities:
    print(f"Entity: {entity}, Type: {entity_type}")


In [None]:
import yaml

def extract_paths_from_swagger(swagger_file):
    with open(swagger_file, 'r') as f:
        swagger_data = yaml.safe_load(f)

    paths = []

    for path, path_data in swagger_data.get('paths', {}).items():
        for method, method_data in path_data.items():
            if 'operationId' in method_data:
                operation_id = method_data['operationId']
                path_string = path.strip('"')  # Remove quotes from the path
                paths.append((path_string, operation_id))

    return paths

# Example usage:
swagger_file = "swagger.yaml"
paths_with_operation_ids = extract_paths_from_swagger(swagger_file)

# Print extracted paths and their associated operation IDs
for path, operation_id in paths_with_operation_ids:
    print(f"Path: {path}, Operation ID: {operation_id}")


Path: /, Operation ID: getVersions-v2
Path: /v2.0, Operation ID: getVersionInfo-v2.0
Path: /v2.0/extensions, Operation ID: listExtensions-v2.0
Path: /v2.0/extensions/{alias}, Operation ID: getExtension-v2.0
Path: /v2.0/tokens, Operation ID: authenticate-v2.0
Path: /v2.0/tenants, Operation ID: listTenants


In [None]:
import yaml

def extract_paths_and_methods_from_swagger(swagger_file):
    with open(swagger_file, 'r') as f:
        swagger_data = yaml.safe_load(f)

    paths_methods_operation_ids = []

    for path, path_data in swagger_data.get('paths', {}).items():
        for method, method_data in path_data.items():
            if 'operationId' in method_data:
                operation_id = method_data['operationId']
                path_string = path.strip('"')  # Remove quotes from the path
                paths_methods_operation_ids.append((path_string, method.upper(), operation_id))

    return paths_methods_operation_ids

# Example usage:
swagger_file = "swagger.yaml"
paths_methods_operation_ids = extract_paths_and_methods_from_swagger(swagger_file)

# Print extracted paths, methods, and their associated operation IDs
for path, method, operation_id in paths_methods_operation_ids:
    print(f"Path: {path}, Method: {method}, Operation ID: {operation_id}")


Path: /, Method: GET, Operation ID: getVersions-v2
Path: /v2.0, Method: GET, Operation ID: getVersionInfo-v2.0
Path: /v2.0/extensions, Method: GET, Operation ID: listExtensions-v2.0
Path: /v2.0/extensions/{alias}, Method: GET, Operation ID: getExtension-v2.0
Path: /v2.0/tokens, Method: POST, Operation ID: authenticate-v2.0
Path: /v2.0/tenants, Method: GET, Operation ID: listTenants


In [None]:
def print_paths_methods_operation_ids(paths_methods_operation_ids):
    # Print header
    print("| Path | Method | Operation ID |")
    print("|------|--------|--------------|")

    # Print each row
    for path, method, operation_id in paths_methods_operation_ids:
        print(f"| {path} | {method} | {operation_id} |")

# Example usage:
swagger_file = "swagger.yaml"
paths_methods_operation_ids = extract_paths_and_methods_from_swagger(swagger_file)

# Print in tabular format
print_paths_methods_operation_ids(paths_methods_operation_ids)


| Path | Method | Operation ID |
|------|--------|--------------|
| / | GET | getVersions-v2 |
| /v2.0 | GET | getVersionInfo-v2.0 |
| /v2.0/extensions | GET | listExtensions-v2.0 |
| /v2.0/extensions/{alias} | GET | getExtension-v2.0 |
| /v2.0/tokens | POST | authenticate-v2.0 |
| /v2.0/tenants | GET | listTenants |


In [None]:
import csv
import yaml

def extract_paths_and_methods_from_swagger(swagger_file):
    with open(swagger_file, 'r') as f:
        swagger_data = yaml.safe_load(f)

    paths_methods_operation_ids = []

    for path, path_data in swagger_data.get('paths', {}).items():
        for method, method_data in path_data.items():
            if 'operationId' in method_data:
                operation_id = method_data['operationId']
                path_string = path.strip('"')  # Remove quotes from the path
                paths_methods_operation_ids.append((path_string, method.upper(), operation_id))

    return paths_methods_operation_ids

def save_to_csv(data, csv_file):
    with open(csv_file, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['Path', 'Method', 'Operation ID'])
        writer.writerows(data)

# Example usage:
swagger_file = "swagger.yaml"
csv_file = "swagger_paths_methods.csv"

paths_methods_operation_ids = extract_paths_and_methods_from_swagger(swagger_file)

save_to_csv(paths_methods_operation_ids, csv_file)

print(f"Data saved to {csv_file}")


Data saved to swagger_paths_methods.csv


In [None]:
import pandas as pd
df = pd.read_csv("swagger_paths_methods.csv")

In [None]:
df

Unnamed: 0,Path,Method,Operation ID
0,/,GET,getVersions-v2
1,/v2.0,GET,getVersionInfo-v2.0
2,/v2.0/extensions,GET,listExtensions-v2.0
3,/v2.0/extensions/{alias},GET,getExtension-v2.0
4,/v2.0/tokens,POST,authenticate-v2.0
5,/v2.0/tenants,GET,listTenants


In [None]:
import csv

def generate_markdown_from_csv(csv_file):
    markdown_content = "# API Documentation\n\n"

    with open(csv_file, 'r') as f:
        reader = csv.DictReader(f)
        for row in reader:
            path = row['Path']
            method = row['Method']
            operation_id = row['Operation ID']

            markdown_content += f"{path}\n\n"
            markdown_content += f"Method: {method}\n\n"
            markdown_content += f"Operation ID: {operation_id}\n\n"
            markdown_content += "---\n\n"

    return markdown_content

# Example usage:
csv_file = "swagger_paths_methods.csv"
markdown_output_file = "api_documentation_from_csv.md"

markdown_content = generate_markdown_from_csv(csv_file)

with open(markdown_output_file, 'w') as f:
    f.write(markdown_content)

print(f"Markdown file generated: {markdown_output_file}")


Markdown file generated: api_documentation_from_csv.md


In [None]:
import torch
from transformers import BertForMaskedLM, BertTokenizer
import os

# Load pre-trained BERT model and tokenizer
model_name = "bert-base-uncased"
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertForMaskedLM.from_pretrained(model_name)

# Function to generate documentation
def generate_documentation(swagger_file):
    input_text = f"Swagger File:\n{swagger_file}\nDocumentation:"
    input_ids = tokenizer.encode(input_text, return_tensors='pt')

    # Generate documentation using the model
    output = model.generate(input_ids, max_length=500, num_return_sequences=1)
    generated_doc = tokenizer.decode(output[0], skip_special_tokens=True)

    return generated_doc

# Function to save the generated documentation to a file
def save_documentation(generated_doc, file_path):
    with open(file_path, "w") as file:
        file.write(generated_doc)

# Function to download the generated documentation
def download_documentation(file_path):
    # Provide the file path to the generated documentation
    # In this example, let's assume the file is in the current directory
    # You might need to adjust this based on your actual file location
    file_name = os.path.basename(file_path)
    with open(file_path, "r") as file:
        documentation_content = file.read()

    # In a web application, you can return the documentation_content
    # as a response to the user's request with appropriate headers
    # For simplicity, we'll just print the content here
    print("Generated Documentation:\n", documentation_content)

# Example usage:
swagger_file = "/content/swagger.json"  # Provide your Swagger file here
generated_documentation = generate_documentation(swagger_file)
save_documentation(generated_documentation, "generated_documentation.txt")
download_documentation("generated_documentation.txt")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForMaskedLM: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Generated Documentation:
 swagger file : / content / swagger. json documentation : pdf ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ;... ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; / ; ; / / / / /.... ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; js.... ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ;..... ;........ ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ;...... ; ; ; ; ; ; ; ; ; ; ;...... ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ;..... ; ; ; ;...... ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ;

In [1]:
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

In [2]:
# Load pre-trained GPT-2 model and tokenizer
model_name = "gpt2-medium"  # You can also try other sizes like "gpt2-large"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/718 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.52G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

In [3]:
# Fine-tune the model on your paired data (Swagger, Documentation)
# (You need to implement this part)

def generate_documentation(swagger_file):
    input_text = f"Swagger File:\n{swagger_file}\nDocumentation:"
    input_ids = tokenizer.encode(input_text, return_tensors='pt')

    # Generate documentation using the fine-tuned model
    output_ids = model.generate(input_ids, max_length=500, num_return_sequences=1, temperature=0.7)
    generated_doc = tokenizer.decode(output_ids[0], skip_special_tokens=True)

    return generated_doc



In [5]:
model.config.pad_token_id = tokenizer.eos_token_id

In [10]:
# Example usage:
swagger_file =  # Provide your Swagger file here
generated_documentation = generate_documentation(swagger_file)
print(generated_documentation)

SyntaxError: invalid syntax (<ipython-input-10-895a37cb7183>, line 2)

In [11]:
print(swagger_file)

/content/swagger.json


In [13]:
def read_file_contents(file_path):
    try:
        # Open the file at the given path in read mode
        with open(file_path, 'r') as file:
            # Read the contents of the file
            file_contents = file.read()
        return file_contents
    except FileNotFoundError:
        # Handle the case where the file is not found
        print(f"File '{file_path}' not found.")
        return None
    except Exception as e:
        # Handle other exceptions
        print(f"An error occurred while reading the file: {e}")
        return None

# Example usage:
file_path = '/content/swagger.json'  # Replace with the actual file path
file_contents = read_file_contents(file_path)
if file_contents is not None:
    print("File contents:")
    print(file_contents)


File contents:
{
  "swagger": "2.0",
  "info": {
    "version": "1.0",
    "title": "Hello World API"
  },
  "paths": {
    "/hello/{user}": {
      "get": {
        "description": "Returns a greeting to the user!",
        "parameters": [
          {
            "name": "user",
            "in": "path",
            "type": "string",
            "required": true,
            "description": "The name of the user to greet."
          }
        ],
        "responses": {
          "200": {
            "description": "Returns the greeting.",
            "schema": {
              "type": "string"
            }
          },
          "400": {
            "description": "Invalid characters in \"user\" were provided."
          }
        }
      }
    }
  }
}


In [15]:
# Example usage:
swagger_file = file_contents  # Provide your Swagger file here
generated_documentation = generate_documentation(swagger_file)
print(generated_documentation)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Swagger File:
{
  "swagger": "2.0",
  "info": {
    "version": "1.0",
    "title": "Hello World API"
  },
  "paths": {
    "/hello/{user}": {
      "get": {
        "description": "Returns a greeting to the user!",
        "parameters": [
          {
            "name": "user",
            "in": "path",
            "type": "string",
            "required": true,
            "description": "The name of the user to greet."
          }
        ],
        "responses": {
          "200": {
            "description": "Returns the greeting.",
            "schema": {
              "type": "string"
            }
          },
          "400": {
            "description": "Invalid characters in \"user\" were provided."
          }
        }
      }
    }
  }
}
Documentation:
{

"swagger": "2.0",

"info": {

"version": "1.0",

"title": "Hello World API",

"description": "Hello World API",

"paths": {

"/hello/{user}": {

"type": "string",
