In [1]:
import vertexai
from vertexai.generative_models import GenerativeModel, SafetySetting
import json
from tqdm import tqdm


In [2]:
def get_response(prompt):
    """Send `prompt` to the Gemini model and return the reply as plain text.

    The reply is requested as a stream and its chunks are concatenated.
    A Markdown code fence around the reply (a leading ```json tag and/or
    surrounding backticks) is removed so the result can be handed straight
    to json.loads.

    Relies on the module-level `model`, `generation_config` and
    `safety_settings`.

    Args:
        prompt: Prompt text to send to the model.

    Returns:
        The cleaned reply text (it is also printed).
    """
    responses = model.generate_content(
        prompt,
        generation_config=generation_config,
        safety_settings=safety_settings,
        stream=True,
    )

    final_response = "".join(chunk.text for chunk in responses)

    # Trim whitespace before looking at the fence: a newline after the closing
    # backticks would otherwise stop strip("`") from removing them, and leading
    # whitespace would hide the opening ```json tag.
    final_response = final_response.strip()
    if final_response.startswith("```json"):
        # Drop the tag by prefix so this also works when no whitespace follows it.
        final_response = final_response[len("```json"):]
    final_response = final_response.strip("`").strip()

    print(final_response)
    return final_response

# Sampling parameters passed to every generate_content call.
generation_config = dict(
    max_output_tokens=8192,
    temperature=1,
    top_p=0.95,
)

# The same blocking threshold is applied to each harm category below.
safety_settings = [
    SafetySetting(
        category=harm_category,
        threshold=SafetySetting.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
    )
    for harm_category in (
        SafetySetting.HarmCategory.HARM_CATEGORY_HATE_SPEECH,
        SafetySetting.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
        SafetySetting.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
        SafetySetting.HarmCategory.HARM_CATEGORY_HARASSMENT,
    )
]


# Project and location are placeholders and must be replaced before running.
vertexai.init(project="GCP PROJECT ID", location="GCP PROJECT LOCATION")
model = GenerativeModel("gemini-1.5-flash-001")


In [3]:
def extract_description_from_response(file, response):
    """Parse the model's JSON reply and copy the description fields onto `file`.

    Args:
        file: File record (dict). Its 'description' entry is updated in place;
            keys already present in that dict are kept.
        response: JSON text holding an object with the keys 'file_type',
            'file_name' and 'file_description'.

    Returns:
        The same `file` dict.

    Raises:
        json.JSONDecodeError: if `response` is not valid JSON.
        KeyError: if one of the three required keys is missing.
    """
    response_parsed = json.loads(response)

    # Read every required field before touching `file`, so a malformed reply
    # cannot leave a half-filled description behind.
    fields = {
        key: response_parsed[key]
        for key in ('file_type', 'file_name', 'file_description')
    }

    # The entry may be missing or hold a non-dict placeholder such as [];
    # start from a fresh dict in that case instead of failing on item assignment.
    if not isinstance(file.get('description'), dict):
        file['description'] = {}
    file['description'].update(fields)
    return file
    

In [4]:
def add_file_description(file):
    """Ask the model for the file's type, name and a short description.

    Each attempt sends the base prompt. If that reply cannot be fetched or
    parsed, a second prompt quoting the rejected reply is sent before the
    attempt is given up.

    Args:
        file: File record (dict) providing 'folder_structure' and 'content'.

    Returns:
        Tuple (file, success, attempts): the updated record, whether a reply
        was parsed, and how many first-try prompts had failed at that point.
        On failure the record's 'description' is reset to an empty dict.
    """
    attempts = 0
    max_attempts = 1
    success = False
    response = ''

    while not success and attempts < max_attempts:
        try:
            template=f""" You are a Next js web app document writer.
                         identify file type, name and describe 2-4 lines about the content of the file. 
                         based on folder path : {file['folder_structure']} 
                         and content : {file['content']} 

                         file type should be one of the following: Markdown, JavaScript, TypeScript, CSS, JSON, HTML, .env, git, other
                        
                         output: in json format, dont give anything else. Also dont include ```json in the output.
                         file_type: choose from the following: Markdown, JavaScript, TypeScript, CSS, JSON, HTML, .env, git, other
                         file_name: ## generate answer here ##
                         file_description: ## generate answer here ##
                         """
            
            response = get_response(template)
            file = extract_description_from_response(file, response)
            success = True
            return file, success, attempts
        
        except Exception as e:
            attempts += 1
            print("Error in getting response: ", e)
            try:
                
                template_with_error_context = f"""You are a Next js web app document writer.
                         identify file type, name and describe 2-4 lines about the content of the file. 
                         based on folder path : {file['folder_structure']} 
                         and content : {file['content']} 

                         file type should be one of the following: Markdown, JavaScript, TypeScript, CSS, JSON, HTML, .env, git, other
                        
                         output: in json format, dont give anything else. Also dont include ```json in the output.
                         file_type: choose from the following: Markdown, JavaScript, TypeScript, CSS, JSON, HTML, .env, git, other
                         file_name: ## generate answer here ##
                         file_description: ## generate answer here ##

                         previous_response where output format was not followed: {response}

                    strictly return the response in json format as specified, dont give anything else. Also dont include ```json in the output.
                    file_type: choose from the following: Markdown, JavaScript, TypeScript, CSS, JSON, HTML, .env, git, other
                    file_name: ## generate answer here ##
                    file_description: ## generate answer here ##"""
                
                response = get_response(template_with_error_context)
                file = extract_description_from_response(file, response)
                success = True
                return file, success, attempts
            
            except Exception as retry_error:
                # Report the retry failure instead of dropping it silently.
                print("Error in getting response after retry: ", retry_error)
                continue
            

    # 'description' is a dict everywhere else; resetting it to a list would make
    # the next run fail on file['description']['file_type'] = ... .
    file['description'] = {}
    return file, success, attempts


In [5]:
def extract_variable_from_response(file, response):
    """Parse a JSON array of variable entries and store it under file['variables'].

    Only 'variable_name', 'variable_dtype' and 'variable_description' are kept
    from each entry; a missing key raises KeyError and leaves `file` untouched.
    Returns the same `file` dict.
    """
    wanted_keys = ('variable_name', 'variable_dtype', 'variable_description')
    parsed_entries = json.loads(response)
    file['variables'] = [
        {key: entry[key] for key in wanted_keys}
        for entry in parsed_entries
    ]
    return file

In [6]:
def add_variable_description(file):
    """Ask the model to list the variables found in the file's content.

    Each attempt sends the base prompt. If that reply cannot be fetched or
    parsed, a second prompt quoting the rejected reply is sent before the
    attempt is given up.

    Args:
        file: File record (dict) providing 'folder_structure' and 'content'.

    Returns:
        Tuple (file, success, attempts): the updated record, whether a reply
        was parsed, and how many first-try prompts had failed at that point.
        On failure the record's 'variables' is set to an empty list.
    """
    attempts = 0
    max_attempts = 3
    success = False
    response = ''
    
    while not success and attempts < max_attempts:
        try:
            template=f"""You are a Next.js web application code document writer. Scan the code and identify all the variables in the {file['content']}. 
                        For each variable, identify the variable name, data type, and provide a brief description of the variable based on the code provided. 
                        Additionally, analyze the folder path: {file['folder_structure']}, and incorporate any relevant insights into the variable descriptions.
                        Choose the `variable_dtype` from the following options: string, number, boolean, array, object, function, custom, other.
                        
                        Output the result as a JSON array of objects, where each object contains the following keys: variable_name, variable_dtype and variable_description.
                        dont give anything else. Also dont include ```json in the output.
                        """
            
            response = get_response(template)
            file = extract_variable_from_response(file, response)
            success = True
            return file, success, attempts
        
        except Exception as e:
            attempts += 1
            # Surface the failure (API error or unparsable reply) instead of hiding it.
            print("Error in getting response: ", e)
            
            try:
                template_with_error_context = f"""You are a Next.js web application code document writer. Scan the code and identify all the variables in the {file['content']}. 
                        For each variable, identify the variable name, data type, and provide a brief description of the variable based on the code provided. 
                        Additionally, analyze the folder path: {file['folder_structure']}, and incorporate any relevant insights into the variable descriptions.
                        Choose the `variable_dtype` from the following options: string, number, boolean, array, object, function, custom, other.
                        
                        Output the result as a JSON array of objects, where each object contains the following keys: variable_name, variable_dtype and variable_description.
                        dont give anything else. Also dont include ```json in the output.

                        previous_response where output format was not followed: {response}
                        
                        strictly return the response in json format as specified, dont give anything else. Also dont include ```json in the output.
                        "variable_name": ## variable name ##,
                        "variable_dtype": choose from the following: string, number, boolean, array, object, function, custom, other,
                        "variable_description": ## generate answer here ##
                        """
                response = get_response(template_with_error_context)
                file = extract_variable_from_response(file, response)
                success = True
                return file, success, attempts
            
            except Exception as retry_error:
                print("Error in getting response after retry: ", retry_error)
                continue

    file['variables'] = []
    return file, success, attempts

In [7]:
def extract_function_from_response(file, response):
    """Parse a JSON array of function entries and store it under file['functions'].

    Each entry is reduced to 'function_name', 'function_description',
    'function_return_type' (list of return_dtype/return_description dicts) and
    'function_args' (list of arg_name/arg_dtype/arg_description dicts).
    A missing key raises KeyError and leaves `file` untouched.
    Returns the same `file` dict.
    """
    parsed_entries = json.loads(response)

    def _summarise(entry):
        # Nested lists are read first, then the scalar fields, as before.
        returns = [
            {
                'return_dtype': item['return_dtype'],
                'return_description': item['return_description'],
            }
            for item in entry['function_return_type']
        ]
        args = [
            {
                'arg_name': item['arg_name'],
                'arg_dtype': item['arg_dtype'],
                'arg_description': item['arg_description'],
            }
            for item in entry['function_args']
        ]
        return {
            'function_name': entry['function_name'],
            'function_description': entry['function_description'],
            'function_return_type': returns,
            'function_args': args,
        }

    file['functions'] = [_summarise(entry) for entry in parsed_entries]
    return file

In [8]:
def add_function_description(file):
    """Ask the model to describe the functions found in the file's content.

    Each attempt sends the base prompt. If that reply cannot be fetched or
    parsed, a second prompt quoting the rejected reply is sent before the
    attempt is given up.

    Args:
        file: File record (dict) providing 'folder_structure' and 'content'.

    Returns:
        Tuple (file, success, attempts): the updated record, whether a reply
        was parsed, and how many first-try prompts had failed at that point.
        On failure the record's 'functions' is set to an empty list.
    """
    attempts = 0
    max_attempts = 3
    success = False
    response = ''    

    while not success and attempts < max_attempts:
        try:
            template=f"""You are a Next.js web application code document writer who strictly follow the provided output format and does not include anything else other that provided output format. 
                        Scan the code provided in `{file['content']}` and identify all the functions present.
                        For each function, perform the following tasks and strictly follow the provided output format:

                            1. **Identify the function name**.
                            2. **Describe its purpose and role** in the code.
                            3. **Identify the return type** of the function and provide an explanation for what is returned and why.
                            4. **Identify the function's arguments**, describing the role of each argument.

                        Additionally, analyze the folder path `{file['folder_structure']}` and incorporate any relevant insights into the function descriptions.

                        Special cases:
                        - If no functions are found, return an empty array.
                        - If you find a function that is not defined or imported in the code, provide the information you can, such as the function name and description.
                        - Ignore variables that are not functions.

                        ## output format ##: Output the result as a array of objects, where each object contains the following keys:
                        - `function_name`
                        - `function_description`
                        - `function_return_type`: an array of objects with the keys `return_dtype` and `return_description`
                        - `function_args`: an array of objects with the keys `arg_name`, `arg_dtype`, and `arg_description`.

                        ## output should strat with [ and end with ] ##
                        dont include any comments or notes in the output.
                        Ensure the output strictly follows this format. Do not include anything else even ```json or ```.
                        """

            response = get_response(template)
            file = extract_function_from_response(file, response)
            success = True
            return file, success, attempts
        
        except Exception as e:
            attempts += 1
            # Surface the failure (API error or unparsable reply) instead of hiding it.
            print("Error in getting response: ", e)

            try:
                template_with_error_context = f"""You are a Next.js web application code document writer who strictly follow the provided output format and does not include anything else other that provided output format. 
                        Scan the code provided in `{file['content']}` and identify all the functions present.
                        For each function, perform the following tasks and strictly follow the provided output format:

                            1. **Identify the function name**.
                            2. **Describe its purpose and role** in the code.
                            3. **Identify the return type** of the function and provide an explanation for what is returned and why.
                            4. **Identify the function's arguments**, describing the role of each argument.

                        Additionally, analyze the folder path `{file['folder_structure']}` and incorporate any relevant insights into the function descriptions.

                        Special cases:
                        - If no functions are found, return an empty array.
                        - Ignore variables that are not functions.

                        ## output format ##: Output the result as a array of objects, where each object contains the following keys:
                        - `function_name`
                        - `function_description`
                        - `function_return_type`: an array of objects with the keys `return_dtype` and `return_description`
                        - `function_args`: an array of objects with the keys `arg_name`, `arg_dtype`, and `arg_description`.

                        ## output should strat with [ and end with ] ##
                        dont include any comments or notes in the output.
                        Ensure the output strictly follows this format. Do not include anything else even ```json or ```.
                         
                        previous_response where output format was not followed: {response}

                        strictly return the response in json format as specified, dont give anything else. Also dont include ```json in the output.
                           """
                response = get_response(template_with_error_context)
                file = extract_function_from_response(file, response)
                success = True
                return file, success, attempts
            
            except Exception as retry_error:
                print("Error in getting response after retry: ", retry_error)
                continue

    file['functions'] = []
    return file, success, attempts

In [9]:
def extract_imports_from_response(file, response):
    """Parse a JSON array of import entries and store it under file['imports'].

    Each entry's 'import_name' and 'import_description' are copied as-is and
    its 'imported_from' value is stored under the key 'import_file_path'.
    A missing key raises KeyError and leaves `file` untouched.
    Returns the same `file` dict.
    """
    parsed_entries = json.loads(response)
    collected = []
    for entry in parsed_entries:
        record = {
            'import_name': entry['import_name'],
            'import_description': entry['import_description'],
            'import_file_path': entry['imported_from'],
        }
        collected.append(record)
    file['imports'] = collected
    return file

In [10]:
def add_import_description(file):
    """Ask the model to describe the import statements in the file's content.

    Each attempt sends the base prompt. If that reply cannot be fetched or
    parsed, a second prompt quoting the rejected reply is sent before the
    attempt is given up.

    Args:
        file: File record (dict) providing 'folder_structure' and 'content'.

    Returns:
        Tuple (file, success, attempts): the updated record, whether a reply
        was parsed, and how many first-try prompts had failed at that point.
        On failure the record's 'imports' is set to an empty list.
    """
    attempts = 0
    max_attempts = 3
    success = False
    response = ''

    
    while not success and attempts < max_attempts:
        try:

            template=f"""You are a Next.js web application code documentation assistant who follows the provided output format strictly.
                        Your task is to analyze the provided code and identify all import statements within the code: {file['content']}. For each import, perform the following:

                        1. Identify the imported module or item.
                        2. Analyze why this specific import is needed based on the code context and Describe briefly what the import does.
                        3. Determine the file path from which the import is made.

                        Also, examine the folder structure: {file['folder_structure']}, and incorporate any relevant insights that enhance the description of the imports.
                        If no imports are found, return an empty array [].
                        
                        ## output format ##:
                        The output should be a JSON array of objects, where each object includes:
                        - "import_name": the name of the imported module or item.
                        - "import_description": a brief description of the import's purpose.
                        - "imported_from": the file path from where the import is made and ## not where it is being imported to so it cant be same file ##.

                        Ensure the output starts with [ and ends with ]. No other information or formatting should be included ``` or ```json
                        If no imports are found, return an empty array [].
                        """
            
            response = get_response(template)
            file = extract_imports_from_response(file, response)
            success = True
            return file, success, attempts
        
        except Exception as e:
            attempts += 1
            print("Error in getting response: ", e)
            try:
                template_with_error_context =f"""
                        You are a Next.js web application code documentation assistant who follows the provided output format strictly.
                        Your task is to analyze the provided code and identify all import statements within the code: {file['content']}. For each import, perform the following:

                        1. Identify the imported module or item.
                        2. Analyze why this specific import is needed based on the code context and Describe briefly what the import does.
                        3. Determine the file path from which the import is made.

                        Also, examine the folder structure: {file['folder_structure']}, and incorporate any relevant insights that enhance the description of the imports.
                        If no imports are found, return an empty array [].
                        
                        ## output format ##:
                        The output should be a JSON array of objects, where each object includes:
                        - "import_name": the name of the imported module or item.
                        - "import_description": a brief description of the import's purpose.
                        - "imported_from": the file path from where the import is made and ## not where it is being imported to so it cant be same file ##.

                        Ensure the output starts with [ and ends with ]. No other information or formatting should be included ``` or ```json
                         
                        previous_response where output format was not followed: {response}

                        strictly return the response in json format as specified, dont give anything else. Also dont include ```json in the output.
                        If no imports are found, return an empty array [].
                           """
                response = get_response(template_with_error_context)
                # Parse with the imports extractor. The functions extractor used
                # here before rejected every valid reply and, for an empty array,
                # overwrote file['functions'] while reporting success.
                file = extract_imports_from_response(file, response)
                success = True
                return file, success, attempts
            
            except Exception as retry_error:
                print("Error in getting response after retry: ", retry_error)
                continue


    file['imports'] = []
    return file, success, attempts

In [11]:
def extract_props_from_response(file, response):
    """Parse a JSON array of prop entries and store it under file['props'].

    Only 'prop_name', 'prop_dtype' and 'prop_description' are kept from each
    entry; a missing key raises KeyError and leaves `file` untouched.
    Returns the same `file` dict.
    """
    wanted_keys = ('prop_name', 'prop_dtype', 'prop_description')
    parsed_entries = json.loads(response)
    file['props'] = [
        {key: entry[key] for key in wanted_keys}
        for entry in parsed_entries
    ]
    return file

In [12]:
def add_props_description(file):
    """Ask the model to describe the props and interfaces in the file's content.

    Each attempt sends the base prompt. If that reply cannot be fetched or
    parsed, a second prompt quoting the rejected reply is sent before the
    attempt is given up.

    Args:
        file: File record (dict) providing 'folder_structure' and 'content'.

    Returns:
        Tuple (file, success, attempts): the updated record, whether a reply
        was parsed, and how many first-try prompts had failed at that point.
        On failure the record's 'props' is set to an empty list.
    """
    attempts = 0
    max_attempts = 4
    success = False
    response = ''
    
    while not success and attempts < max_attempts:
        try:
            template=f"""You are a Next.js web application code documentation assistant who follows the provided output format strictly.
                        Your task is to analyze the provided code and identify all props and interfcae statements within the code: {file['content']}. For each import, perform the following:

                        1. Identify the prop or interface name.
                        2. Analyze why this specific prop is needed based on the code context and Describe briefly what the prop does.
                        3. Determine the data type of the prop.

                        Also, examine the folder structure: {file['folder_structure']}, and incorporate any relevant insights that enhance the description of the prop.
                        If no prop are found, return an empty array.
                        
                        Ensure the output starts with [ and ends with ]. No other information or formatting should be included ``` or ```json
                        the following output format should be followed strictly and no additional information should be included.

                        ## output format ##:
                        The output should be a JSON array of objects, where each object includes:
                        - "prop_name": the name of the prop module or interface.
                        - "prop_dtype": the data type of prop.
                        - "prop_description": prop description.

                        output format should be followed strictly and additional information should not be included in the response.
                        """

            response = get_response(template)
            file = extract_props_from_response(file, response)
            success = True
            return file, success, attempts
        
        except Exception as e:
            attempts += 1
            # Surface the failure (API error or unparsable reply) instead of hiding it.
            print("Error in getting response: ", e)

            try:
                template_with_error_context = f"""You are a Next.js web application code documentation assistant who follows the provided output format strictly.
                        Your task is to analyze the provided code and identify all props and interfcae statements within the code: {file['content']}. For each import, perform the following:

                        1. Identify the prop or interface name.
                        2. Analyze why this specific prop is needed based on the code context and Describe briefly what the prop does.
                        3. Determine the data type of the prop.

                        Also, examine the folder structure: {file['folder_structure']}, and incorporate any relevant insights that enhance the description of the prop.
                        If no prop are found, return an empty array.
                        
                        Ensure the output starts with [ and ends with ]. No other information or formatting should be included ``` or ```json
                        the following output format should be followed strictly and no additional information should be included.

                        ## output format ##:
                        The output should be a JSON array of objects, where each object includes:
                        - "prop_name": the name of the prop module or interface.
                        - "prop_dtype": the data type of prop.
                        - "prop_description": prop description.

                        output format should be followed strictly and additional information should not be included in the response.
                                                 
                        previous_response where output format was not followed: {response}

                        strictly return the response in json format as specified, dont give anything else. Also dont include ```json in the output.
                           """
                response = get_response(template_with_error_context)
                # Parse with the props extractor. The functions extractor used
                # here before rejected every valid reply and, for an empty array,
                # overwrote file['functions'] while reporting success.
                file = extract_props_from_response(file, response)
                success = True
                return file, success, attempts
            
            except Exception as retry_error:
                print("Error in getting response after retry: ", retry_error)
                continue

    file['props'] = []
    return file, success, attempts

In [17]:
from time import sleep
from IPython.display import clear_output, display
import time


# Load the repository records, run every pending documentation step on each
# 'raw' file of the first project, and write the whole data set back to
# repo_data_sample.json after each file.
with open('repo_data_sample.json', 'r') as file:
    data = json.load(file)

# Maps folder_structure -> [[status, attempts], ...] for the five steps
# (description, variables, functions, imports, props).
result = {}

project_c = 0

processed_files = []
raw_files_c = 0

# First pass: count the files still marked 'raw' (used in progress messages only).
for project in data:
    for file in tqdm(project['repo_files']):
        if file['status'] == 'raw':
            raw_files_c += 1


for project in data:
    project_c += 1

    # Only the first project in the data set is processed.
    if project_c == 1:
        for file in tqdm(project['repo_files']):
            # Each *_status starts as the stored string ('raw'/'processed') and
            # becomes a bool (or 'Not Scanned') once the step runs in this pass.
            description_Status = file['description_status']
            description_Attempts = 0
            variable_Status = file['variables_status']
            variable_Attempts = 0
            function_status = file['functions_status']
            function_Attempts = 0
            import_status = file['imports_status']
            import_Attempts = 0
            props_status = file['props_status']
            props_Attempts = 0

            if file['status'] == 'raw':
                # Start the clock before the model calls so the pause at the end
                # pads each file to 60 seconds in total. Taking the timestamp
                # after processing made the loop sleep a full minute every time.
                start_time = time.time()

                print('total raw files: ', raw_files_c)
                print('\n Processing file: ', file['folder_structure'])

                if file['description_status'] == 'raw':
                    file, description_Status, description_Attempts = add_file_description(file)
                    print('Added file description')

                    if description_Status:
                        file['description_status'] = 'processed'

                if file['variables_status'] == 'raw':
                    file, variable_Status, variable_Attempts = add_variable_description(file)
                    print('Added variable description')

                    if variable_Status:
                        file['variables_status'] = 'processed'


                # These file kinds are not scanned for functions.
                if (file['folder_structure'] in ['.gitignore', 'README.md'] or file['folder_structure'].endswith('.json')):
                    print('File is not scanned for function description')
                    function_status = 'Not Scanned'
                    function_Attempts = 0
                    file['functions_status'] = 'processed'
                elif file['functions_status'] == 'raw':
                    file, function_status, function_Attempts = add_function_description(file)
                    if function_status:
                        file['functions_status'] = 'processed'
                    print('Added method description')

                # Same exclusions as above, plus .env* files, for the import scan.
                if (file['folder_structure'] in ['.gitignore', 'README.md'] or file['folder_structure'].endswith('.json') or file['folder_structure'].startswith('.env')):
                    print('File is not scanned for import description')
                    import_status = 'Not Scanned'
                    import_Attempts = 0
                    file['imports_status'] = 'processed'
                elif file['imports_status'] == 'raw':
                    file, import_status, import_Attempts = add_import_description(file)
                    print('Added import description')
                    if import_status:
                        file['imports_status'] = 'processed'

                # Props are only extracted from JS/TS sources.
                if file['folder_structure'].endswith(('.js', '.ts', '.tsx', '.jsx')) and file['props_status'] == 'raw':
                    file, props_status, props_Attempts = add_props_description(file)
                    print('Added props description')
                    if props_status:
                        file['props_status'] = 'processed'
                elif file['props_status'] == 'raw':
                    props_status = 'Not Scanned'
                    props_Attempts = 0
                    file['props_status'] = 'processed'
                    print('File is not scanned for props description')

                result[file['folder_structure']] = [[description_Status, description_Attempts], 
                                                    [variable_Status, variable_Attempts], 
                                                    [function_status, function_Attempts], 
                                                    [import_status, import_Attempts], 
                                                    [props_status, props_Attempts]]
                
                
                # Show the running summary of every file handled so far.
                for i in result:
                    status_list = ', '.join(str(status) for status in result[i])
                    print(f"{i:<35}: [{status_list}]")

                # The file is done only when every step succeeded, was skipped,
                # or was already processed; otherwise the user decides.
                if (description_Status or file['description_status'] == 'processed') and (variable_Status or file['variables_status'] == 'processed') and \
                (function_status or function_status=='Not Scanned' or file['functions_status'] == 'processed') and (import_status or import_status=='Not Scanned' or file['imports_status'] == 'processed') \
                and (props_status or props_status=='Not Scanned' or file['props_status'] == 'processed'):
                    file['status'] = 'processed'
                else:
                    user_input = input('Do you want to mark this file as processed? (y/n): ')
                    if user_input == 'y':
                        file['status'] = 'processed'
                        file['description_status'] = 'processed'
                        file['variables_status'] = 'processed'
                        file['functions_status'] = 'processed'
                        file['imports_status'] = 'processed'
                        file['props_status'] = 'processed'
                    else:
                        file['status'] = 'raw'


                # Copy the updated fields onto the matching record in the project.
                for p_file in project['repo_files']:
                    if p_file['folder_structure'] == file['folder_structure']:
                        p_file.update(file)
                
                # Persist after every file so progress survives an interruption.
                with open('repo_data_sample.json', 'w') as file_out:
                    json.dump(data, file_out, indent=4)

                
                end_time = time.time()  # Record end time
                time_taken = end_time - start_time



                # Pad the iteration to one minute before moving to the next file.
                if time_taken < 60:
                    sleep(60 - time_taken)
                clear_output(wait=True)


    

100%|██████████| 88/88 [00:00<?, ?it/s]
100%|██████████| 88/88 [00:00<00:00, 87901.58it/s]


In [14]:
# Print one aligned row per file: its path, then the [status, attempts] pairs.
for file_path, step_statuses in result.items():
    status_list = ', '.join(map(str, step_statuses))
    print(f"{file_path:<75}: [{status_list}]")

src/app/components/homepage/utility/AccordianComp.tsx                      : [['processed', 0], ['processed', 0], ['processed', 0], [True, 0], ['processed', 0]]
src/app/components/homepage/utility/BackgroundScene.tsx                    : [['processed', 0], ['processed', 0], ['processed', 0], [True, 0], ['processed', 0]]
src/app/components/homepage/utility/SkillAnimatedBeam.tsx                  : [['processed', 0], ['processed', 0], ['processed', 0], [True, 0], ['processed', 0]]
src/app/components/homepage/utility/SkillCard.tsx                          : [['processed', 0], ['processed', 0], ['processed', 0], [True, 0], ['processed', 0]]
src/app/components/projectpage/utility/CommentThread.tsx                   : [['processed', 0], ['processed', 0], ['processed', 0], [True, 0], ['processed', 0]]
src/app/components/projectpage/videoprocessing/OneWayStream.tsx            : [['processed', 0], ['processed', 0], ['processed', 0], [True, 0], ['processed', 0]]
src/app/components/projectpage/vid

In [1]:
# List every file that is still marked 'raw' and count them.
# Needs `data` (and `tqdm`) from the earlier cells; running this cell on a
# fresh kernel raises NameError.
# Restart the counter here: continuing from the value left by the processing
# cell would add this count on top of the earlier one.
raw_files_c = 0
for project in data:
    for file in tqdm(project['repo_files']):
        if file['status'] == 'raw':
            raw_files_c += 1
            print(file['folder_structure'])

NameError: name 'data' is not defined