In [None]:
!pip install datasets

In [None]:
import csv
import json

def csv_to_json(csv_file_path, json_file_path):
  """Convert a CSV file into a JSON file holding a list of row objects.

  The first CSV row is used as the header. Every following row becomes one
  dictionary that maps the header names to the (string) cell values.

  Args:
    csv_file_path (str): Path to the input CSV file.
    json_file_path (str): Path to the desired output JSON file.

  Returns:
    None. Success or failure is reported with ``print``; exceptions are
    caught here and not propagated to the caller.
  """
  try:
    # newline='' leaves line-ending handling to the csv module, so quoted
    # fields that contain embedded line breaks are read back unchanged.
    with open(csv_file_path, 'r', encoding='utf-8', newline='') as csv_file:
      # Materialise all rows while the file is still open
      data = list(csv.DictReader(csv_file))

    # ensure_ascii=False keeps non-ASCII text readable in the output file
    with open(json_file_path, 'w', encoding='utf-8') as json_file:
      json.dump(data, json_file, ensure_ascii=False, indent=4)

    print(f'Successfully converted {csv_file_path} to {json_file_path}.')

  except FileNotFoundError:
    # Note: opening the output path can raise this too (missing directory)
    print(f'Error: CSV file {csv_file_path} not found.')

  except Exception as e:
    print(f'An error occurred: {e}')

# Example Usage:
# Input CSV, given as an absolute path under /content/drive/MyDrive.
# NOTE(review): the file name contains a space before "_test" - confirm it
# matches the real file name.
csv_file = '/content/drive/MyDrive/Adversarial_chatbot_dataset _test.csv'
# Output JSON, written relative to the current working directory. The cell
# that previews the JSON reads this same name, so rename both together.
json_file = 'hemanth_teating_adversarical.json'
csv_to_json(csv_file, json_file)

In [None]:
import json

# JSON file to preview, relative to the current working directory
# (the same name the csv_to_json example writes to).
json_file = 'hemanth_teating_adversarical.json'

try:
    # Open the JSON file for reading
    with open(json_file, 'r', encoding='utf-8') as file:
        # Parse the whole file. The indexing below assumes a non-empty list of
        # dicts that each have an 'input' key - TODO confirm against the CSV header.
        data = json.load(file)
        # Show the 'input' field of the first record only
        # (for a list, data[:1][0] selects the same element as data[0])
        print(data[:1][0]['input'])

except FileNotFoundError:
    print(f"File '{json_file}' not found.")

except Exception as e:
    # Any other failure (malformed JSON, empty list, missing key, ...) is
    # reported as a one-line message instead of a traceback.
    print(f"Error: {e}")


In [None]:
import csv
import json

def json_to_csv(json_file_path, csv_file_path):
  """Convert a JSON file holding a list of objects into a CSV file.

  The CSV header is the union of the keys of all objects, in order of first
  appearance. Keys missing from an object are written as empty cells.

  Args:
    json_file_path (str): Path to the input JSON file.
    csv_file_path (str): Path to the desired output CSV file.

  Returns:
    None. Success or failure is reported with ``print``; exceptions are
    caught here and not propagated to the caller.
  """
  try:
    with open(json_file_path, 'r', encoding='utf-8') as json_file:
      data = json.load(json_file)

    # Validate before touching the output so bad input never leaves a
    # half-written CSV behind.
    if not isinstance(data, list) or not data:
      raise ValueError('JSON input must be a non-empty list of objects.')
    if not all(isinstance(row, dict) for row in data):
      raise ValueError('Every item of the JSON list must be an object.')

    # Collect keys from every record, not just the first one: DictWriter
    # raises ValueError on a key that is not in fieldnames. dict.fromkeys
    # removes duplicates while keeping first-seen order.
    headers = list(dict.fromkeys(key for row in data for key in row))

    # newline='' stops the csv module's own line endings from being translated
    with open(csv_file_path, 'w', encoding='utf-8', newline='') as csv_file:
      csv_writer = csv.DictWriter(csv_file, fieldnames=headers)
      csv_writer.writeheader()
      csv_writer.writerows(data)

    print(f'Successfully converted {json_file_path} to {csv_file_path}.')

  except FileNotFoundError:
    print(f'Error: JSON file {json_file_path} not found.')

  except Exception as e:
    print(f'An error occurred: {e}')

# Example Usage:
# Input JSON, given as an absolute path under /content/drive/MyDrive
json_file = '/content/drive/MyDrive/MetaMathQA-395K.json'
# Output CSV, written relative to the current working directory.
# NOTE(review): the output is named "adversarical" although the input is the
# MetaMathQA file - confirm this pairing is intended.
csv_file = 'hemanth_adversarical.csv'
json_to_csv(json_file, csv_file)

In [None]:
import csv

# Print every row of the generated CSV as one comma-separated line.
# encoding='utf-8' matches the encoding json_to_csv writes with, and
# newline='' is what the csv module expects so that quoted fields containing
# line breaks are parsed correctly.
with open('hemanth_adversarical.csv', 'r', encoding='utf-8', newline='') as csv_file:
    csv_reader = csv.reader(csv_file)

    for row in csv_reader:
        print(', '.join(row))

In [None]:
import csv
import yaml

def csv_to_yaml(csv_file_path, yaml_file_path):
  """Convert a CSV file into a YAML file holding a list of row mappings.

  The first CSV row is used as the header. Every following row becomes one
  mapping from header names to the (string) cell values.

  Args:
    csv_file_path (str): Path to the input CSV file.
    yaml_file_path (str): Path to the desired output YAML file.

  Returns:
    None. Success or failure is reported with ``print``; exceptions are
    caught here and not propagated to the caller.
  """
  try:
    # newline='' leaves line-ending handling to the csv module, so quoted
    # fields that contain embedded line breaks are read back unchanged.
    with open(csv_file_path, 'r', encoding='utf-8', newline='') as csv_file:
      # Materialise all rows while the file is still open
      data = list(csv.DictReader(csv_file))

    # allow_unicode=True writes non-ASCII characters as-is instead of escapes
    with open(yaml_file_path, 'w', encoding='utf-8') as yaml_file:
      yaml.dump(data, yaml_file, allow_unicode=True)

    print(f'Successfully converted {csv_file_path} to {yaml_file_path}.')

  except FileNotFoundError:
    # Note: opening the output path can raise this too (missing directory)
    print(f'Error: CSV file {csv_file_path} not found.')

  except Exception as e:
    print(f'An error occurred: {e}')

# Example Usage:
# Input CSV, given as an absolute path under /content/drive/MyDrive.
# NOTE(review): the file name contains a space before "_test" - confirm it
# matches the real file name.
csv_file = '/content/drive/MyDrive/Adversarial_chatbot_dataset _test.csv'
# Output YAML, written relative to the current working directory
yaml_file = 'hemanth_adversarial.yaml'
csv_to_yaml(csv_file, yaml_file)

In [None]:
!pip install pyyaml


In [None]:
import yaml

# YAML file to preview, relative to the current working directory
# (the same name the csv_to_yaml example writes to).
yaml_file = 'hemanth_adversarial.yaml'

try:
    # Open the YAML file for reading
    with open(yaml_file, 'r', encoding='utf-8') as file:
        # safe_load builds only plain Python objects. The indexing below assumes
        # a non-empty list of dicts with an 'output' key - TODO confirm against
        # the CSV header.
        data = yaml.safe_load(file)
        # Show the 'output' field of the first record only
        # (for a list, data[:][:1][0] copies the list, slices it, and ends up
        # at the same element as data[0])
        print(data[:][:1][0]['output'])

except FileNotFoundError:
    print(f"File '{yaml_file}' not found.")

except Exception as e:
    # Any other failure (malformed YAML, empty list, missing key, ...) is
    # reported as a one-line message instead of a traceback.
    print(f"Error: {e}")


In [None]:
import os
import json
import yaml

def read_files(folder_path, extensions):
  """Read the files in a folder whose names end with one of the extensions.

  Only the folder itself is scanned, not its subfolders. ``.json`` files are
  parsed with ``json.load``, ``.yaml`` files with ``yaml.safe_load``, and all
  other matching files are returned as text. A file that cannot be read is
  reported with ``print`` and skipped.

  Note: a later cell of this notebook defines ``read_files`` again with an
  extra ``recursive`` parameter; running that cell replaces this definition.

  Args:
    folder_path (str): Path to the folder containing the files.
    extensions (list | str): File extensions to filter by (e.g., '.md',
      '.py'). A single string is treated as a one-element list.

  Returns:
    A dictionary of file contents, with file paths as keys and content as values.

  Raises:
    ValueError: If ``folder_path`` is not a directory or ``extensions`` is empty.
  """

  # Validate input
  if not os.path.isdir(folder_path):
    raise ValueError(f'Invalid folder path: {folder_path}')
  if not extensions:
    raise ValueError('No file extensions specified.')

  # tuple('.md') would yield ('.', 'm', 'd') and match almost anything, so a
  # bare string is wrapped before building the suffix tuple.
  if isinstance(extensions, str):
    extensions = [extensions]
  suffixes = tuple(extensions)

  file_contents = {}

  for file in os.listdir(folder_path):
    if not file.endswith(suffixes):
      continue

    file_path = os.path.join(folder_path, file)

    # A directory can also have a matching name (e.g. "notes.md/"); skip it
    if not os.path.isfile(file_path):
      continue

    try:
      # Explicit UTF-8 keeps reading independent of the platform's locale
      with open(file_path, 'r', encoding='utf-8') as f:
        if file.endswith('.json'):
          file_content = json.load(f)
        elif file.endswith('.yaml'):
          file_content = yaml.safe_load(f)
        else:
          file_content = f.read()
    except Exception as e:
      # Best effort: report the unreadable file and carry on with the rest
      print(f'Error reading file {file_path}: {e}')
      continue

    file_contents[file_path] = file_content

  return file_contents


# Example usage
if __name__ == '__main__':
  # Folder to scan. The path must not carry trailing whitespace:
  # os.path.isdir treats it as part of the name, and read_files then raises
  # ValueError for the non-existent folder.
  folder_path = '/content/drive/MyDrive'

  # File extensions to include
  extensions = ['.md', '.py', '.csv', '.json', '.yaml']

  # Read every matching file in the folder
  file_contents = read_files(folder_path, extensions)

  # Print each path followed by its parsed or raw content
  for file_path, content in file_contents.items():
    print(f'{file_path}:\n{content}')

In [None]:
import os
import json
import yaml

def read_files(folder_path, extensions, recursive=False):
  """Read files with the given extensions from a folder, optionally recursing.

  ``.json`` files are parsed with ``json.load``, ``.yaml`` files with
  ``yaml.safe_load``, and all other matching files are returned as text.
  A file that cannot be read is reported with ``print`` and skipped.

  Args:
    folder_path (str): Path to the folder containing the files.
    extensions (list | str): File extensions to filter by (e.g., '.md',
      '.py'). A single string is treated as a one-element list.
    recursive (bool, optional): Whether to recursively search subfolders. Defaults to False.

  Returns:
    A dictionary of file contents, with file paths as keys and content as values.

  Raises:
    ValueError: If ``folder_path`` is not a directory or ``extensions`` is empty.
  """

  # Validate input
  if not os.path.isdir(folder_path):
    raise ValueError(f'Invalid folder path: {folder_path}')
  if not extensions:
    raise ValueError('No file extensions specified.')

  # tuple('.md') would yield ('.', 'm', 'd') and match almost anything, so a
  # bare string is wrapped before building the suffix tuple.
  if isinstance(extensions, str):
    extensions = [extensions]
  suffixes = tuple(extensions)

  file_contents = {}

  for file_or_folder in os.listdir(folder_path):
    file_path = os.path.join(folder_path, file_or_folder)

    # Subfolders are only descended into when recursion was requested
    if os.path.isdir(file_path):
      if recursive:
        file_contents.update(read_files(file_path, extensions, recursive))
      continue

    if not os.path.isfile(file_path) or not file_path.endswith(suffixes):
      continue

    try:
      # Explicit UTF-8 keeps reading independent of the platform's locale
      with open(file_path, 'r', encoding='utf-8') as f:
        if file_path.endswith('.json'):
          file_content = json.load(f)
        elif file_path.endswith('.yaml'):
          file_content = yaml.safe_load(f)
        else:
          file_content = f.read()
    except Exception as e:
      # Best effort: report the unreadable file and carry on with the rest
      print(f'Error reading file {file_path}: {e}')
      continue

    file_contents[file_path] = file_content

  return file_contents

In [None]:
# Folder to scan. The path must not carry trailing whitespace: os.path.isdir
# treats it as part of the name, and read_files then raises ValueError for
# the non-existent folder.
folder_path = '/content/drive/MyDrive'
extensions = ['.md', '.py', '.csv', '.json', '.yaml']

# recursive=True also reads matching files from every subfolder
file_contents = read_files(folder_path, extensions, recursive=True)

# Print each path followed by its parsed or raw content
for file_path, content in file_contents.items():
  print(f'{file_path}:\n{content}')

In [None]:
!pip install PyPDF2

In [None]:
from IPython.display import display, Javascript

# Ask the notebook front end, through injected JavaScript, to run the Python
# statement `config.IOPubDataRateLimit = 1000000000` in the kernel.
# The value is 1,000,000,000; if the unit is bytes per second that is about
# 1 GB/s, not 10 MB/s.
# NOTE(review): `config` is not defined anywhere in this notebook, and the
# `IPython.notebook` JavaScript object may not exist in every front end -
# confirm this call actually changes the output rate limit.
display(Javascript("IPython.notebook.kernel.execute('config.IOPubDataRateLimit = 1000000000')"))

In [None]:
!pip install pandas python-pptx pyyaml

In [None]:

import os
import json
import yaml
import re
import pandas as pd
from pptx import Presentation
from io import StringIO

def _read_text(handle):
    """Return the full content of an open file handle."""
    return handle.read()


def _read_csv_table(handle):
    """Parse an open CSV handle with pandas."""
    return pd.read_csv(handle)


def _read_json_document(handle):
    """Parse an open JSON handle."""
    return json.load(handle)


def _read_yaml_document(handle):
    """Parse an open YAML handle with the safe loader."""
    return yaml.safe_load(handle)


def _read_excel_table(handle):
    """Parse an open Excel handle with pandas."""
    return pd.read_excel(handle)


def _read_slides(handle):
    """Extract slide text through read_pptx (looked up at call time)."""
    return read_pptx(handle)


# Maps a file extension (with its leading dot) to a one-argument callable that
# takes an open file handle and returns the file's content.
EXTENSION_READERS = dict([
    ('.md', _read_text),
    ('.py', _read_text),
    ('.csv', _read_csv_table),
    ('.json', _read_json_document),
    ('.yaml', _read_yaml_document),
    ('.txt', _read_text),
    ('.xml', _read_text),
    ('.html', _read_text),
    ('.css', _read_text),
    ('.js', _read_text),
    ('.java', _read_text),
    ('.cpp', _read_text),
    ('.h', _read_text),
    ('.php', _read_text),
    ('.rb', _read_text),
    ('.sql', _read_text),
    ('.xls', _read_excel_table),
    ('.xlsx', _read_excel_table),
    ('.ppt', _read_slides),
    ('.pptx', _read_slides),
])

def read_pptx(file):
    """Return the text of every shape in a presentation, one shape per line.

    The presentation is opened with python-pptx's ``Presentation``. Shapes
    are visited slide by slide, and only shapes exposing a ``text`` attribute
    contribute to the result.
    """
    deck = Presentation(file)
    shape_texts = [
        shape.text
        for slide in deck.slides
        for shape in slide.shapes
        if hasattr(shape, "text")
    ]
    return "\n".join(shape_texts)
# Regular expression for file names that end in one of the handled extensions
# (the same extensions that are keys of EXTENSION_READERS). It is applied with
# re.match and no flags, so matching is case-sensitive, and the trailing `$`
# requires the extension to sit at the very end of the name.
EXTENSION_PATTERN = r".*\.(md|py|csv|json|yaml|txt|xml|html|css|js|java|cpp|h|php|rb|sql|xls|xlsx|ppt|pptx)$"

def list_files_with_extensions(directory_path):
    """List the entries of a directory whose names match EXTENSION_PATTERN.

    Args:
        directory_path (str): Directory to list (not searched recursively).

    Returns:
        A list of matching entry names (names only, not full paths), or None
        after printing a message when the directory does not exist.
    """
    try:
        entries = os.listdir(directory_path)
    except FileNotFoundError:
        print(f"The directory {directory_path} was not found.")
        return None

    matching = []
    for entry in entries:
        if re.match(EXTENSION_PATTERN, entry):
            matching.append(entry)
    return matching

# Formats whose readers need the raw bytes of the file: pandas.read_excel and
# python-pptx's Presentation cannot parse a handle opened in text mode.
_BINARY_EXTENSIONS = frozenset({'.xls', '.xlsx', '.ppt', '.pptx'})


def read_file_content(directory_path, filename):
    """Read one file with the reader registered for its extension.

    Args:
        directory_path (str): Directory that contains the file.
        filename (str): Name of the file inside ``directory_path``.

    Returns:
        Whatever the matching EXTENSION_READERS entry returns for the file,
        or None when the extension has no reader or reading fails (the
        failure is reported with ``print``).
    """
    try:
        extension = os.path.splitext(filename)[1]
        file_reader = EXTENSION_READERS.get(extension)
        if file_reader is None:
            return None

        file_path = os.path.join(directory_path, filename)

        # Spreadsheet and presentation files are binary containers; opening
        # them in text mode fails while decoding or inside the reader.
        # NOTE(review): legacy `.ppt` files are presumably not readable by
        # python-pptx at all; if so they end up in the except branch below.
        if extension in _BINARY_EXTENSIONS:
            with open(file_path, 'rb') as file:
                return file_reader(file)

        # Explicit UTF-8 keeps text decoding independent of the locale
        with open(file_path, 'r', encoding='utf-8') as file:
            return file_reader(file)
    except Exception as e:
        print(f"An error occurred while reading the file {filename}: {e}")
        return None

def process_files(directory_path):
    """Print the content of every supported file found in a directory.

    File names come from list_files_with_extensions; when it returns None
    nothing is printed. Files for which read_file_content returns None are
    skipped.
    """
    candidates = list_files_with_extensions(directory_path)

    # None (directory missing) and an empty list both mean nothing to print
    for filename in candidates or ():
        content = read_file_content(directory_path, filename)
        if content is None:
            continue
        print(f"--- File: {filename} ---")
        print(content)
        print("-------------------------------\n")

def main():
    """Run process_files on the example folder."""
    # NOTE(review): the path ends with a space ("Models ") - confirm that the
    # folder is really named that way.
    process_files("/content/drive/MyDrive/Models ")

# Run the example only when this code executes as the main module
# (skipped when the code is imported from elsewhere).
if __name__ == "__main__":
    main()

In [None]:
import os
from collections import defaultdict

# Step 1: Count subfolders and file types
def count_files_and_subfolders(starting_directory):
    """Count the subfolders and the files per extension below a directory.

    Args:
        starting_directory (str): Root of the tree to walk with ``os.walk``.

    Returns:
        A tuple ``(total_subfolders, file_types)``: the number of directories
        found anywhere below the root, and a ``defaultdict(int)`` mapping each
        extension (as returned by ``os.path.splitext``, so with its leading
        dot, or '' when there is none) to the number of files having it.
    """
    tally = defaultdict(int)
    subfolder_total = 0

    for _folder, subdirs, names in os.walk(starting_directory):
        subfolder_total += len(subdirs)
        for ext in (os.path.splitext(name)[1] for name in names):
            tally[ext] += 1

    return subfolder_total, tally

# Step 2: Create dictionary of file paths
def create_extension_dictionary(starting_directory):
    """Group the paths of all files below a directory by file extension.

    Args:
        starting_directory (str): Root of the tree to walk with ``os.walk``.

    Returns:
        A ``defaultdict(list)`` mapping each extension (as returned by
        ``os.path.splitext``) to the list of paths of the files having it;
        each path is the walked folder joined with the file name.
    """
    grouped = defaultdict(list)

    located = (
        (folder, name)
        for folder, _subdirs, names in os.walk(starting_directory)
        for name in names
    )
    for folder, name in located:
        _stem, ext = os.path.splitext(name)
        grouped[ext].append(os.path.join(folder, name))

    return grouped

# Step 3: User recommendations
def user_recommendations():
    """Return a dict of file-management tips keyed by topic name."""
    topics = ("organizing", "cleanup", "renaming", "processes", "information")
    tips = (
        "Consider using os.makedirs to create directories for each file type.",
        "Use os.unlink to remove files or os.rmdir/os.removedirs to remove directories.",
        "Employ os.rename to rename files or directories.",
        "Use os.system or subprocess.run to execute shell commands.",
        "Use os.path functions like os.path.getsize to check file sizes.",
    )
    # Pair each topic with its tip, keeping the order above
    return dict(zip(topics, tips))

# Example usage
directory_to_scan = "/content/drive/MyDrive"  # replace with the actual directory path
# Each helper walks the directory tree on its own, so the tree is traversed twice
subfolders_count, file_type_counts = count_files_and_subfolders(directory_to_scan)
extension_file_paths = create_extension_dictionary(directory_to_scan)

# The defaultdicts are converted to plain dicts for a cleaner printed form
print(f"Total subfolders: {subfolders_count}")
print(f"File type counts: {dict(file_type_counts)}")

# The paths dictionary can be huge, so be careful printing it
# (the line below prints every collected path)
print(f"Extension paths: {dict(extension_file_paths)}")

# Print each recommendation as "Recommendation - <topic>: <tip>"
recs = user_recommendations()
for key, value in recs.items():
    print(f"Recommendation - {key}: {value}")