In [4]:
import pandas as pd
import os

# Directory that holds the input tables and also receives the combined output
directorio = '/home/jupyter-user5/Camda24_resistance/DataSets'

# Read the two gzipped, tab-separated resistance tables
meropenem_df = pd.read_csv(os.path.join(directorio, 'ResistanceMeropenemStrict.tsv.gz'), compression='gzip', sep='\t')
ciprofloxacin_df = pd.read_csv(os.path.join(directorio, 'ResistanceCiprofloxacinStrict.tsv.gz'), compression='gzip', sep='\t')

# Tag every row with its source antibiotic, placed as the first column
meropenem_df.insert(0, 'antibiotico', 'meropenem')
ciprofloxacin_df.insert(0, 'antibiotico', 'ciprofloxacin')

# Stack the two tables; ignore_index renumbers the rows 0..n-1
df_combinado = pd.concat([meropenem_df, ciprofloxacin_df], ignore_index=True)

# Save the combined table as an uncompressed TSV without the row index
archivo_salida = os.path.join(directorio, 'resistencia_antibioticos_combinado.tsv')
df_combinado.to_csv(archivo_salida, sep='\t', index=False)

print(f"Archivo combinado guardado como: {archivo_salida}")
print(f"Dimensiones del archivo combinado: {df_combinado.shape}")
print("\nPrimeras filas del archivo combinado:")
print(df_combinado.head())

# Show dtype / non-null information about the data.
# DataFrame.info() writes its report itself and returns None, so it is called
# directly; wrapping it in print() would emit a stray "None" line afterwards.
print("\nInformación sobre los datos:")
df_combinado.info()

# Show summary statistics
print("\nResumen estadístico:")
print(df_combinado.describe())

# Count the rows contributed by each value of the 'antibiotico' column
print("\nConteo de filas por antibiótico:")
print(df_combinado['antibiotico'].value_counts())

  exec(code_obj, self.user_global_ns, self.user_ns)


Archivo combinado guardado como: /home/jupyter-user5/Camda24_resistance/DataSets/resistencia_antibioticos_combinado.tsv
Dimensiones del archivo combinado: (7772, 881)

Primeras filas del archivo combinado:
  antibiotico      accession          genus    species  phenotype  mic  \
0   meropenem  GCA_002947415  Acinetobacter  baumannii  Resistant  8.0   
1   meropenem  GCA_002947845  Acinetobacter  baumannii  Resistant  8.0   
2   meropenem  GCA_002948925  Acinetobacter  baumannii  Resistant  8.0   
3   meropenem  GCA_002996805  Acinetobacter  baumannii  Resistant  8.0   
4   meropenem  GCA_003006035  Acinetobacter  baumannii  Resistant  8.0   

   3005053  3000830  3003838  3000508  ...  3007751-D87Y  3003926-D87Y  \
0      0.0      0.0      0.0      0.0  ...           0.0           0.0   
1      0.0      0.0      0.0      0.0  ...           0.0           0.0   
2      0.0      0.0      0.0      0.0  ...           0.0           0.0   
3      0.0      0.0      0.0      0.0  ...           

In [5]:
import pandas as pd
import os

def combine_resistance_data(directory):
    """
    Merge the meropenem and ciprofloxacin resistance tables into one TSV.

    Both gzipped TSV inputs are read from ``directory``; each gets a leading
    'antibiotic' column naming its source, the rows are stacked, and the
    result is written to 'combined_antibiotic_resistance.tsv' in the same
    directory. If either input cannot be loaded, a message is printed and
    no file is written.

    Args:
        directory (str): Directory holding the inputs and receiving the output.

    Returns:
        None
    """
    def read_labelled(file_path, antibiotic):
        """
        Read one gzipped TSV and prepend an 'antibiotic' column.

        Args:
            file_path (str): Location of the gzipped TSV file.
            antibiotic (str): Value stored in the new first column.

        Returns:
            pd.DataFrame: The labelled table, or None when loading or
            labelling raised (the error is printed, not re-raised).
        """
        try:
            table = pd.read_csv(file_path, sep='\t', compression='gzip')
            table.insert(0, 'antibiotic', antibiotic)
        except Exception as e:
            print(f"Error loading the file {file_path}: {e}")
            return None
        return table

    # (input file name, antibiotic label) in the order the rows are stacked
    sources = (
        ('ResistanceMeropenemStrict.tsv.gz', 'meropenem'),
        ('ResistanceCiprofloxacinStrict.tsv.gz', 'ciprofloxacin'),
    )

    # Every source is attempted, even when an earlier one failed to load
    tables = [
        read_labelled(os.path.join(directory, file_name), label)
        for file_name, label in sources
    ]

    # Guard clause: bail out unless every table loaded
    if any(table is None for table in tables):
        print("Failed to load DataFrames properly.")
        return

    stacked = pd.concat(tables, ignore_index=True)
    destination = os.path.join(directory, 'combined_antibiotic_resistance.tsv')
    stacked.to_csv(destination, sep='\t', index=False)



In [6]:
# Combine the meropenem and ciprofloxacin tables stored in the DataSets
# directory. With the one-argument definition of combine_resistance_data,
# the combined TSV is written back into this same directory.
combine_resistance_data('/home/jupyter-user5/Camda24_resistance/DataSets')



In [7]:
import pandas as pd
import os

def combine_resistance_data(data_directory, save_directory=None):
    """
    Loads and combines antibiotic resistance data from gzipped TSV files,
    and saves the result in a new TSV file in the specified save directory.

    'ResistanceMeropenemStrict.tsv.gz' and 'ResistanceCiprofloxacinStrict.tsv.gz'
    are read from ``data_directory``; each table gets a leading 'antibiotic'
    column naming its source, the rows are concatenated, and the result is
    written as 'combined_antibiotic_resistance.tsv'. If either input fails to
    load, a message is printed and nothing is written.

    Args:
        data_directory (str): Path to the directory containing the data files.
        save_directory (str, optional): Path to the directory where the
            combined file will be saved. It is created (including parents)
            when it does not exist. Defaults to None, which saves the file
            in ``data_directory``.

    Returns:
        None

    Raises:
        OSError: If the save directory cannot be created or the output file
            cannot be written.
    """
    def load_data(file_path, antibiotic):
        """
        Loads a gzipped TSV file and adds an antibiotic column.

        Args:
            file_path (str): Path to the file to load.
            antibiotic (str): Name of the antibiotic to add in the column.

        Returns:
            pd.DataFrame: Loaded DataFrame with the antibiotic column, or None if there's an error.
        """
        try:
            # Read the gzipped TSV file
            df = pd.read_csv(file_path, compression='gzip', sep='\t')
            # Insert the antibiotic column as the first column
            df.insert(0, 'antibiotic', antibiotic)
            return df
        except Exception as e:
            # Best-effort load: report the problem and let the caller decide
            print(f"Error loading the file {file_path}: {e}")
            return None

    # Fall back to the data directory so one-argument calls keep working
    if save_directory is None:
        save_directory = data_directory

    # Load the resistance data for meropenem and ciprofloxacin
    meropenem_df = load_data(os.path.join(data_directory, 'ResistanceMeropenemStrict.tsv.gz'), 'meropenem')
    ciprofloxacin_df = load_data(os.path.join(data_directory, 'ResistanceCiprofloxacinStrict.tsv.gz'), 'ciprofloxacin')

    # Check if both DataFrames loaded successfully
    if meropenem_df is not None and ciprofloxacin_df is not None:
        # Combine the DataFrames
        combined_df = pd.concat([meropenem_df, ciprofloxacin_df], ignore_index=True)

        # to_csv does not create missing directories, so make sure the target
        # exists first. An empty string means the current working directory
        # and must be skipped, because os.makedirs('') raises.
        if save_directory:
            os.makedirs(save_directory, exist_ok=True)

        # Save the combined DataFrame to a new TSV file in the specified save directory
        output_file = os.path.join(save_directory, 'combined_antibiotic_resistance.tsv')
        combined_df.to_csv(output_file, sep='\t', index=False)

        print(f"Combined file saved as: {output_file}")
    else:
        print("Failed to load DataFrames properly.")



In [9]:
# Example usage: read the two resistance tables from the DataSets directory
# and write the combined TSV into its group-2/data/ subdirectory.
combine_resistance_data('/home/jupyter-user5/Camda24_resistance/DataSets', "/home/jupyter-user5/Camda24_resistance/DataSets/group-2/data/")




Combined file saved as: /home/jupyter-user5/Camda24_resistance/DataSets/group-2/data/combined_antibiotic_resistance.tsv
