# Skeleton Notebook

This notebook contains general transformations that can be reused across different projects.

## Import Necessary Libraries

The following libraries are essential for data processing and should be included in every module.

In [2]:
# Import necessary libraries
import pandas as pd
import os
import re

print("Skeleton setup complete!")

Skeleton setup complete!


# Define Variables

Set the variables for file paths, base names, and other configurations. Update these variables for each specific project.

In [3]:
# --- Project-specific settings: review these for every new project ---

# Folder that holds the CSV files to process
directory_path = '/workspaces/Finetwork-Automation/sample_folder'

# Common prefix shared by the numbered CSV files (e.g. "8." -> "8.01.csv")
base_name = '8.'

# Full path of one example file: the "01" file of the series
single_file_path = os.path.join(directory_path, base_name + "01.csv")

# Content of a specific cell of interest
value = "mariangeles.bueso@originaltelecom.es"

print("Variables defined correctly!")

Variables defined correctly!


# Function to Load Single Data File

This function reads a single semicolon-delimited CSV file and loads it into a pandas DataFrame.

In [2]:
def load_single_data(file_path):
    """
    Read one semicolon-delimited CSV file into a DataFrame.

    Parameters:
    file_path (str): Path of the CSV file to read.

    Returns:
    pd.DataFrame: The contents of the file.
    """
    frame = pd.read_csv(file_path, sep=';')
    print(f"File {file_path} read correctly.")
    return frame

# Function to Load Multiple Files Automatically

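This function reads consecutively numbered CSV files (`<base_name>01.csv`, `<base_name>02.csv`, ...) from a directory, stops at the first number whose file does not exist, and returns the loaded DataFrames as a list.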

In [None]:
def load_multiple_files(directory_path, base_name):
    """
    Load consecutively numbered CSV files until the first gap.

    Files are expected to be named "<base_name>NN.csv", with NN starting at
    01 and zero-padded to at least two digits. Each file is read as a
    semicolon-delimited CSV.

    Parameters:
    directory_path (str): Directory that contains the CSV files.
    base_name (str): Prefix shared by all file names.

    Returns:
    list: One DataFrame per file found, in numeric order (empty if the
    "01" file does not exist).
    """
    loaded = []
    index = 1

    while True:
        file_name = f"{base_name}{index:02d}.csv"
        candidate = os.path.join(directory_path, file_name)

        # Guard clause: the first missing number ends the scan
        if not os.path.exists(candidate):
            print(f"File not found: {file_name}. Stopping the loop.")
            return loaded

        loaded.append(pd.read_csv(candidate, sep=';'))
        print(f"Read file: {file_name}")
        index += 1

## Function to Remove a Column if it Contains a Specific Cell Value

This function removes every column of the DataFrame in which at least one cell, converted to text, contains the specified string or numeric value.

In [None]:
def remove_column_if_contains(df, value):
    """
    Remove columns if any cell in the column contains the specified value.

    Cells and the value are compared through their string form, and the
    value is matched as a literal substring (not as a regular expression),
    so characters such as "." or "(" have no special meaning.

    Parameters:
    df (pd.DataFrame): The DataFrame to inspect. It is not modified.
    value (str or int or float): The value to check for in the cells.

    Returns:
    pd.DataFrame: A new DataFrame without the matching columns.
    """
    needle = str(value)
    cols_to_remove = [
        col
        for col in df.columns
        # regex=False: a plain substring test, consistent with the row helpers
        if df[col].astype(str).str.contains(needle, regex=False).any()
    ]
    df = df.drop(columns=cols_to_remove)
    print(f"Columns containing the value '{value}' removed successfully: {cols_to_remove}")
    return df

## Function to Remove a Row if it Contains a Specific Cell Value

This function removes rows from the DataFrame if any cell in the row contains a specified string or numeric value.

In [None]:
def remove_row_if_contains(df, value):
    """
    Remove rows if any cell in the row contains the specified value.

    Each cell is converted with str() and tested for the literal substring
    str(value), so a numeric value also matches inside longer numbers
    (for example 2 matches 22).

    Parameters:
    df (pd.DataFrame): The DataFrame to inspect. It is not modified.
    value (str or int or float): The value to check for in the cells.

    Returns:
    pd.DataFrame: A new DataFrame without the matching rows.
    """
    needle = str(value)
    initial_row_count = len(df)

    # Column-wise Series.map replaces DataFrame.applymap, which is deprecated
    # since pandas 2.1. astype(object) makes every cell reach the lambda as a
    # plain Python object regardless of the column dtype.
    hits = df.apply(
        lambda column: column.astype(object).map(lambda cell: needle in str(cell))
    )
    df = df[~hits.any(axis=1)]

    rows_removed = initial_row_count - len(df)
    print(f"Rows containing the value '{value}' removed successfully: {rows_removed}")
    return df

# Function to Get Column Based on Value

This function returns the first column (as a Series) in which any cell contains the specified value, or `None` if no column contains it.

In [None]:
def get_column_by_value(df, value):
    """
    Return the first column in which any cell contains the specified value.

    Cells and the value are compared through their string form, and the
    value is matched as a literal substring (not as a regular expression).

    Parameters:
    df (pd.DataFrame): The DataFrame to search.
    value (str or int or float): The value to look for in the cells.

    Returns:
    pd.Series or None: The first matching column in column order, or None
    when no column contains the value.
    """
    needle = str(value)
    for col in df.columns:
        # regex=False: "." and other metacharacters must match literally
        if df[col].astype(str).str.contains(needle, regex=False).any():
            print(f"Column '{col}' contains the value '{value}'.")
            return df[col]
    print(f"No column contains the value '{value}'.")
    return None

# Function to Remove Column Based on Value

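This function removes every column of the DataFrame in which at least one cell, converted to text, contains the specified string or numeric value. Note: it has the same name as the column-removal function defined earlier in this notebook, so running this cell redefines that function.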

In [None]:
def remove_column_if_contains(df, value):
    """
    Remove columns if any cell in the column contains the specified value.

    Cells and the value are compared through their string form, and the
    value is matched as a literal substring (not as a regular expression),
    so characters such as "." or "(" have no special meaning.

    Parameters:
    df (pd.DataFrame): The DataFrame to inspect. It is not modified.
    value (str or int or float): The value to check for in the cells.

    Returns:
    pd.DataFrame: A new DataFrame without the matching columns.
    """
    needle = str(value)
    cols_to_remove = [
        col
        for col in df.columns
        # regex=False: a plain substring test, consistent with the row helpers
        if df[col].astype(str).str.contains(needle, regex=False).any()
    ]
    df = df.drop(columns=cols_to_remove)
    print(f"Columns containing the value '{value}' removed successfully: {cols_to_remove}")
    return df

# Function to Get Row Based on Value

This function identifies and returns a row that contains a specified value.

In [None]:
def get_row_by_value(df, value):
    """
    Return the rows in which any cell contains the specified value.

    Each cell is converted with str() and tested for the literal substring
    str(value).

    Parameters:
    df (pd.DataFrame): The DataFrame to search. It is not modified.
    value (str or int or float): The value to look for in the cells.

    Returns:
    pd.DataFrame or None: The matching rows with their original index
    labels, or None when no row contains the value.
    """
    needle = str(value)

    # Per-cell substring test, evaluated column by column (avoids the
    # deprecated DataFrame.applymap).
    hits = df.apply(
        lambda column: column.astype(object).map(lambda cell: needle in str(cell))
    )

    # Reduce the cell-level mask to one boolean per row. Indexing with the
    # full boolean frame would only blank out non-matching cells and keep
    # every row, so the "not found" branch could never trigger.
    rows = df[hits.any(axis=1)]

    if not rows.empty:
        print(f"Row(s) containing the value '{value}' found.")
        return rows
    print(f"No row contains the value '{value}'.")
    return None

# Function to Remove Row Based on Value

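This function removes every row of the DataFrame in which at least one cell, converted to text, contains the specified string or numeric value. Note: it has the same name as the row-removal function defined earlier in this notebook, so running this cell redefines that function.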

In [None]:
def remove_row_if_contains(df, value):
    """
    Remove rows if any cell in the row contains the specified value.

    Each cell is converted with str() and tested for the literal substring
    str(value), so a numeric value also matches inside longer numbers
    (for example 2 matches 22).

    Parameters:
    df (pd.DataFrame): The DataFrame to inspect. It is not modified.
    value (str or int or float): The value to check for in the cells.

    Returns:
    pd.DataFrame: A new DataFrame without the matching rows.
    """
    needle = str(value)
    initial_row_count = len(df)

    # Column-wise Series.map replaces DataFrame.applymap, which is deprecated
    # since pandas 2.1. astype(object) makes every cell reach the lambda as a
    # plain Python object regardless of the column dtype.
    hits = df.apply(
        lambda column: column.astype(object).map(lambda cell: needle in str(cell))
    )
    df = df[~hits.any(axis=1)]

    rows_removed = initial_row_count - len(df)
    print(f"Rows containing the value '{value}' removed successfully: {rows_removed}")
    return df

# Perform Mathematical Operations

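This function performs one basic arithmetic operation (add, subtract, multiply, or divide) between two columns of a DataFrame and returns the result as a new Series; the DataFrame itself is not changed.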

In [None]:
def perform_math_operation(df, colA, colB, operation='add'):
    """
    Combine two columns element-wise with a basic arithmetic operation.

    Parameters:
    df (pd.DataFrame): The DataFrame that holds both columns. It is not modified.
    colA (str): Name of the left-hand column.
    colB (str): Name of the right-hand column.
    operation (str): One of 'add', 'subtract', 'multiply', 'divide'.

    Returns:
    pd.Series: The result of applying the operation to colA and colB.

    Raises:
    ValueError: If operation is not one of the supported names.
    """
    supported = (
        ('add', lambda left, right: left + right),
        ('subtract', lambda left, right: left - right),
        ('multiply', lambda left, right: left * right),
        ('divide', lambda left, right: left / right),
    )

    for name, compute in supported:
        if operation == name:
            # Columns are only looked up once the operation name is known to be valid
            result = compute(df[colA], df[colB])
            break
    else:
        raise ValueError("Operation must be 'add', 'subtract', 'multiply', or 'divide'.")

    print(f"Operation '{operation}' between columns '{colA}' and '{colB}' performed successfully.")
    return result

# Replace Existing Column

This function replaces an existing column in the DataFrame with new values.

In [None]:
def replace_column(df, col_name, new_values):
    """
    Assign new values to a column of the DataFrame.

    The DataFrame passed in is modified in place and also returned.

    Parameters:
    df (pd.DataFrame): The DataFrame to modify.
    col_name (str): Name of the column to assign.
    new_values: Values to store in the column.

    Returns:
    pd.DataFrame: The same DataFrame object, after the assignment.
    """
    df[col_name] = new_values
    status = f"Column '{col_name}' replaced successfully."
    print(status)
    return df

# Create New Column

This function creates a new column in the DataFrame with specified values.

In [None]:
def create_new_column(df, new_col_name, values):
    """
    Store values under a column name in the DataFrame.

    The DataFrame passed in is modified in place and also returned.

    Parameters:
    df (pd.DataFrame): The DataFrame to modify.
    new_col_name (str): Name of the column to assign.
    values: Values to store in the column.

    Returns:
    pd.DataFrame: The same DataFrame object, after the assignment.
    """
    df[new_col_name] = values
    status = f"New column '{new_col_name}' created successfully."
    print(status)
    return df

# Create New DataFrame and File

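This function copies the DataFrame, adds a column to the copy, saves the copy to a CSV file (without the index), and returns it; the original DataFrame is left unchanged.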

In [None]:
def create_new_dataframe_and_save(df, new_col_name, values, new_file_path):
    """
    Copy a DataFrame, add one column to the copy and write it to a CSV file.

    Parameters:
    df (pd.DataFrame): Source DataFrame. It is not modified.
    new_col_name (str): Name of the column to assign on the copy.
    values: Values to store in that column.
    new_file_path (str): Destination path of the CSV file (written without the index).

    Returns:
    pd.DataFrame: The copy, including the assigned column.
    """
    augmented = df.copy()
    augmented[new_col_name] = values

    # index=False keeps the row index out of the exported file
    augmented.to_csv(new_file_path, index=False)

    print(f"New DataFrame with column '{new_col_name}' saved to '{new_file_path}' successfully.")
    return augmented