In [2]:
import pandas as pd


def modify_diagonal(df):
  """Replaces each main-diagonal entry of a dataframe with its column's mean.

  The entry at position (i, i) is overwritten with the mean of column i,
  taken over all rows (the old diagonal value included). For a non-square
  dataframe the diagonal only has min(n_rows, n_cols) entries, so only
  those positions are touched.

  The dataframe is modified in place; pass a copy to keep the original.

  Args:
      df: A pandas dataframe.

  Returns:
      The same dataframe object, with the modified diagonal.
  """
  # The diagonal stops at the shorter axis. Looping over every row would
  # index past the last column whenever there are more rows than columns.
  diagonal_length = min(df.shape)
  for i in range(diagonal_length):
    # Earlier iterations only wrote into other columns, so column i still
    # holds its original values when its mean is taken here.
    # NOTE(review): writing a fractional mean into an integer column relies
    # on pandas upcasting that column; confirm this against the pandas
    # version in use.
    df.iloc[i, i] = df.iloc[:, i].mean()
  return df

# Demo: a 3x4 frame, so only the first three columns get a diagonal entry.
example_df = pd.DataFrame(
    data=[
        [1.5, 2, 3, 4],
        [5, 1, 7, 8],
        [9, 6, 2, 1],
    ],
    columns=['A', 'B', 'C', 'D'],
)
print(example_df)

# Hand over a copy: modify_diagonal writes into the frame it is given.
result_df = modify_diagonal(example_df.copy())
print(result_df)


     A  B  C  D
0  1.5  2  3  4
1  5.0  1  7  8
2  9.0  6  2  1
          A  B  C  D
0  5.166667  2  3  4
1  5.000000  3  7  8
2  9.000000  6  4  1


In [7]:
import pandas as pd

def remove_sparse_rows_cols(df, percent_threshold):
    """
    Removes rows and columns from a DataFrame where more than a specified
    fraction of values are zero.

    Rows are filtered first. The zero fraction of each column is then
    measured on the surviving rows only, so a column can be dropped (or
    kept) because of which rows were removed before it was examined.
    A row or column whose zero fraction is exactly equal to the threshold
    is kept: only strictly larger fractions are removed.

    Args:
        df (pd.DataFrame): The DataFrame to filter. It is not modified.
        percent_threshold (float): The share of zeros above which a row
            or column will be removed, expressed as a fraction between
            0 and 1 (e.g. 0.36 for 36%).

    Returns:
        pd.DataFrame: The filtered DataFrame.
    """

    # Filter rows. The zero fraction is compared with the threshold directly
    # (instead of the non-zero fraction against `1 - threshold`) so that the
    # boundary case is kept and no rounding error from the subtraction can
    # tip the comparison.
    row_zero_fraction = (df == 0).mean(axis=1)
    df = df.loc[row_zero_fraction <= percent_threshold]

    # Filter columns, looking only at the rows that survived.
    col_zero_fraction = (df == 0).mean(axis=0)
    df = df.loc[:, col_zero_fraction <= percent_threshold]

    return df

# Example usage

# Sample frame: five rows, four columns, with zeros scattered throughout.
data = dict(
    col1=[0, 1, 0, 4, 1],
    col2=[5, 6, 0, 0, 0],
    col3=[0, 0, 8, 9, 10],
    col4=[11, 0, 13, 0, 15],
)
df = pd.DataFrame(data)

# Filter with a threshold of 0.36, i.e. 36% zeros.
filtered_df = remove_sparse_rows_cols(df, percent_threshold=0.36)
print(filtered_df)


   col1  col3  col4
4     1    10    15


In [11]:
import pandas as pd

# Small frame whose column 'A' and last row hold only zeros.
matrix = pd.DataFrame(
    [
        [0, 0, 3],
        [0, 1, 7],
        [0, 0, 0],
    ],
    columns=['A', 'B', 'C'],
)

# Keep only the columns holding at least one non-zero value. Rows are not
# filtered here, so the all-zero last row stays in the result.
result = matrix.loc[:, matrix.ne(0).any(axis=0)]

# Show the column-filtered frame
print(result)


   B  C
0  0  3
1  1  7
2  0  0


In [14]:
import pandas as pd
import numpy as np


# Fixed seed so the random 5x5 frame of digits 0-9 is the same on every run.
np.random.seed(2)
df = pd.DataFrame(np.random.randint(low=0, high=10, size=(5, 5)))

# Blank out the first row and the first column.
df.iloc[0, :] = 0
df.iloc[:, 0] = 0
print(df)

def g(df):
    """Drops the rows and columns of a DataFrame that hold no non-zero value.

    A row or column is kept as soon as it contains at least one entry that
    is neither zero nor missing. Both masks are computed on the frame as
    passed in, so dropping a row never changes which columns are dropped.

    Args:
        df (pd.DataFrame): The DataFrame to filter. It is not modified.

    Returns:
        pd.DataFrame: The remaining rows and columns, with their original
        labels.
    """
    # Look at the entries themselves rather than at row/column totals:
    # a total can be zero while the entries are not (1 and -1 cancel out).
    # Missing values are excluded so that they count as "nothing there",
    # the same way a sum would skip them.
    has_value = df.ne(0) & df.notna()
    return df.loc[has_value.any(axis=1), has_value.any(axis=0)]

# Run g on a copy of df and print the frame it returns.
result = g(df.copy())
print(result)



   0  1  2  3  4
0  0  0  0  0  0
1  0  2  1  5  4
2  0  5  7  3  6
3  0  3  7  6  1
4  0  5  8  4  6
   1  2  3  4
1  2  1  5  4
2  5  7  3  6
3  3  7  6  1
4  5  8  4  6
