In [None]:
import pandas as pd

# Load the taxonomy CSV file
# NOTE(review): this is an absolute, machine-specific path; the cell will not
# run on another machine without editing it.
file_path = '/Volumes/CAS9/Aeiwz/Project/HCC 16s ONT/minimap2/output/sam/taxonomy.csv'
taxonomy_df = pd.read_csv(file_path)

# Display the first few rows of the dataframe to understand its structure
taxonomy_df.head()


In [None]:
import pandas as pd
import numpy as np
from scipy.stats import entropy
from collections import Counter

# Load the taxonomy CSV file
file_path = '/Volumes/CAS9/Aeiwz/Project/HCC 16s ONT/minimap2/output/sam/taxonomy.csv'
taxonomy_df = pd.read_csv(file_path)

# Extract only the sample columns (assume all columns except 'Taxonomy' and other metadata are sample columns)
# The slice keeps every column except the last nine.
# NOTE(review): presumably the last nine columns hold the taxonomy/metadata
# fields -- confirm against the CSV header before trusting the results.
sample_columns = taxonomy_df.columns[:-9]  # Adjust based on your metadata columns

def berger_parker_index(counts):
    """Return the Berger-Parker dominance index of a count vector.

    The value is the largest count divided by the sum of all counts.
    """
    dominant = np.max(counts)
    total = np.sum(counts)
    return dominant / total

def effective_number_of_species(shannon_index):
    """Return ``exp(shannon_index)``, the exponential of a Shannon index."""
    effective_species = np.exp(shannon_index)
    return effective_species

def fishers_alpha(counts):
    """Estimate Fisher's alpha for a vector of per-taxon counts.

    Fisher's log-series model links the number of observed taxa ``S`` and the
    total number of individuals ``N`` through::

        S = alpha * ln(1 + N / alpha)

    This function solves that equation for ``alpha`` with a bracketed root
    finder.

    Parameters
    ----------
    counts : array-like of non-negative numbers
        Abundance of each taxon in one sample. Zero entries do not count as
        observed taxa.

    Returns
    -------
    float
        The fitted alpha. ``nan`` is returned when the sample has no
        individuals (or a non-finite total); ``inf`` is returned when
        ``S >= N`` because the equation then has no finite solution.
    """
    from scipy.optimize import brentq

    counts = np.asarray(counts, dtype=float)
    n_individuals = float(counts.sum())
    # Only taxa that were actually observed contribute to S.
    n_taxa = int(np.count_nonzero(counts > 0))

    if n_taxa == 0 or not np.isfinite(n_individuals) or n_individuals <= 0:
        return float('nan')
    if n_taxa >= n_individuals:
        return float('inf')

    def residual(alpha):
        # Zero exactly when alpha satisfies the log-series relation.
        return alpha * np.log1p(n_individuals / alpha) - n_taxa

    # residual() increases monotonically with alpha: it is negative close to
    # zero and tends to N - S > 0 for large alpha, so grow the upper bound
    # until the root is bracketed.
    lower = 1e-12
    upper = max(float(n_taxa), 1.0)
    while residual(upper) <= 0:
        upper *= 2.0

    return float(brentq(residual, lower, upper))

def inverse_simpson_index(counts):
    """Return the inverse Simpson index, ``1 / sum(p_i ** 2)``.

    ``p_i`` is each count divided by the sum of all counts.
    """
    proportions = counts / np.sum(counts)
    dominance = np.sum(proportions ** 2)
    return 1 / dominance

def pielou_evenness(shannon_index, richness):
    """Return Pielou's evenness: the Shannon index divided by ``ln(richness)``."""
    max_entropy = np.log(richness)
    return shannon_index / max_entropy

def richness(counts):
    """Return how many entries of ``counts`` are strictly greater than zero."""
    is_present = counts > 0
    return np.sum(is_present)

def shannon_diversity_index(counts):
    """Return the Shannon index of ``counts`` in natural-log units.

    Delegates to ``scipy.stats.entropy`` with base ``e``.
    """
    natural_base = np.e
    return entropy(counts, base=natural_base)

def simpson_index(counts):
    """Return Simpson's index, ``sum(p_i ** 2)``.

    ``p_i`` is each count divided by the sum of all counts.
    """
    proportions = counts / np.sum(counts)
    return np.sum(proportions ** 2)

# Per-sample diversity metrics, keyed by sample column name
results = {}

# Fill one metrics record per sample column
for sample in sample_columns:
    counts = taxonomy_df[sample].values
    shannon_index = shannon_diversity_index(counts)
    # Richness feeds both Pielou's evenness and its own column; compute it once.
    observed_taxa = richness(counts)

    sample_metrics = {}
    sample_metrics['Berger Parker index'] = berger_parker_index(counts)
    sample_metrics['Effective number of species'] = effective_number_of_species(shannon_index)
    sample_metrics["Fisher's alpha"] = fishers_alpha(counts)
    sample_metrics["Inverse Simpson's index"] = inverse_simpson_index(counts)
    sample_metrics["Pielou's evenness"] = pielou_evenness(shannon_index, observed_taxa)
    sample_metrics['Richness'] = observed_taxa
    sample_metrics['Shannon diversity index'] = shannon_index
    sample_metrics["Simpson's index"] = simpson_index(counts)
    sample_metrics['Total count'] = np.sum(counts)
    results[sample] = sample_metrics

# One row per sample, one column per metric, ordered by sample name
results_df = pd.DataFrame(results).T.sort_index()
# Display the results
results_df.head()


In [None]:
import pandas as pd
import numpy as np

# Load the taxonomy CSV file
# NOTE(review): re-reads the same CSV path that earlier cells already load;
# `sample_columns`, used further down in this cell, is not defined here and
# must come from a previously executed cell.
file_path = '/Volumes/CAS9/Aeiwz/Project/HCC 16s ONT/minimap2/output/sam/taxonomy.csv'
taxonomy_df = pd.read_csv(file_path)

# Define function to calculate relative abundance
def relative_abundance(counts):
    """Scale ``counts`` so they sum to 1.

    When the sum is not greater than zero the input is returned unchanged.
    """
    total = np.sum(counts)
    if total > 0:
        return counts / total
    return counts

# Calculate relative abundance for each sample
relative_abundance_df = taxonomy_df[sample_columns].apply(relative_abundance, axis=0)

# Taxonomic ranks
taxonomy_ranks = ['Kingdom', 'Phylum', 'Class', 'Order', 'Family', 'Genus', 'Species']

# Initialize a dictionary to store DataFrames
relative_abundance_by_rank_dfs = {}

# Calculate and save relative abundance for each taxonomic rank
for rank in taxonomy_ranks:
    # Sum the raw counts per taxon at this rank, then normalise each sample
    # column so the taxa within one sample sum to 1. axis=0 matches the
    # per-sample normalisation used for relative_abundance_df above; axis=1
    # would instead divide every taxon by its own total across samples.
    grouped = taxonomy_df.groupby(rank)[sample_columns].sum().apply(relative_abundance, axis=0)

    # Save the DataFrame to the dictionary
    relative_abundance_by_rank_dfs[rank] = grouped

    # Save the DataFrame to a CSV file
    grouped.to_csv(f'/Volumes/CAS9/Aeiwz/Project/HCC 16s ONT/minimap2/output/relative_abundance_{rank}.csv')

# Display the first few rows of the DataFrame for one of the ranks
#relative_abundance_by_rank_dfs


In [None]:
import pandas as pd
import numpy as np

# Load the taxonomy CSV file
# NOTE(review): `sample_columns`, used later in this cell, is not defined
# here and must come from a previously executed cell.
file_path = '/Volumes/CAS9/Aeiwz/Project/HCC 16s ONT/minimap2/output/sam/taxonomy.csv'
taxonomy_df = pd.read_csv(file_path)

# Define function to calculate richness
def calculate_richness(group):
    """Count, for each row of ``group``, how many entries are greater than zero."""
    is_present = group > 0
    return is_present.sum(axis=1)

# Taxonomic ranks
taxonomy_ranks = ['Kingdom', 'Phylum', 'Class', 'Order', 'Family', 'Genus', 'Species']

# Initialize a dictionary to store DataFrames
richness_by_rank_dfs = {}

# Calculate and save richness for each taxonomic rank
for rank in taxonomy_ranks:
    # For every group of rows sharing the same value at this rank, apply
    # calculate_richness to the sample columns of that group.
    # NOTE(review): with the row-wise (axis=1) calculate_richness defined in
    # this cell, the result is one value per original row rather than one per
    # taxon -- confirm this is the intended definition of richness.
    grouped = taxonomy_df.groupby(rank)[sample_columns].apply(calculate_richness)

    # Save the result to the dictionary
    richness_by_rank_dfs[rank] = grouped

    # Save the result to a CSV file
    grouped.to_csv(f'/Volumes/CAS9/Aeiwz/Project/HCC 16s ONT/minimap2/output/richness_{rank}.csv')

# Display the first few rows of the result for one of the ranks
richness_by_rank_dfs['Class'].head()


In [None]:
import pandas as pd
import numpy as np

# Load the taxonomy CSV file
file_path = '/Volumes/CAS9/Aeiwz/Project/HCC 16s ONT/minimap2/output/sam/taxonomy.csv'
taxonomy_df = pd.read_csv(file_path)

# Extract sample columns (adjust as necessary)
# The slice keeps every column except the last nine.
# NOTE(review): presumably the last nine columns are taxonomy/metadata fields -- confirm.
sample_columns = taxonomy_df.columns[:-9]  # Adjust based on your metadata columns

# Define function to calculate relative abundance
def relative_abundance(counts):
    """Divide ``counts`` by their sum.

    If the sum is not greater than zero, ``counts`` is handed back as-is.
    """
    total = np.sum(counts)
    if total > 0:
        scaled = counts / total
    else:
        scaled = counts
    return scaled

# Define function to calculate richness
def calculate_richness(group):
    """Return how many entries of ``group`` are greater than zero."""
    is_present = group > 0
    return is_present.sum()

# Taxonomic ranks
taxonomy_ranks = ['Kingdom', 'Phylum', 'Class', 'Order', 'Family', 'Genus', 'Species']

# Initialize dictionaries to store DataFrames
relative_abundance_by_rank_dfs = {}
richness_by_rank_dfs = {}

# Calculate and save relative abundance and richness for each taxonomic rank
for rank in taxonomy_ranks:
    # Sum raw counts per taxon at this rank (rows: taxa, columns: samples)
    grouped = taxonomy_df.groupby(rank)[sample_columns].sum()

    # Calculate relative abundance: normalise each sample column so the taxa
    # within one sample sum to 1. (axis=1 would instead divide every taxon by
    # its own total across samples.)
    relative_abundance_df = grouped.apply(relative_abundance, axis=0)
    relative_abundance_by_rank_dfs[rank] = relative_abundance_df

    # Calculate richness: for each taxon row, the number of sample columns
    # with a summed count greater than zero.
    richness_df = grouped.apply(calculate_richness, axis=1)
    richness_by_rank_dfs[rank] = richness_df

    # Save the DataFrames to CSV files
    relative_abundance_df.to_csv(f'/Volumes/CAS9/Aeiwz/Project/HCC 16s ONT/minimap2/output/relative_abundance_{rank}.csv')
    richness_df.to_csv(f'/Volumes/CAS9/Aeiwz/Project/HCC 16s ONT/minimap2/output/richness_{rank}.csv')

# Display the first few rows of the DataFrame for one of the ranks
print("Relative Abundance - Phylum:\n", relative_abundance_by_rank_dfs['Phylum'].head())
print("Richness - Phylum:\n", richness_by_rank_dfs['Phylum'].head())


In [None]:
import pandas as pd
import numpy as np

# Load the taxonomy CSV file
file_path = '/Volumes/CAS9/Aeiwz/Project/HCC 16s ONT/minimap2/output/sam/taxonomy.csv'
taxonomy_df = pd.read_csv(file_path)

# Extract sample columns (adjust as necessary)
# The slice keeps every column except the last nine.
# NOTE(review): presumably the last nine columns are taxonomy/metadata fields -- confirm.
sample_columns = taxonomy_df.columns[:-9]  # Adjust based on your metadata columns

# Define function to calculate richness per sample
def calculate_richness(group):
    """Count, for each column of ``group``, how many entries are greater than zero."""
    is_present = group > 0
    return is_present.sum(axis=0)

# Taxonomic ranks
taxonomy_ranks = ['Kingdom', 'Phylum', 'Class', 'Order', 'Family', 'Genus', 'Species']

# Initialize a dictionary to store DataFrames
# Maps each rank name to the richness table computed for it in the loop below.
richness_by_sample_rank_dfs = {}

# Calculate and save richness per sample for each taxonomic rank
for rank in taxonomy_ranks:
    # Group by the taxonomic rank
    # (only the sample columns are kept in each group)
    grouped = taxonomy_df.groupby(rank)[sample_columns]
    
    # Calculate richness per sample
    # calculate_richness (defined in this cell) counts, per column, the rows
    # of the group with a value greater than zero.
    richness_df = grouped.apply(calculate_richness)
    richness_by_sample_rank_dfs[rank] = richness_df
    
    # Save the DataFrame to a CSV file
    richness_df.to_csv(f'/Volumes/CAS9/Aeiwz/Project/HCC 16s ONT/minimap2/output/richness_per_sample_{rank}.csv')

# Display the first few rows of the DataFrame for one of the ranks
print("Richness per Sample - Phylum:\n", richness_by_sample_rank_dfs['Phylum'].head())


In [None]:
# Show the per-sample richness table for the Phylum rank. The dictionary is
# named `richness_by_sample_rank_dfs` where it is built; the previous spelling
# (`richness_per_sample_by_rank_dfs`) is never assigned anywhere in the notebook.
richness_by_sample_rank_dfs['Phylum']

In [None]:
import pandas as pd
from diversity_indicate import (
    berger_parker_index,
    effective_number_of_species,
    fishers_alpha,
    inverse_simpson_index,
    pielou_evenness,
    richness,
    shannon_diversity_index,
    simpson_index
)

from relative_abundance import relative_abundance
from richness import calculate_richness, calculate_richness_per_sample


# Load your taxonomy CSV file
file_path = '/Volumes/CAS9/Aeiwz/Project/HCC 16s ONT/minimap2/output/sam/taxonomy.csv'
taxonomy_df = pd.read_csv(file_path)
# Keep every column except the last nine as sample columns.
sample_columns = taxonomy_df.columns[:-9]  # Adjust based on your metadata columns

# Example of calculating Shannon diversity index for a sample
# Uses the first sample column only; `shannon_index` is not referenced again
# in this cell.
counts = taxonomy_df[sample_columns[0]].values
shannon_index = shannon_diversity_index(counts)

# Calculate and save richness per sample for each taxonomic rank
# NOTE(review): calculate_richness_per_sample is imported from the
# project-local `richness` module; presumably it writes its CSVs under
# `output_path` and returns a dict keyed by rank -- confirm in that module.
taxonomy_ranks = ['Kingdom', 'Phylum', 'Class', 'Order', 'Family', 'Genus', 'Species']
output_path = '/Volumes/CAS9/Aeiwz/Project/HCC 16s ONT/minimap2/output'
richness_by_sample_rank_dfs = calculate_richness_per_sample(taxonomy_df, sample_columns, taxonomy_ranks, output_path)


In [None]:
from diversity_indicate import calculate_diversity_indices

In [None]:
import diversity_indicate
# Debugging aid: list the names exposed by the diversity_indicate module.
print(dir(diversity_indicate))


In [None]:
import pandas as pd
from diversity_indicate import calculate_diversity_indices
from relative_abundance import *
from richness import *

# Load your taxonomy CSV file
file_path = '/Volumes/CAS9/Aeiwz/Project/HCC 16s ONT/minimap2/output/sam/taxonomy.csv'
taxonomy_df = pd.read_csv(file_path)
# Keep every column except the last nine as sample columns.
sample_columns = taxonomy_df.columns[:-9]  # Adjust based on your metadata columns
taxonomy_ranks = ['Kingdom', 'Phylum', 'Class', 'Order', 'Family', 'Genus', 'Species']
output_path = '/Volumes/CAS9/Aeiwz/Project/HCC 16s ONT/minimap2/output'

# Example of calculating all diversity indices for a sample
# NOTE(review): calculate_diversity_indices receives a counts array here, but
# another cell in this notebook calls it with the CSV file path -- confirm
# which signature the diversity_indicate module actually implements.
counts = taxonomy_df[sample_columns[0]].values
indices = calculate_diversity_indices(counts)
# NOTE(review): this bare expression is not the last statement of the cell,
# so the notebook will not display it.
indices

# Calculate and save relative abundance
# NOTE(review): calculate_relative_abundance is only reachable through the
# star imports at the top of this cell; presumably it lives in the
# project-local `relative_abundance` module -- confirm.
calculate_relative_abundance(taxonomy_df, sample_columns, taxonomy_ranks, output_path)

# Calculate and save richness per sample
richness_by_sample_rank_dfs = calculate_richness_per_sample(taxonomy_df, sample_columns, taxonomy_ranks, output_path)


In [2]:
# Example usage:
from diversity_indicate import calculate_diversity_indices

# Path of the taxonomy CSV handed to calculate_diversity_indices.
file_path = '/Volumes/CAS9/Aeiwz/Project/HCC 16s ONT/minimap2/output/sam/taxonomy.csv'
# NOTE(review): the call passes the file path itself; the returned object is
# used as a DataFrame below (it must provide `.to_csv`).
results_df = calculate_diversity_indices(file_path)
# Write the result next to the other outputs.
results_df.to_csv('/Volumes/CAS9/Aeiwz/Project/HCC 16s ONT/minimap2/output/diversity_indicate.csv')


In [None]:
# Display `indices`; it is assigned in another cell of this notebook, so that
# cell has to be executed first.
indices

In [None]:
# Shell cleanup run from the notebook's working directory: delete every
# `*.pyc` file, then every entry named `__pycache__`, found below `.`.
# NOTE(review): presumably used to force the project-local modules to be
# recompiled after editing them -- confirm.
!find . -name "*.pyc" -delete
!find . -name "__pycache__" -delete


In [None]:
# Plain Python assignments. The previous `a <- 1` form is R syntax; Python
# parses it as the comparison `a < -1`, which fails because `a` is undefined.
a = 1
b = 2