In [1]:
import pandas as pd
import numpy as np
from skbio.stats.distance import DistanceMatrix, permanova, permdisp
import seaborn as sns
import matplotlib.pyplot as plt

# Load data from a TSV file
data = pd.read_csv('/Users/dimitriosgiakoumas/Downloads/PERM_ASV/bray_curtis_distance_matrix_mvf_ASV/distance-matrix.tsv', sep='\t', index_col=0)
dist_matrix = DistanceMatrix(np.ascontiguousarray(data.values), ids=data.index.tolist())  # Ensure the data is C-contiguous

# Given sample IDs
male_samples = ["T20", "T107", "T203", "T212", "T99", "T195", "T205", "T160", "T216", "T209", "T16"]
female_samples = ["T11", "T85.1", "T65", "T196", "TAK", "T159", "TAA", "T4.1", "T214", "T136", "T17", "T217"]

# Create a grouping vector
group = ['male' if id in male_samples else 'female' if id in female_samples else 'unknown' for id in data.index]

# Perform PERMANOVA and PERMDISP
permanova_result = permanova(dist_matrix, group, permutations=999)
permdisp_result = permdisp(dist_matrix, group, permutations=999)

# Output the results
print("PERMANOVA Results:", permanova_result, "\nPERMDISP Results:", permdisp_result, sep='\n')

# Extract distances for visualization
def extract_distances(matrix, group1, group2):
    return [matrix.at[s1, s2] for s1 in group1 for s2 in group2 if s1 != s2]

within_male_distances = extract_distances(data, male_samples, male_samples)
within_female_distances = extract_distances(data, female_samples, female_samples)
between_mf_distances = extract_distances(data, male_samples, female_samples)

# Create a DataFrame for visualization
df = pd.DataFrame({
    'Group': ['Within Male'] * len(within_male_distances) +
             ['Within Female'] * len(within_female_distances) +
             ['Between M-F'] * len(between_mf_distances),
    'Distance': within_male_distances + within_female_distances + between_mf_distances
})

# Calculate effect size (R^2)
SS_between = permanova_result['test statistic'] * (permanova_result['number of groups'] - 1)
SS_total = SS_between + (permanova_result['sample size'] - permanova_result['number of groups'])
R2 = SS_between / SS_total
print(f"\nEffect Size (R^2): {R2:.4f}")

# Define your color palette here
custom_palette = ['blue', 'pink', '#c8a2c8']  # Blue, Pink, Light Purple

# Plotting
plt.figure(figsize=(10, 6))
ax = sns.boxplot(x='Group', y='Distance', hue='Group', data=df, palette=custom_palette, dodge=False)

# Adjust the legend
handles, labels = ax.get_legend_handles_labels()
ax.legend(handles[:3], ['Within Male', 'Within Female', 'Between M-F'], title='Group')

# Remove the hue legend since it's redundant
ax.get_legend().remove()

# Find the maximum distance for setting the height of significance bar
max_distance = df['Distance'].max()

# Function to add significance bars
def add_significance_bar(start, end, height, display_text, ax):
    ax.plot([start, start, end, end], [height, height + 0.05, height + 0.05, height], lw=1.5, c='black')
    ax.text((start+end)*.5, height + 0.05, display_text, ha='center', va='bottom', color='black')

# Add the significance bar at an appropriate height
# The height is set to max_distance + 0.1 (or more) to ensure it is


# Add the significance bar at an appropriate height
# The height is set to max_distance + 0.1 (or more) to ensure it is above the highest boxplot
#significance_height = max_distance + 0.1
#add_significance_bar(start=0, end=1, height=significance_height, display_text='*', ax=ax)

plt.title('Pairwise Comparisons from Bray-Curtis Distance Matrix')
plt.show()



PERMANOVA Results:
method name               PERMANOVA
test statistic name        pseudo-F
sample size                      25
number of groups                  3
test statistic             1.258995
p-value                       0.094
number of permutations          999
Name: PERMANOVA results, dtype: object

PERMDISP Results:
method name               PERMDISP
test statistic name        F-value
sample size                     25
number of groups                 3
test statistic            4.786462
p-value                      0.462
number of permutations         999
Name: PERMDISP results, dtype: object

Effect Size (R^2): 0.1027


AttributeError: module 'numpy' has no attribute 'float'.
`np.float` was a deprecated alias for the builtin `float`. To avoid this error in existing code, use `float` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.float64` here.
The aliases was originally deprecated in NumPy 1.20; for more details and guidance see the original release note at:
    https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations

<Figure size 1000x600 with 0 Axes>

In [None]:
#All samples in
biom convert -i /Users/dimitriosgiakoumas/Downloads/PERM_ASV/ASV_Table.tsv -o /Users/dimitriosgiakoumas/Downloads/PERM_ASV/ASV_Table.biom --table-type="OTU table" --to-hdf5

qiime tools import \
    --type 'FeatureTable[Frequency]' \
           --input-path /Users/dimitriosgiakoumas/Downloads/PERM_ASV/ASV_Table.biom \
           --output-path /Users/dimitriosgiakoumas/Downloads/PERM_ASV/ASV_Table.qza \
           --input-format BIOMV210Format

qiime diversity beta \
                --i-table /Users/dimitriosgiakoumas/Downloads/PERM_ASV/ASV_Table.qza \
                --p-metric braycurtis \
                           --o-distance-matrix /Users/dimitriosgiakoumas/Downloads/PERM_ASV/bray_curtis_distance_matrix_mvf_ASV.qza

qiime tools export \
            --input-path /Users/dimitriosgiakoumas/Downloads/PERM_ASV/bray_curtis_distance_matrix_mvf_ASV.qza \
            --output-path /Users/dimitriosgiakoumas/Downloads/PERM_ASV/bray_curtis_distance_matrix_mvf_ASV


In [None]:
#Took out samples T85.2 and T85.3
biom convert -i /Users/dimitriosgiakoumas/Downloads/PERM1/Composition_Summary_L7_mvf_1.tsv -o /Users/dimitriosgiakoumas/Downloads/PERM1/Composition_Summary_L7_mvf_1.biom --table-type="OTU table" --to-hdf5

qiime tools import \
    --type 'FeatureTable[Frequency]' \
           --input-path /Users/dimitriosgiakoumas/Downloads/PERM1/Composition_Summary_L7_mvf_1.biom \
           --output-path /Users/dimitriosgiakoumas/Downloads/PERM1/Composition_Summary_L7_mvf_1.qza \
           --input-format BIOMV210Format

qiime diversity beta \
                --i-table /Users/dimitriosgiakoumas/Downloads/PERM1/Composition_Summary_L7_mvf_1.qza \
                --p-metric braycurtis \
                           --o-distance-matrix /Users/dimitriosgiakoumas/Downloads/PERM1/bray_curtis_distance_matrix_mvf_1.qza

qiime tools export \
            --input-path /Users/dimitriosgiakoumas/Downloads/PERM1/bray_curtis_distance_matrix_mvf_1.qza \
            --output-path /Users/dimitriosgiakoumas/Downloads/PERM1/bray_curtis_distance_matrix_mvf_1


In [1]:
import csv

input_file_path = '/Users/dimitriosgiakoumas/Downloads/PERM_ASV/ASV_Table.csv'
output_file_path = '/Users/dimitriosgiakoumas/Downloads/PERM_ASV/ASV_Table.tsv'

with open(input_file_path, 'r') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')

    with open(output_file_path, 'w', newline='') as tsv_file:
        tsv_writer = csv.writer(tsv_file, delimiter='\t')

        for row in csv_reader:
            tsv_writer.writerow(row)

print("Conversion completed.")


Conversion completed.
