In [18]:
import pandas as pd
import os

# Function to filter rows based on specified chromosomes and save to a new CSV
def filter_chromosomes(input_csv, chrom1_filter, chrom2_filter, output_dir):
    # Load the CSV file into a pandas DataFrame
    df = pd.read_csv(input_csv, delimiter="\t")  # Assuming tab-separated CSV
    
    # Filter the rows based on the chromosome conditions
    filtered_df = df[(df['chrom1'] == chrom1_filter) & (df['chrom2'] == chrom2_filter)]
    
    # Extract the original file name (without extension) and construct the new file name
    original_file_name = os.path.splitext(os.path.basename(input_csv))[0]
    
    # Create the new output file name
    output_file = f"{output_dir}/{original_file_name}_filtered_{chrom1_filter}_{chrom2_filter}.csv"
    
    # Write the filtered DataFrame to a new CSV file
    filtered_df.to_csv(output_file, index=False)
    
    print(f"Filtered data saved to: {output_file}")

# Define the input CSV path and the desired chromosome values
input_csv = r"C:\Users\ibirc\OneDrive\Documents\Projects\CD_300\Cetaceans\synteny_results\synteny_sperm_whale_parsed.vaquita_parsed.csv"  # Replace with your actual input file path
chrom1_filter = 14  # Chromosome 1 filter
chrom2_filter = 20  # Chromosome 2 filter
output_dir = r"C:\Users\ibirc\OneDrive\Documents\Projects\CD_300\Cetaceans\synteny_results\parsed_synteny"

# Call the function to filter and save the data
filter_chromosomes(input_csv, chrom1_filter, chrom2_filter, output_dir)


Filtered data saved to: C:\Users\ibirc\OneDrive\Documents\Projects\CD_300\Cetaceans\synteny_results\parsed_synteny/synteny_sperm_whale_parsed.vaquita_parsed_filtered_14_20.csv


In [10]:
import os
import pandas as pd
import matplotlib.pyplot as plt
from pycircos import Circos

# Directory containing filtered synteny files
input_dir = r"C:\Users\ibirc\OneDrive\Documents\Projects\CD_300\Cetaceans\synteny_results\parsed_synteny_chromosomes"

# Get all CSV files in the directory
csv_files = [f for f in os.listdir(input_dir) if f.endswith(".csv")]

# Dictionary to track chromosome positions
chromosomes = {}
links = []

# Load data from all files
for file in csv_files:
    file_path = os.path.join(input_dir, file)
    df = pd.read_csv(file_path, delimiter="\t")
    
    for _, row in df.iterrows():
        chrom1, chrom2 = f"{row['chrom1']}", f"{row['chrom2']}"
        
        # Store chromosome sizes arbitrarily (adjust as needed)
        if chrom1 not in chromosomes:
            chromosomes[chrom1] = 1000000  # Placeholder size
        if chrom2 not in chromosomes:
            chromosomes[chrom2] = 1000000
        
        # Store link information
        links.append((chrom1, chrom2, row['start1'], row['end1'], row['start2'], row['end2']))

# Create a Circos plot
circos = Circos()

# Add chromosome segments
for chrom, size in chromosomes.items():
    circos.add_segment(chrom, 0, size)

# Add links between chromosomes
for chrom1, chrom2, start1, end1, start2, end2 in links:
    circos.chord((chrom1, start1, end1), (chrom2, start2, end2), color='blue', alpha=0.5)

# Plot the final figure
plt.figure(figsize=(10, 10))
ax = plt.gca()
ax.set_xticks([])
ax.set_yticks([])
circos.draw()
plt.title("Synteny Plot for All Species")
plt.show()


KeyError: 'chrom1'