# 🔬 OmicFlow: PCA Visualization by Cell Type
This notebook visualizes PCA results stored in S3, merged with cell metadata to show clustering by cell type.

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import boto3

# AWS region and the S3 client used for the downloads below.
region = "us-east-2"
session = boto3.Session(region_name=region)
s3 = session.client("s3")

# Input location: one bucket, with separate object keys for the PCA output
# and the per-cell metadata table.
bucket = "omicflow-data-bucket"
pca_key = "results/pca_results.csv"
meta_key = "cell_metadata.csv"

def read_csv_from_s3(bucket, key):
    """Fetch one S3 object and parse it as CSV.

    Uses the module-level ``s3`` client.

    Args:
        bucket: Name of the S3 bucket holding the object.
        key: Object key within the bucket.

    Returns:
        The ``DataFrame`` produced by ``pd.read_csv`` from the object's body.
    """
    response = s3.get_object(Bucket=bucket, Key=key)
    body = response['Body']
    return pd.read_csv(body)


In [None]:
# Load the PCA coordinates and the per-cell metadata from S3.
df_pca = read_csv_from_s3(bucket, pca_key)
df_meta = read_csv_from_s3(bucket, meta_key)

# Use the positional row index of the PCA table as the join key.
# NOTE(review): this assumes the PCA rows line up with the values in the
# metadata's Cell_ID column -- confirm against the job that wrote the PCA file.
df_pca['Cell_ID'] = df_pca.index

# The left join keeps every PCA row. The temporary indicator column records
# which rows found no metadata, so a key mismatch is reported instead of
# silently producing NaN metadata (and an uncoloured plot) downstream.
df = pd.merge(df_pca, df_meta, on='Cell_ID', how='left', indicator='_meta_match')
n_unmatched = int((df['_meta_match'] == 'left_only').sum())
df = df.drop(columns='_meta_match')
if n_unmatched:
    print(f"WARNING: {n_unmatched} of {len(df)} PCA rows have no matching Cell_ID in the metadata")

# Last expression of the cell: shows a preview of the merged table.
df.head()


In [None]:
# Scatter the first two principal components, one colour per cell type.
fig, ax = plt.subplots(figsize=(10, 8))
sns.scatterplot(
    data=df,
    x='PC1',
    y='PC2',
    hue='Cell Type',
    palette='tab10',
    alpha=0.7,
    s=20,
    ax=ax,
)
ax.set_title("PCA Projection Colored by Cell Type")
ax.set_xlabel("PC1")
ax.set_ylabel("PC2")
# Anchor the legend just outside the right edge of the axes so it does not
# sit on top of the points.
ax.legend(title="Cell Type", bbox_to_anchor=(1.05, 1), loc='upper left')
fig.tight_layout()
plt.show()
