<a href="https://colab.research.google.com/github/agonist11/colabadmixtools/blob/main/ColabADMIXTOOLS_Quick_Start_V5_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **ColabADMIXTOOLS Quick-Start Notebook V5.1**
---
Special thanks to `Florio` and the community for contributions, testing, and documentation.

> Revised 08-05-2025

[![Open ColabADMIXTOOLS Quick-Start Notebook V5.1 in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/agonist11/colabadmixtools/blob/main/ColabADMIXTOOLS_Quick_Start_V5_1.ipynb)

<a href="https://github.com/agonist11/colabadmixtools" target="_blank">**Check out the GitHub repository for other tools**</a>


---

## **What This Notebook Does**

- Mounts your Google Drive and creates Drive-backed folders for R libraries. `[Section A]`
- Installs or reinstalls the `admixtools` (ADMIXTOOLS 2) R package into your Drive-backed R library. `[Section A]`
- Imports AADR v62 (*1240K and HO datasets*), or unzips your own uploaded archive, into the runtime.
- Provides a reusable cell to run **qpAdm** via `rpy2`. `[Section B]`
- Persists the R library on Drive (datasets and results live in the runtime only), so you can restart the Colab VM and pick up right where you left off without reinstalling.

---

## **Customize Your Own Analyses**

For all available admixtools2 functions and parameters, see the official reference and build your own workflows:  
<a href="https://uqrmaie1.github.io/admixtools/reference/index.html" target="_blank">**admixtools2 Reference Documentation**</a>

Create a copy of this Notebook to save your changes. ***Warning***: Files and content generated in the runtime are not saved; you must download or screenshot them yourself. It is recommended to keep a few GB of storage free in your Google Drive.

---

Feel free to fork or star the repo for your own datasets and tweaks:  
<a href="https://github.com/agonist11/colabadmixtools" target="_blank">**https://github.com/agonist11/colabadmixtools**</a>


## **[Section A] First-Time Users or Fresh Re-Installations Only**

In [None]:
#@title **1. Mount Google Drive & Prepare Folders**
# First-time setup: mounts Google Drive and (re)creates the Drive-backed
# folders under MyDrive/colabadmixtools, then points R_LIBS_USER at the
# R library folder for this session.
from google.colab import drive
import os, shutil

drive.mount('/content/drive', force_remount=True)

ROOT   = '/content/drive/MyDrive/colabadmixtools'
R_LIBS = os.path.join(ROOT, 'R_libs')
AADR   = os.path.join(ROOT, 'AADR')

os.makedirs(ROOT, exist_ok=True)

# WARNING: any existing R library folder is deleted so the directory starts
# out empty. Do not re-run this cell unless you want a fresh installation.
if os.path.exists(R_LIBS): shutil.rmtree(R_LIBS)
os.makedirs(R_LIBS, exist_ok=True)

# Create the AADR folder too; it is reported as "Prepared" below, so it has
# to exist. Existing contents are left untouched.
os.makedirs(AADR, exist_ok=True)

os.environ['R_LIBS_USER'] = R_LIBS
print(f"Prepared:\n • {R_LIBS}\n • {AADR}")


In [None]:
#@title **2. Install admixtools (v2) into Google Drive**
%%bash
# Stop at the first failing command so the confirmation message at the end is
# only printed when the installation really succeeded.
set -e

# install devtools if missing
# NOTE: R's logical constants are TRUE/FALSE. A Python-style `False` is an
# undefined object in R, which made this probe error out and reinstall
# devtools on every run.
if ! Rscript -e "quit(status = if (!requireNamespace('devtools', quietly=TRUE)) 1 else 0)"; then
  Rscript -e "install.packages('devtools', repos='https://cloud.r-project.org')"
fi

# install the correct GitHub repo (it's 'uqrmaie1/admixtools', not 'admixtools2')
# `lib` sends the package to the library directory named by R_LIBS_USER.
Rscript -e "devtools::install_github('uqrmaie1/admixtools',
                                   dependencies=TRUE,
                                   force=TRUE,
                                   lib=Sys.getenv('R_LIBS_USER'))"
echo "admixtools installed into $R_LIBS_USER"


## **[Section B] Returning Users Only**

In [None]:
#@title **3. Mount Drive, set R_LIBS & load AADR into /content/**
# Returning-user setup: mounts Drive, points R_LIBS_USER at the Drive-backed
# R library and, when `download_AADR` is ticked, downloads every file of the
# Dataverse dataset below into /content/.
download_AADR = True  #@param {type:"boolean"}

from google.colab import drive
import os, requests

# 1) Mount Drive and set R_LIBS
drive.mount('/content/drive', force_remount=True)
R_LIBS = '/content/drive/MyDrive/colabadmixtools/R_libs'
os.makedirs(R_LIBS, exist_ok=True)
os.environ['R_LIBS_USER'] = R_LIBS
print("R_LIBS_USER →", R_LIBS)

# 2) Dataverse dataset info
DATAVERSE = "https://dataverse.harvard.edu"
PID       = "doi:10.7910/DVN/FFIDCW"
meta_url  = f"{DATAVERSE}/api/datasets/:persistentId/?persistentId={PID}"

if download_AADR:
    # 3) Fetch file metadata (only when a download was requested, so the
    #    cell needs no network access otherwise). Fail loudly on HTTP errors
    #    instead of hitting a confusing KeyError while parsing an error body.
    meta = requests.get(meta_url, timeout=60)
    meta.raise_for_status()
    files = meta.json()['data']['latestVersion']['files']

    print(f"→ Downloading {len(files)} files into /content/ …")
    for f in files:
        fid   = f['dataFile']['id']
        # Keep only the final path component of the server-supplied label so
        # the file always lands directly inside /content/.
        name  = os.path.basename(f.get('label', str(fid))) or str(fid)
        outp  = os.path.join('/content', name)
        part  = outp + '.part'
        url   = f"{DATAVERSE}/api/access/datafile/{fid}"
        print("Downloading", name)
        with requests.get(url, stream=True, timeout=60) as r:
            r.raise_for_status()
            # Stream into a temporary ".part" file and rename on success, so
            # an interrupted download never leaves a truncated file under
            # its final name.
            with open(part, 'wb') as fp:
                # 1 MiB chunks: far fewer Python-level writes than 8 KiB for
                # multi-gigabyte genotype files.
                for chunk in r.iter_content(chunk_size=1 << 20):
                    fp.write(chunk)
        os.replace(part, outp)
    print("AADR v62 files downloaded to /content/")
else:
    files = []  # keep the name defined even though nothing was fetched
    print("Download disabled. To fetch AADR v62 files into /content/, set download_AADR = True.")


In [None]:
#@title 5. Run qpAdm with full stats, save results, and Plotly donut chart
# --- User parameters ---
# Each assignment below is a Colab form field (see the `@param` markers).
# The boolean switches are later passed to qpadm() under the same names.
prefix      = "/content/v62.0_HO_public"  # @param {type:"string"}
target      = "MXL.DG"                   # @param {type:"string"}
left_pops   = "IBS.DG,Nahua.DG,Yoruba.DG" # @param {type:"string"}
right_pops  = "Mbuti.DG,Russia_UstIshim_IUP.DG,Georgia_Dzudzuana_UP.SG,Russia_Sidelkino_HG.SG,Israel_Natufian.AG,Russia_MA1_UP.SG,Brazil_LocaDoSuin_Sambaqui_9100BP.AG,Switzerland_Bichon_Epipaleolithic.SG"  # @param {type:"string"}
allsnps     = True   # @param {type:"boolean"}
auto_only   = True   # @param {type:"boolean"}
poly_only   = False  # @param {type:"boolean"}
boot        = False  # @param {type:"boolean"}
getcov      = True   # @param {type:"boolean"}
constrained = False  # @param {type:"boolean"}
return_f4   = True   # @param {type:"boolean"}
cpp         = True   # @param {type:"boolean"}
verbose     = True   # @param {type:"boolean"}

# --- Prepare populations ---
# Split the comma-separated form strings into lists of population labels,
# trimming surrounding whitespace and discarding empty entries.
left  = [label for label in map(str.strip, left_pops.split(','))  if label]
right = [label for label in map(str.strip, right_pops.split(',')) if label]

# --- Imports ---
import glob, os, sys
from datetime import datetime
import rpy2.robjects as ro
from rpy2.robjects import StrVector, pandas2ri
pandas2ri.activate()
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from scipy.stats import norm

# --- Load admixtools in R ---
# Prepend the library named by R_LIBS_USER to R's search path, then attach
# the admixtools package.
for r_statement in (
    '.libPaths(c(Sys.getenv("R_LIBS_USER"), .libPaths()))',
    'library(admixtools)',
):
    ro.r(r_statement)

# --- Push parameters into R ---
# Every entry becomes a variable of the same name in R's global environment,
# which is where the R snippet below reads its arguments from.
r_inputs = {
    'prefix': prefix,
    'left': StrVector(left),
    'right': StrVector(right),
    'target': target,
    'allsnps': allsnps,
    'auto_only': auto_only,
    'poly_only': poly_only,
    'boot': boot,
    'getcov': getcov,
    'constrained': constrained,
    'return_f4': return_f4,
    'cpp': cpp,
    'verbose': verbose,
}
for r_name, r_value in r_inputs.items():
    ro.globalenv[r_name] = r_value

# --- Run qpAdm ---
# The call and the data-frame conversion both happen on the R side; the f4
# table is only materialised when return_f4 is set.
ro.r('''
res <- qpadm(prefix, left, right, target=target,
             allsnps=allsnps,
             auto_only=auto_only,
             poly_only=poly_only,
             boot=boot,
             getcov=getcov,
             constrained=constrained,
             return_f4=return_f4,
             cpp=cpp,
             verbose=verbose)
weights_df  <- as.data.frame(res$weights)
rankdrop_df <- as.data.frame(res$rankdrop)
popdrop_df  <- as.data.frame(res$popdrop)
if (return_f4) f4_df <- as.data.frame(res$f4)
''')

# --- Convert to pandas DataFrames ---
weights  = pandas2ri.rpy2py(ro.r('weights_df'))
rankdrop = pandas2ri.rpy2py(ro.r('rankdrop_df'))
popdrop  = pandas2ri.rpy2py(ro.r('popdrop_df'))
# f4_df only exists in R when return_f4 was requested, so guard the lookup.
if return_f4:
    f4 = pandas2ri.rpy2py(ro.r('f4_df'))
else:
    f4 = None

# --- Compute p-values from z-scores ---
# Two-sided p-value under a standard normal distribution. The survival
# function norm.sf(x) = 1 - cdf(x) is evaluated directly in the tail; the
# literal `1 - norm.cdf(x)` rounds to exactly 0.0 once |z| exceeds roughly 8,
# which would report strongly significant weights as p = 0.
weights['pvalue'] = 2 * norm.sf(np.abs(weights['z']))

# --- Determine timestamped results filename ---
# One CSV per run, named after the local wall-clock time of the run.
ts       = datetime.now().strftime("%Y%m%d_%H%M%S")
outfile  = f"results_run_{ts}.csv"

# --- Combine and save results ---
# Tag every table with a `section` column naming its origin, then stack them
# all into a single frame. Columns missing from a table are left empty there.
sections = [
    weights[['left','weight','se','z','pvalue']].assign(section='weights'),
    rankdrop.assign(section='rankdrop'),
    popdrop.assign(section='popdrop'),
]
if f4 is not None:
    sections.append(f4.assign(section='f4'))

combined = pd.concat(sections, ignore_index=True)
combined.to_csv(outfile, index=False)
print(f"Saved all results to {outfile}")

# --- Extract metrics for plotting ---
# Fit statistics come from the first row of the rankdrop table; weights and
# standard errors are rescaled to percentages for display.
chi_squared = float(rankdrop["chisq"].iat[0])
p_value     = float(rankdrop["p"].iat[0])
props       = weights['weight'].mul(100)
ses         = weights['se'].mul(100)
zs          = weights['z']

# --- Build formatted legend labels (all bold) ---
legend_labels = []
for pop, prop, se, z in zip(weights['left'], props, ses, zs):
    legend_labels.append(
        f"<b>{pop}</b> | <b>{prop:.2f}%</b> ± <b>{se:.2f}%</b> | <b>Z: {z:.2f}</b>"
    )

# --- Create Plotly donut chart ---
# A pie trace with a hole; slice text is hidden, so all detail lives in the
# legend labels and the hover box.
donut_trace = go.Pie(
    labels=legend_labels,
    values=props,
    hole=0.6,
    marker={'line': {'color': 'black', 'width': 3}},
    sort=True,
)
fig = go.Figure(donut_trace)
fig.update_traces(
    textinfo='none',
    hovertemplate="<b>%{label}</b><br><b>Percent:</b> %{percent}<extra></extra>",
)

# Two-line title: target population, then the fit statistics.
title_text = (
    f"<b>ADMIXTOOLS2 | Target Population: {target}</b><br>"
    f"<b>Admixture Proportions (Chi-Squared: {chi_squared:.2f}, P-Value: {p_value:.12g})</b>"
)

# Legend placed to the right of the donut; the wide right margin makes room.
legend_cfg = {
    'title': {'text': "<b>Sources & Weights</b>", 'font': {'size': 14}, 'side': "top center"},
    'font': {'size': 14, 'color': "black"},
    'x': 0.9, 'y': 0.6,
    'xanchor': 'left', 'yanchor': 'middle',
}

# Faint repository credit anchored to the bottom-left of the plotting area.
credit_note = {
    'text': "<b>github.com/agonist11/colabadmixtools</b>",
    'x': -0.02, 'y': -0.05,
    'xref': "paper", 'yref': "paper",
    'xanchor': "left", 'yanchor': "bottom",
    'showarrow': False,
    'font': {'size': 10, 'color': "gray"},
    'opacity': 0.5,
}

fig.update_layout(
    title={
        'text': title_text,
        'x': 0.5, 'y': 0.95,
        'font': {'family': "Arial", 'size': 18, 'color': "black"},
    },
    legend=legend_cfg,
    margin={'t': 100, 'b': 50, 'l': 40, 'r': 200},
    annotations=[credit_note],
)

fig.show()


# **Tools (In Progress)**

In [None]:
#@title **Tool 1: Upload ZIP via File Browser & Unzip to `/content/`**
#@markdown Slower.
# Asks for a file upload, then extracts the first uploaded .zip into /content/.
from google.colab import files
import zipfile, os

# 1. Prompt user to upload a ZIP file
uploaded = files.upload()

# 2. Keep only the uploaded names that end in ".zip" (case-insensitive)
zip_files = list(filter(lambda fname: fname.lower().endswith('.zip'), uploaded.keys()))
if zip_files:
    # Only the first archive is used; any further uploads are ignored.
    zip_path = zip_files[0]
    # 3. Unzip into /content/
    with zipfile.ZipFile(zip_path, 'r') as archive:
        archive.extractall('/content/')
    print(f"Unpacked {zip_path} → /content/")
else:
    print("No .zip file uploaded. Please upload a ZIP and re-run this cell.")


In [None]:
#@title **Tool 2: Mount Google Drive & Unzip to `/content/`**
#@markdown Faster.
# Extracts a ZIP archive that already sits on Google Drive into /content/.
zip_file_path = "/content/drive/MyDrive/colabadmixtools/Florio_mergedHO.zip"  #@param {type:"string"}

from google.colab import drive
import zipfile, os

# 1. Mount your Drive
drive.mount('/content/drive', force_remount=True)

# 2. Validate & unzip into the Colab runtime root
#    The path must be an existing regular file with a .zip extension.
looks_like_zip = os.path.isfile(zip_file_path) and zip_file_path.lower().endswith('.zip')
if not looks_like_zip:
    print(f"ZIP file not found at:\n  {zip_file_path}\nPlease check the path and try again.")
else:
    with zipfile.ZipFile(zip_file_path, 'r') as archive:
        archive.extractall('/content/')
    print(f"Unpacked {os.path.basename(zip_file_path)} → /content/")
